Institution for developing agency

This commit is contained in:
Ruben van de Ven 2022-07-14 15:33:11 +02:00
parent a4f98a6492
commit 54c42f0a82
2 changed files with 41 additions and 6 deletions

View File

@ -201,6 +201,8 @@ def mapDeployment(entry):
global args
city = mapCity(entry)
mapInstitution(entry)
if entry['Vendor'] and len(entry['Vendor'].strip()) > 0:
mapDeveloperInstitution(entry['Vendor'])
tech = mapTechnology(entry)
return {
'title': f"{entry['Vendor']} {entry['Technology']} used by {entry['Agency']}".replace(' '," ").strip(),
@ -295,6 +297,23 @@ def mapInstitution(entry):
else:
institutions[entry['Agency']] = info
def mapDeveloperInstitution(title):
    """Register a placeholder Institution entry for a developer/vendor.

    Adds ``title`` as a key of the module-level ``institutions`` mapping
    unless an entry with that key already exists, in which case the existing
    entry is left untouched. The placeholder has empty ``properties`` and is
    flagged "Needs content"; its CiteRef is taken from ``args.citeref``.

    Returns None in both cases.
    """
    if title not in institutions:
        stub = {
            'title': title,
            '@type': 'Institution',
            'properties': {},
            "additionalProperties": {
                "Needs content": "Yes",
                'CiteRef': args.citeref,
            },
        }
        institutions[title] = stub
def mapCity(entry):
title = f"{entry['City']} ({entry['State']})"
if title not in cities:
@ -310,7 +329,7 @@ def mapCity(entry):
}
if not args.skip_geolocation:
location_response = geocoder.geocode(title + " USA")
location_response = geocoder.geocode(f"{entry['City']}, {entry['State']}, USA")
sleep(1) # free tier of location geocode requires 1 sec delay
if location_response:
location = location_response.raw
@ -419,7 +438,7 @@ if __name__ == "__main__":
parsedData=[]
with open(args.csv, newline='') as csvfile:
csvreader = csv.DictReader(csvfile, delimiter=',')
for row in csvreader:
for row in tqdm.tqdm(csvreader):
data = mapEntry(row)
if data is None:
continue
@ -433,10 +452,7 @@ if __name__ == "__main__":
for i, data in enumerate(parsedData):
page = renderPage(data)
# if data['@type'] == "Institution":
# print(data['title'])
# print(page)
# break
# if data['@type'] == 'City': #only for city as to update coordinates
saveIfNotExists(data, page, session, token)
# if i > 5:

19
fix_blockquote.py Normal file
View File

@ -0,0 +1,19 @@
"""Remove CiteRef-terminated blockquote breaks from ``.mw`` files.

Scans the current working directory (non-recursively) for regular files
ending in ``.mw`` and deletes every occurrence of the pattern in ``regex``.
Only files whose contents actually change are rewritten; their names are
printed.
"""
import os
import re

# Matches " [[CiteRef::atlasofsurveillance2022]]</blockquote>", a blank line
# and the next "<blockquote>", but only when the match is preceded by exactly
# one character that itself directly follows a ">".
regex = r"(?<=(?<=>).)( \[\[CiteRef::atlasofsurveillance2022\]\]<\/blockquote>\n\n<blockquote>)"

# Compiled once instead of on every file. No flags are needed: the pattern
# has no ^/$ anchors, so re.MULTILINE never had any effect on it.
_pattern = re.compile(regex)


def remove_citeref_breaks(contents):
    """Return ``contents`` with every match of ``regex`` removed.

    All occurrences are replaced; text without a match is returned unchanged.
    """
    return _pattern.sub("", contents)


files = [f for f in os.listdir('.') if os.path.isfile(f) and f.endswith('.mw')]
for f in files:
    # Explicit encoding so the read/write round-trip does not depend on the
    # platform's locale default.
    with open(f, 'r', encoding='utf-8') as fp:
        contents = fp.read()

    result = remove_citeref_breaks(contents)

    # Leave untouched files alone so their mtime is not bumped needlessly.
    if contents != result:
        print(f)
        with open(f, 'w', encoding='utf-8') as fp:
            fp.write(result)