Reviewer notes (text before the first "diff --git" line is skipped by git apply / patch):
- This patch was rebuilt from a whitespace-collapsed copy. Leading indentation and the
  position of blank context lines are reconstructed -- check them against the target
  files before applying. The "index" blob ids are those of the original commit and do
  not reflect the amended "+" lines.
- NOTE(review): tqdm.tqdm() is used in the __main__ hunk but no hunk adds an import;
  confirm that `import tqdm` already exists in csv_importer_atlas-of-surveillance.py.

diff --git a/csv_importer_atlas-of-surveillance.py b/csv_importer_atlas-of-surveillance.py
index 47be63d..93c66e7 100644
--- a/csv_importer_atlas-of-surveillance.py
+++ b/csv_importer_atlas-of-surveillance.py
@@ -201,6 +201,8 @@ def mapDeployment(entry):
     global args
     city = mapCity(entry)
     mapInstitution(entry)
+    if entry['Vendor'] and entry['Vendor'].strip():
+        mapDeveloperInstitution(entry['Vendor'])
     tech = mapTechnology(entry)
     return {
         'title': f"{entry['Vendor']} {entry['Technology']} used by {entry['Agency']}".replace(' '," ").strip(),
@@ -295,6 +297,23 @@ def mapInstitution(entry):
     else:
         institutions[entry['Agency']] = info
 
+def mapDeveloperInstitution(title):
+    """Register a stub Institution entry for ``title`` unless one already exists."""
+    if title in institutions:
+        return
+
+    institutions[title] = {
+        'title': title,
+        '@type': 'Institution',
+        'properties': {
+        },
+        "additionalProperties": {
+            "Needs content": "Yes",
+            'CiteRef': args.citeref,
+        }
+    }
+
+
 def mapCity(entry):
     title = f"{entry['City']} ({entry['State']})"
     if title not in cities:
@@ -310,7 +329,7 @@ def mapCity(entry):
         }
 
         if not args.skip_geolocation:
-            location_response = geocoder.geocode(title + " USA")
+            location_response = geocoder.geocode(f"{entry['City']}, {entry['State']}, USA")
             sleep(1) # free tier of location geocode requires 1 sec delay
             if location_response:
                 location = location_response.raw
@@ -419,7 +438,7 @@ if __name__ == "__main__":
     parsedData=[]
     with open(args.csv, newline='') as csvfile:
         csvreader = csv.DictReader(csvfile, delimiter=',')
-        for row in csvreader:
+        for row in tqdm.tqdm(csvreader):
             data = mapEntry(row)
             if data is None:
                 continue
@@ -433,10 +452,7 @@ if __name__ == "__main__":
 
     for i, data in enumerate(parsedData):
         page = renderPage(data)
-        # if data['@type'] == "Institution":
-        #     print(data['title'])
-        #     print(page)
-        #     break
+        # if data['@type'] == 'City':  # only for City entries, to update coordinates
         saveIfNotExists(data, page, session, token)
 
         # if i > 5:
diff --git a/fix_blockquote.py b/fix_blockquote.py
new file mode 100644
index 0000000..d58ea29
--- /dev/null
+++ b/fix_blockquote.py
@@ -0,0 +1,25 @@
+"""Delete every match of ``regex`` from the .mw files in the current directory.
+
+Only files whose content actually changes are rewritten; their names are printed.
+"""
+import os
+import re
+
+# NOTE(review): the original literal ended with one raw, invisible line-separator
+# character after the two "\n" (it only shows up as a line break in the patch).
+# Its code point could not be recovered, so the class below accepts any separator
+# that can sit inside a one-line string literal -- narrow it once confirmed.
+regex = r"(?<=(?<=>).)( \[\[CiteRef::atlasofsurveillance2022\]\]<\/blockquote>\n\n[\x0b\x0c\x1c-\x1e\x85\u2028\u2029])"
+
+files = [f for f in os.listdir('.') if os.path.isfile(f) and f.endswith('.mw')]
+
+for f in files:
+    with open(f, 'r') as fp:
+        contents = fp.read()
+
+    # count and flags are passed by keyword: positional use is deprecated since 3.13.
+    result = re.sub(regex, "", contents, count=0, flags=re.MULTILINE)
+    if contents != result:
+        print(f)
+        with open(f, 'w') as fp:
+            fp.write(result)