"""Convert the library location and transit CSV exports into one JSON file.

Reads ``data/locaties.csv`` plus the two ``data/batch2/Rapport_transit_*.csv``
reports and writes ``data/parsed_transits.json`` with the keys ``libraries``
(nodes), ``movements`` (edges) and ``items`` (bibliographical data).
"""
import csv
import json
from typing import Optional

LOCATIONS_CSV = "data/locaties.csv"
ITEMS_CSV = "data/batch2/Rapport_transit_1.csv"
MOVEMENTS_CSV = "data/batch2/Rapport_transit_2.csv"
OUTPUT_JSON = 'data/parsed_transits.json'


def filter_date(date: str) -> str:
    """Normalise a publication date string.

    Replaces ``cop.`` with ``©``, trims surrounding whitespace, and removes
    one pair of enclosing square brackets together with a trailing full stop.
    """
    date = date.replace('cop.', '©').strip()
    # The full stop may sit outside the brackets ("[2005]."); it has to go
    # first, otherwise the closing bracket is not a suffix and survives.
    date = date.removesuffix('.')
    date = date.removeprefix('[').removesuffix(']')
    # ...or inside them ("[2005.]").
    return date.removesuffix('.')


def clean_title(title: str) -> str:
    """Return *title* without its trailing ``/`` and surrounding whitespace."""
    # Trim the right-hand side first so a slash followed by whitespace
    # ("Title / ") is still recognised as a suffix.
    return title.rstrip().removesuffix('/').strip()


def _split_coordinates(raw: str) -> tuple[Optional[str], Optional[str]]:
    """Split a ``"lat,lon"`` string into its two parts.

    Returns ``(None, None)`` when *raw* does not consist of exactly two
    comma-separated parts. The parts are returned as unmodified strings.
    """
    try:
        lat, lon = raw.split(',')
    except ValueError:
        return None, None
    return lat, lon


def load_libraries(path: str = LOCATIONS_CSV) -> tuple[dict, dict]:
    """Read the semicolon-delimited locations file.

    Returns:
        A ``(libraries, locations)`` pair. ``libraries`` is keyed by library
        name and ``locations`` by location code; on duplicate keys the last
        row wins. Each location holds a reference to its library dict.
    """
    libraries = {}
    locations = {}
    # NOTE(review): no explicit encoding here, so the platform default is
    # used (unlike the transit reports) — confirm the file's real encoding.
    with open(path, newline='') as fp:
        for row in csv.DictReader(fp, delimiter=";"):
            lat, lon = _split_coordinates(row['Locatie'])
            library = {
                'name': row['Library Name'],
                'code': row['Library Code'],
                'adres': row['Adres'],
                'lat': lat,
                'lon': lon,
            }
            location = {
                'location': row['Location Name'],
                'code': row['Location Code'],
                'library': library,
            }
            libraries[library['name']] = library
            locations[location['code']] = location
    return libraries, locations


def load_items(path: str = ITEMS_CSV) -> list[dict]:
    """Read the item report, cleaning titles and dates and adding ``Sort Date``."""
    items = []
    with open(path, encoding='utf-8-sig', newline='') as fp:
        for row in csv.DictReader(fp, delimiter=","):
            row['Title'] = clean_title(row['Title'])
            row['Publication Date'] = filter_date(row['Publication Date'])
            # some dates are ranges, only sort by last year
            row['Sort Date'] = row['Publication Date'][-4:]
            items.append(row)
    return items


def load_movements(path: str = MOVEMENTS_CSV) -> list[dict]:
    """Read the movement report and return its rows unchanged."""
    with open(path, encoding='utf-8-sig', newline='') as fp:
        return list(csv.DictReader(fp, delimiter=","))


def main() -> None:
    """Load all three CSV files and dump the combined data as JSON."""
    # The locations mapping is built but is not part of the exported data.
    libraries, _locations = load_libraries()
    items = load_items()
    movements = load_movements()

    print(f"{len(libraries)} nodes, {len(movements)} movements of {len(items)} items")

    data = {
        'libraries': list(libraries.values()),  # nodes
        'movements': movements,  # edges
        'items': items,  # item bibliographical data
    }

    with open(OUTPUT_JSON, 'w', encoding='utf-8') as fp:
        json.dump(data, fp)

    print(f"Written to {OUTPUT_JSON}")


if __name__ == "__main__":
    main()