Option to generate nodes and edges CSV files

This commit is contained in:
Ruben van de Ven 2023-01-23 21:55:28 +01:00
parent 3c0184fe97
commit d457c8eea0

View file

@ -4,6 +4,7 @@ import requests
import argparse
import datetime
import tqdm
import csv
@ -12,11 +13,12 @@ logger = logging.getLogger('wiki')
# Wiki category names kept in `default_categories`. Presumably these are the
# categories processed when none are chosen explicitly -- the code that reads
# this list is not visible here, TODO confirm. Commented-out entries are not
# part of the list.
default_categories = [
# 'Person',
'Institution',
'Technology',
'Products',
'Deployments',
'Dataset',
'City',
# 'Country',# for deployments without city we should configure Geolocation
#'Country',# for deployments without city we should configure Geolocation
'Technology Type',
]
# Command-line parser for the script; its options are registered on it below.
parser = argparse.ArgumentParser(description='Turn wiki into nodes & links, usable by d3-force.')
@ -28,6 +30,8 @@ parser.add_argument('--output', default="semantic_data.json",
help='Output JSON file')
# Path of the JSON file holding the bot's login credentials.
parser.add_argument('--credentials', default="no_credentials.json",
                    help="JSON file containing the Bot's credentials")
# Opt-in flag: when given, nodes.csv and edges.csv are written in addition to
# the JSON output. The help text names the files exactly as they are written.
parser.add_argument('--generate-csv', action='store_true',
                    help="generate edges.csv & nodes.csv")
# Parsed once at module level; the rest of the script reads options from `args`.
args = parser.parse_args()
@ -257,3 +261,17 @@ if __name__ == "__main__":
# Persist the complete node/link collection as JSON at the --output path.
with open(args.output, 'w') as fp:
    json.dump(collection, fp)

if args.generate_csv:
    # Optional CSV export (--generate-csv): one file for the nodes and one
    # for the links, both written to the current working directory.
    for csv_path, rows in (
        ('nodes.csv', collection['nodes']),
        ('edges.csv', collection['links']),
    ):
        # The row dicts need not share the same keys, so the header is the
        # union of every key seen. Sorting gives a column order that is
        # stable between runs; iterating a set of strings is not.
        # key=str keeps the sort from failing should a key not be a string.
        all_keys = sorted(set().union(*(d.keys() for d in rows)), key=str)
        # newline='' is required by the csv module: without it the writer's
        # own '\r\n' terminators are translated a second time on Windows,
        # which leaves a blank line after every row.
        with open(csv_path, 'w', newline='') as csvfile:
            dict_writer = csv.DictWriter(
                csvfile,
                fieldnames=all_keys,
                extrasaction='ignore',
                restval='',  # rows lacking a column get an empty cell
            )
            dict_writer.writeheader()
            dict_writer.writerows(rows)