diff --git a/wiki_relations.py b/wiki_relations.py
index 80e8c9f..7056a71 100644
--- a/wiki_relations.py
+++ b/wiki_relations.py
@@ -4,6 +4,7 @@ import requests
 import argparse
 import datetime
 import tqdm
+import csv
 
 
 
@@ -12,11 +13,12 @@ logger = logging.getLogger('wiki')
 default_categories = [
     # 'Person',
     'Institution',
-    'Technology',
+    'Products',
     'Deployments',
     'Dataset',
     'City',
     # 'Country',# for deployments without city we should configure Geolocation
+    'Technology Type',
 ]
 
 parser = argparse.ArgumentParser(description='Turn wiki into nodes & links, usable by d3-force.')
@@ -28,6 +30,8 @@ parser.add_argument('--output', default="semantic_data.json",
                     help='Output JSON file')
 parser.add_argument('--credentials', default="no_credentials.json",
                     help="JSON file containing the Bot's credentials")
+parser.add_argument('--generate-csv', action='store_true',
+                    help="generate edges.csv & nodes.csv")
 
 args = parser.parse_args()
 
@@ -257,3 +261,17 @@ if __name__ == "__main__":
 
     with open(args.output, 'w') as fp:
         json.dump(collection, fp)
+    if args.generate_csv:
+        # newline='' hands line-ending control to the csv module (no blank rows on Windows).
+        with open('nodes.csv', 'w', newline='') as csvfile:
+            # Nodes may carry different keys; sort their union so the column order is stable between runs.
+            all_keys = sorted(set().union(*(d.keys() for d in collection['nodes'])), key=str)
+            dict_writer = csv.DictWriter(csvfile, fieldnames=all_keys, extrasaction='ignore', restval='')
+            dict_writer.writeheader()
+            dict_writer.writerows(collection['nodes'])
+
+        with open('edges.csv', 'w', newline='') as csvfile:
+            all_keys = sorted(set().union(*(d.keys() for d in collection['links'])), key=str)
+            dict_writer = csv.DictWriter(csvfile, fieldnames=all_keys, extrasaction='ignore', restval='')
+            dict_writer.writeheader()
+            dict_writer.writerows(collection['links'])