"""Rank the nodes of a flattened JSON-LD graph with PageRank.

The script reads the ``@graph`` array of a flattened JSON-LD file, turns every
node reference (a dict carrying an ``@id``) into a weighted, bidirectional
link, runs ``pagerank.powerIteration`` over those links and rescales the
resulting scores to the range ``[targetMin, targetMax]``.  The result is a JSON
object mapping node ids to scores, printed to stdout or written to the file
given with ``--output``.
"""
import argparse
import json

from pyld import jsonld
import PageRank.pagerank as pagerank

# Range the PageRank scores are rescaled to.
targetMin = .7
targetMax = 1.5

# Link weight per predicate; any predicate not listed here weighs 1.
linkWeights = {
    'https://schema.org/author': 2,
    'https://schema.org/contributor': .5,
}

# Adjacency map: linksPerItem[fromId][toId] is the accumulated link weight.
linksPerItem = {}


def addCount(fromId, toId, weight):
    """Add ``weight`` to the directed link ``fromId -> toId``."""
    targets = linksPerItem.setdefault(fromId, {})
    targets[toId] = targets.get(toId, 0) + weight


def addLink(fromId, toId, weight=1):
    """Register a link between two ids in both directions."""
    addCount(fromId, toId, weight)
    addCount(toId, fromId, weight)


def collectLinks(contents):
    """Fill ``linksPerItem`` from a list of flattened JSON-LD nodes.

    Args:
        contents: the ``@graph`` list; every node must carry an ``@id``.

    A property value counts as a link when it is a dict with an ``@id``, or a
    list containing such dicts.  Everything else (plain strings, lists of
    urls, dicts without ``@id`` such as value objects) is ignored.
    """
    for node in contents:
        currentId = node['@id']
        for key, value in node.items():
            if isinstance(value, dict):
                # A single reference: treat it like a one-element list.
                value = [value]
            if not isinstance(value, list):
                continue

            weight = linkWeights.get(key, 1)
            for link in value:
                if not isinstance(link, dict):
                    # Can be any other list, e.g. a list of urls.
                    continue
                linkedId = link.get('@id')
                if linkedId is None:
                    # A dict without '@id' does not reference a node.
                    continue
                # Pass the predicate weight on; it used to be computed but
                # dropped, so every link silently counted as 1.
                addLink(currentId, linkedId, weight)


def normalise(values, low=targetMin, high=targetMax):
    """Rescale ``values`` linearly so min maps to ``low`` and max to ``high``.

    ``values`` must support ``min()``/``max()`` and element-wise arithmetic
    with scalars.  When all values are equal every element maps to ``low``.
    """
    lowest = min(values)
    spread = max(values) - lowest
    # Guard against a zero spread (all scores identical) to avoid dividing
    # by zero.
    scale = (high - low) / spread if spread else 0
    return low + (values - lowest) * scale


def main():
    """Parse the command line, rank the graph and emit the scores as JSON."""
    parser = argparse.ArgumentParser(
        description='Apply pagerank to the (flattened) JSONLD file')
    parser.add_argument(
        '--jsonld', '-i', metavar='FILENAME.JSONLD', type=str, required=True,
        help="Input filename, make sure to use the flattened JSONLD "
             "(in our case from the 'assets' folder)")
    parser.add_argument(
        '--output', '-o', type=str, default=None, metavar='FILENAME.JSON',
        help='Optional output filename for the resulting JSON')
    args = parser.parse_args()

    # Use the flattened JSON-LD; JSON text is UTF-8, so do not depend on the
    # platform's default encoding.
    with open(args.jsonld, 'r', encoding='utf-8') as fp:
        contents = json.load(fp)['@graph']

    collectLinks(contents)

    # NOTE(review): powerIteration presumably returns an array-like that
    # supports scalar arithmetic and dict() conversion (e.g. a pandas
    # Series) -- verify against PageRank.pagerank.
    values = pagerank.powerIteration(linksPerItem)
    normalised = normalise(values)

    if args.output is None:
        print(json.dumps(dict(normalised)))
    else:
        with open(args.output, 'w') as fp:
            json.dump(dict(normalised), fp)


if __name__ == '__main__':
    main()


# Commented-out JavaScript link extraction, retained from the original file
# for reference.  Unlike collectLinks() it also links plain-string values that
# name a known node, and skips bare {"@id": ...} references to unknown nodes.
#
# for(let nodeId in data) {
#   let node = data[nodeId];
#   let currentId = node["@id"];
#   for(let key in node){
#     let nodeAttr = Array.isArray(node[key]) ? node[key] : [node[key]];
#     // // relations should always be lists (eases assumptions)
#     // if(typeof node[key] !== "Array" && typeof node[key]['id'] !== "undefined") {
#     //   node[key] = [node[key]];
#     // }
#     // every attribute is an Array after flatten(), loop them
#     for(let i in nodeAttr) {
#       if(key !== "@id" && typeof nodeAttr[i] === "string" && nodes[nodeAttr[i]]) {
#         links[links.length] = {
#           "source": currentId,
#           "target": nodeAttr[i],
#           "name": key
#         };
#       }
#       else if(typeof nodeAttr[i]["@id"] !== "undefined") {
#         // if there is just one item, flatten/expand has turned urls in objects with just an id
#         // reverse this, as we don't want these separate for this project
#         if (Object.keys(nodeAttr[i]).length == 1 && typeof nodes[nodeAttr[i]["@id"]] === "undefined") {
#           // skip
#           // nodeAttr = nodeAttr[i]["id"];
#         } else {
#           links[links.length] = {
#             "source": currentId,
#             "target": nodeAttr[i]["@id"],
#             "name": key
#           };
#         }
#       }
#     }
#   }
# }