# portfolio/ranking/jsonld_ranking.py

from pyld import jsonld
import json
import PageRank.pagerank as pagerank
import argparse

# Command-line interface: one required input file and an optional output file.
parser = argparse.ArgumentParser(
    description='Apply pagerank to the (flattened) JSONLD file',
)

# Input: the flattened JSON-LD document to rank.
parser.add_argument(
    '--jsonld', '-i',
    type=str,
    required=True,
    metavar='FILENAME.JSONLD',
    help="Input filename, make sure to use the flattened JSONLD (in our case from the 'assets' folder)",
)

# Output: when omitted (None), the result is printed instead of written to disk.
parser.add_argument(
    '--output', '-o',
    type=str,
    default=None,
    metavar='FILENAME.JSON',
    help='Optional output filename for the resulting JSON',
)

args = parser.parse_args()

# Load the flattened JSON-LD document; only the node list under its top-level
# '@graph' key is used by the rest of the script.
# JSON text is UTF-8, so decode it explicitly rather than relying on the
# platform's default encoding (which is not UTF-8 everywhere).
with open(args.jsonld, 'r', encoding='utf-8') as fp:
    contents = json.load(fp)['@graph']

# Weighted adjacency map: linksPerItem[fromId][toId] is the accumulated weight
# of the directed edge fromId -> toId.
linksPerItem = {}


def addCount(fromId, toId, weight):
    """Add ``weight`` to the directed edge ``fromId -> toId``.

    Both the per-source mapping and the edge entry are created on first use;
    a new edge starts at 0 before ``weight`` is added. Mutates the
    module-level ``linksPerItem`` in place and returns nothing.
    """
    outgoing = linksPerItem.setdefault(fromId, {})
    outgoing[toId] = outgoing.get(toId, 0) + weight

def addLink(fromId, toId, weight = 1):
    """Register an undirected link between ``fromId`` and ``toId``.

    The link is stored as two directed edges (forward first, then reverse),
    each increased by ``weight`` via ``addCount``.
    """
    for source, target in ((fromId, toId), (toId, fromId)):
        addCount(source, target, weight)

# Build the weighted link graph: every attribute value that is an object with
# an '@id' is treated as a reference from the current node to that id.

# Weight per predicate; any predicate not listed here counts as 1.
weightPerKey = {
    'https://schema.org/author': 2,
    'https://schema.org/contributor': .5,
}

for node in contents:
    currentId = node['@id']
    for key, value in node.items():
        # A single reference appears as a bare dict; handle it like a
        # one-element list so both shapes share the loop below.
        if isinstance(value, dict):
            value = [value]

        if not isinstance(value, list):
            # Plain scalars (including the node's own '@id' string) are not links.
            continue

        weight = weightPerKey.get(key, 1)
        for link in value:
            # Lists may hold plain values (e.g. urls), and dicts without an
            # '@id' (e.g. value objects such as {'@value': ...}) do not
            # point at another node; neither is a link.
            if not isinstance(link, dict) or '@id' not in link:
                continue

            # Pass the predicate weight along; previously it was computed
            # but dropped, so every link was counted with weight 1.
            addLink(currentId, link['@id'], weight)


# Range the PageRank scores are rescaled into before being written out.
targetMin = .7
targetMax = 1.5

# NOTE(review): the arithmetic and dict() conversion below assume this returns
# a pandas-Series-like object indexed by node id -- confirm against the
# PageRank package in use.
values = pagerank.powerIteration(linksPerItem)

# Min-max rescale so the lowest score maps to targetMin and the highest to
# targetMax. The divisor must be the spread (max - min); dividing by max alone
# left the top score short of targetMax.
lowest = min(values)
spread = max(values) - lowest
if spread > 0:
    normalised = targetMin + (values - lowest) * (targetMax - targetMin) / spread
else:
    # All scores are equal: avoid 0/0 and place every item at targetMin.
    normalised = targetMin + (values - lowest)

result = dict(normalised)

# Without --output the JSON goes to stdout, otherwise to the given file.
if args.output is None:
    print(json.dumps(result))
else:
    with open(args.output, 'w') as fp:
        json.dump(result, fp)

# Unused draft of an alternative normalisation (scaling by 1 / min(values)); kept for reference:
# factor = 1 / min(values)
# print(factor)
# normalised = values * factor
# normalised -

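# Commented-out JavaScript that builds a list of {source, target, name} links from node attributes; kept for reference: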
# for(let nodeId in data) {
#   let node = data[nodeId];
#   let currentId = node["@id"];
#   for(let key in node){
#     let nodeAttr = Array.isArray(node[key]) ? node[key] : [node[key]];
#     // // relations should always be lists (eases assumptions)
#     // if(typeof node[key] !== "Array" && typeof node[key]['id'] !== "undefined") {
#     //   node[key] = [node[key]];
#     // }
#     // every attribute is an Array after flatten(), loop them
#     for(let i in nodeAttr) {
#       if(key !== "@id" && typeof nodeAttr[i] === "string" && nodes[nodeAttr[i]]) {
#         links[links.length] = {
#           "source": currentId,
#           "target": nodeAttr[i],
#           "name": key
#           };
#       }
#       else if(typeof nodeAttr[i]["@id"] !== "undefined") {
#         // if there is just one item, flatten/expand has turned urls in objects with just an id
#         // reverse this, as we don't want these separate for this project
#         if (Object.keys(nodeAttr[i]).length == 1 && typeof nodes[nodeAttr[i]["@id"]] === "undefined") {
#             // skip
#             // nodeAttr = nodeAttr[i]["id"];
#         } else {
#           links[links.length] = {
#             "source": currentId,
#             "target": nodeAttr[i]["@id"],
#             "name": key
#             };
#         }
#       }
#     }
#   }
# }