# portfolio/ranking/jsonld_ranking.py
#
# 95 lines
# 2.8 KiB
# Python

from pyld import jsonld
import json
import PageRank.pagerank as pagerank
# Use the flattened JSON-LD file: the value under its '@graph' key is what the
# ranking code below iterates over, node by node.
# The encoding is given explicitly so that reading does not depend on the
# platform's default locale encoding.
with open('../assets/js/rubenvandeven.jsonld', 'r', encoding='utf-8') as fp:
    contents = json.load(fp)['@graph']

# Link table filled by addCount(): linksPerItem[fromId][toId] -> summed weight.
linksPerItem = {}
def addCount(fromId, toId, weight):
    """Add `weight` to the directed link count from `fromId` to `toId`.

    Updates the module-level `linksPerItem` mapping in place. The inner
    mapping for `fromId` is created when missing, and a missing `toId`
    starts counting from 0.
    """
    targets = linksPerItem.setdefault(fromId, {})
    targets[toId] = targets.get(toId, 0) + weight
def addLink(fromId, toId, weight=1):
    """Register a link between two ids in both directions.

    The same `weight` (1 unless given) is counted once from `fromId` to
    `toId` and once from `toId` back to `fromId`, via addCount().
    """
    for source, target in ((fromId, toId), (toId, fromId)):
        addCount(source, target, weight)
# Walk every node of the graph and register a weighted, bidirectional link
# for each property value that references another node by '@id'.
for node in contents:
    currentId = node['@id']
    for key, value in node.items():
        # A single object is handled as a one-element list.
        if isinstance(value, dict):
            value = [value]
        if not isinstance(value, list):
            continue

        # The weight depends only on the property name: authorship counts
        # double, a contribution counts half, everything else counts once.
        if key == 'https://schema.org/author':
            weight = 2
        elif key == 'https://schema.org/contributor':
            weight = .5
        else:
            weight = 1

        for link in value:
            # Lists can also hold plain values (eg. a list of urls), and an
            # object without an '@id' does not point at a node; neither can
            # be turned into a link.
            if not isinstance(link, dict) or '@id' not in link:
                continue
            # The weight has to be passed explicitly: addLink() falls back
            # to 1 when it is omitted.
            addLink(currentId, link['@id'], weight)
# Rescale the PageRank scores linearly onto [targetMin, targetMax] and print
# them as a JSON object.
targetMin = .7
targetMax = 1.5

# NOTE(review): the arithmetic below needs `values` to support element-wise
# operations and dict() conversion (presumably a pandas Series keyed by node
# id) -- confirm against PageRank.pagerank.powerIteration.
values = pagerank.powerIteration(linksPerItem)
lowest = min(values)
spread = max(values) - lowest
# Divide by the spread (max - min) rather than by max, so that the lowest
# score maps to targetMin and the highest to targetMax. When all scores are
# equal the spread is 0; everything then maps to targetMin instead of
# dividing by zero.
scale = (targetMax - targetMin) / spread if spread else 0
normalised = targetMin + (values - lowest) * scale
print(json.dumps(dict(normalised)))
#
# factor = 1 / min(values)
# print(factor)
# normalised = values * factor
# normalised -
#
# for(let nodeId in data) {
# let node = data[nodeId];
# let currentId = node["@id"];
# for(let key in node){
# let nodeAttr = Array.isArray(node[key]) ? node[key] : [node[key]];
# // // relations should always be lists (eases assumptions)
# // if(typeof node[key] !== "Array" && typeof node[key]['id'] !== "undefined") {
# // node[key] = [node[key]];
# // }
# // every attribute is an Array after flatten(), loop them
# for(let i in nodeAttr) {
# if(key !== "@id" && typeof nodeAttr[i] === "string" && nodes[nodeAttr[i]]) {
# links[links.length] = {
# "source": currentId,
# "target": nodeAttr[i],
# "name": key
# };
# }
# else if(typeof nodeAttr[i]["@id"] !== "undefined") {
# // if there is just one item, flatten/expand has turned urls in objects with just an id
# // reverse this, as we don't want these separate for this project
# if (Object.keys(nodeAttr[i]).length == 1 && typeof nodes[nodeAttr[i]["@id"]] === "undefined") {
# // skip
# // nodeAttr = nodeAttr[i]["id"];
# } else {
# links[links.length] = {
# "source": currentId,
# "target": nodeAttr[i]["@id"],
# "name": key
# };
# }
# }
# }
# }
# }