# portfolio/ranking/jsonld_ranking.py

from pyld import jsonld
import json
import PageRank.pagerank as pagerank
import argparse

# Command-line interface: one required input file (flattened JSON-LD) and an
# optional output filename for the resulting JSON.
parser = argparse.ArgumentParser(description='Apply pagerank to the (flattened) JSONLD file')
parser.add_argument('--jsonld', '-i', metavar='FILENAME.JSONLD', type=str,
                    required=True,
                    help='Input filename, make sure to use the flattened JSONLD (in our case from the \'assets\' folder)')
parser.add_argument('--output', '-o', type=str,
                    default=None, metavar='FILENAME.JSON',
                    help='Optional output filename for the resulting JSON')

args = parser.parse_args()

# use flattened jsonld
# JSON text is UTF-8; state the encoding explicitly so reading does not depend
# on the platform's default locale encoding.
with open(args.jsonld, 'r', encoding='utf-8') as fp:
    # Only the list stored under '@graph' is used below.
    contents = json.load(fp)['@graph']

# Nested mapping: source id -> {target id -> accumulated link weight}.
linksPerItem = {}

def addCount(fromId, toId, weight):
    """Add `weight` to the directed link fromId -> toId in linksPerItem.

    Missing entries are created on first use and start counting from 0.
    """
    outgoing = linksPerItem.setdefault(fromId, {})
    outgoing[toId] = outgoing.get(toId, 0) + weight

def addLink(fromId, toId, weight = 1):
    """Register a link between two ids in both directions.

    The same `weight` (default 1) is counted for fromId -> toId and then for
    toId -> fromId.
    """
    for source, target in ((fromId, toId), (toId, fromId)):
        addCount(source, target, weight)

# Walk every node of the graph and register a weighted, bidirectional link for
# each reference (a dict carrying an '@id') found in its properties.
for node in contents:
    currentId = node['@id']
    for key, value in node.items():
        # A single object is treated like a one-element list.
        if isinstance(value, dict):
            value = [value]
        if not isinstance(value, list):
            continue

        # Authorship counts double, a contribution half, anything else once.
        if key == 'https://schema.org/author':
            weight = 2
        elif key == 'https://schema.org/contributor':
            weight = .5
        else:
            weight = 1

        for link in value:
            if not isinstance(link, dict):
                # can be any other list, eg. list of urls
                continue

            # Dicts without an '@id' do not point at another node; skip them
            # instead of failing with a KeyError.
            linkedId = link.get('@id')
            if linkedId is None:
                continue

            # Pass the relation weight on; previously it was computed but
            # never used, so every link counted as 1.
            addLink(currentId, linkedId, weight)


# Bounds of the range the scores are rescaled to.
targetMin = .7
targetMax = 1.5

# NOTE(review): `values` is presumably a pandas Series indexed by node id (it
# is used with element-wise arithmetic and dict()) - confirm against the
# PageRank library.
values = pagerank.powerIteration(linksPerItem)

lowest = min(values)
spread = max(values) - lowest
# Min-max scaling: the lowest score maps to targetMin and the highest to
# targetMax. Dividing by max(values) alone (as before) never reached targetMax
# unless the lowest score was 0. When all scores are equal there is nothing to
# spread out, so everything stays at targetMin.
scale = (targetMax - targetMin) / spread if spread else 0
normalised = targetMin + (values - lowest) * scale

# Emit the normalised scores as a JSON object: into the requested file when an
# output filename was given, otherwise on stdout.
if args.output is not None:
    with open(args.output, 'w') as fp:
        json.dump(dict(normalised), fp)
else:
    print(json.dumps(dict(normalised)))

# Unused alternative normalisation (scales so that the smallest value becomes
# 1); left unfinished:
# factor = 1 / min(values)
# print(factor)
# normalised = values * factor
# normalised -

#
# for(let nodeId in data) {
#   let node = data[nodeId];
#   let currentId = node["@id"];
#   for(let key in node){
#     let nodeAttr = Array.isArray(node[key]) ? node[key] : [node[key]];
#     // // relations should always be lists (eases assumptions)
#     // if(typeof node[key] !== "Array" && typeof node[key]['id'] !== "undefined") {
#     //   node[key] = [node[key]];
#     // }
#     // every attribute is an Array after flatten(), loop them
#     for(let i in nodeAttr) {
#       if(key !== "@id" && typeof nodeAttr[i] === "string" && nodes[nodeAttr[i]]) {
#         links[links.length] = {
#           "source": currentId,
#           "target": nodeAttr[i],
#           "name": key
#           };
#       }
#       else if(typeof nodeAttr[i]["@id"] !== "undefined") {
#         // if there is just one item, flatten/expand has turned urls in objects with just an id
#         // reverse this, as we don't want these separate for this project
#         if (Object.keys(nodeAttr[i]).length == 1 && typeof nodes[nodeAttr[i]["@id"]] === "undefined") {
#             // skip
#             // nodeAttr = nodeAttr[i]["id"];
#         } else {
#           links[links.length] = {
#             "source": currentId,
#             "target": nodeAttr[i]["@id"],
#             "name": key
#             };
#         }
#       }
#     }
#   }
# }
More info and prep for pagerank 2020-01-31 18:44:17 +00:00			`from pyld import jsonld`
			`import json`
			`import PageRank.pagerank as pagerank`
PageRanking in gulpfile and hasCredential instead of attendee 2020-03-24 20:27:14 +00:00			`import argparse`

			`parser = argparse.ArgumentParser(description='Apply pagerank to the (flattened) JSONLD file')`
			`parser.add_argument('--jsonld', '-i', metavar='FILENAME.JSONLD', type=str,`
			`required=True,`
			`help='Input filename, make sure to use the flattened JSONLD (in our case from the \'assets\' folder)')`
			`parser.add_argument('--output', '-o', type=str,`
			`default=None, metavar='FILENAME.JSON',`
			`help='Optional output filename for the resulting JSON')`

			`args = parser.parse_args()`
More info and prep for pagerank 2020-01-31 18:44:17 +00:00
			`# use flattened jsonld`
PageRanking in gulpfile and hasCredential instead of attendee 2020-03-24 20:27:14 +00:00			`with open(args.jsonld, 'r') as fp:`
More info and prep for pagerank 2020-01-31 18:44:17 +00:00			`contents = json.load(fp)['@graph']`

			`linksPerItem = {}`

			`def addCount(fromId, toId, weight):`
			`if fromId not in linksPerItem:`
			`linksPerItem[fromId] = {}`
			`if not toId in linksPerItem[fromId]:`
			`linksPerItem[fromId][toId] = 0`
			`linksPerItem[fromId][toId] += weight`

			`def addLink(fromId, toId, weight = 1):`
PageRanking in gulpfile and hasCredential instead of attendee 2020-03-24 20:27:14 +00:00			`# Add link both ways: from + to.`
More info and prep for pagerank 2020-01-31 18:44:17 +00:00			`addCount(fromId, toId, weight)`
			`addCount(toId, fromId, weight)`

			`for node in contents:`
			`currentId = node['@id']`
			`for key, value in node.items():`
			`if type(value) == dict:`
			`value = [value]`

			`if type(value) == list:`
			`for link in value:`
			`if not type(link) == dict:`
			`# can be any other list, eg. list of urls`
			`continue`

			`weight = 1`
			`if key == 'https://schema.org/author':`
			`weight = 2`
			`elif key == 'https://schema.org/contributor':`
			`weight = .5`

			`# print(link)`
			`linkedId = link['@id']`
			`addLink(currentId, linkedId)`


			`targetMin = .7`
			`targetMax = 1.5`

			`values = pagerank.powerIteration(linksPerItem)`

			`normalised = targetMin + (values - min(values)) * (targetMax-targetMin)/max(values)`
PageRanking in gulpfile and hasCredential instead of attendee 2020-03-24 20:27:14 +00:00
			`if args.output is None:`
			`print(json.dumps(dict(normalised)))`
			`else:`
			`with open(args.output, 'w') as fp:`
			`json.dump(dict(normalised), fp)`

More info and prep for pagerank 2020-01-31 18:44:17 +00:00			`#`
			`# factor = 1 / min(values)`
			`# print(factor)`
			`# normalised = values * factor`
			`# normalised -`

			`#`
			`# for(let nodeId in data) {`
			`# let node = data[nodeId];`
			`# let currentId = node["@id"];`
			`# for(let key in node){`
			`# let nodeAttr = Array.isArray(node[key]) ? node[key] : [node[key]];`
			`# // // relations should always be lists (eases assumptions)`
			`# // if(typeof node[key] !== "Array" && typeof node[key]['id'] !== "undefined") {`
			`# // node[key] = [node[key]];`
			`# // }`
			`# // every attribute is an Array after flatten(), loop them`
			`# for(let i in nodeAttr) {`
			`# if(key !== "@id" && typeof nodeAttr[i] === "string" && nodes[nodeAttr[i]]) {`
			`# links[links.length] = {`
			`# "source": currentId,`
			`# "target": nodeAttr[i],`
			`# "name": key`
			`# };`
			`# }`
			`# else if(typeof nodeAttr[i]["@id"] !== "undefined") {`
			`# // if there is just one item, flatten/expand has turned urls in objects with just an id`
			`# // reverse this, as we don't want these separate for this project`
			`# if (Object.keys(nodeAttr[i]).length == 1 && typeof nodes[nodeAttr[i]["@id"]] === "undefined") {`
			`# // skip`
			`# // nodeAttr = nodeAttr[i]["id"];`
			`# } else {`
			`# links[links.length] = {`
			`# "source": currentId,`
			`# "target": nodeAttr[i]["@id"],`
			`# "name": key`
			`# };`
			`# }`
			`# }`
			`# }`
			`# }`
			`# }`