Bot to create semantic properties for CiteRef'ed subobjects
This commit is contained in:
commit
091b0fa211
4 changed files with 259 additions and 0 deletions
8
README.md
Normal file
8
README.md
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
Workaround bot to add SemanticCite references as semantic relations.
|
||||||
|
|
||||||
|
It creates a [[Cited references]] property which links to the cited subobject. See also the discussion [here](https://github.com/SemanticMediaWiki/SemanticCite/issues/99).
|
||||||
|
|
||||||
|
# Usage
|
||||||
|
|
||||||
|
1. Create a bot user on the wiki. Add its credentials to a file following the format of `no_credentials.json`.
|
||||||
|
2. Run the bot: `python citation_bot.py --url https://www.your.domain/wiki/api.php --credentials your_credentials.json`. This inserts a block of text containing the semantic property into all pages that have a CiteRef.
|
245
citation_bot.py
Normal file
245
citation_bot.py
Normal file
|
@ -0,0 +1,245 @@
|
||||||
|
import json
import logging
import requests
import argparse
import re


logging.basicConfig()
logger = logging.getLogger('wiki.importer')

parser = argparse.ArgumentParser(
    description='''Workaround bot to add SemanticCite references as semantic relations.
It creates a [[Cited references]] property which links to the cited subobject''')
parser.add_argument('--url', default="https://www.your.domain/wiki/api.php",
                    help='Wiki API URL')
parser.add_argument('--credentials', default="no_credentials.json",
                    help="JSON file containing the Bot's credentials")
# Fix: renderPage() reads args.citeref, but this argument was never declared,
# so any use of renderPage() would die with AttributeError. Declared here with
# a backward-compatible empty default.
parser.add_argument('--citeref', default="",
                    help="Citation key rendered as [[CiteRef::...]] by renderPage()")
parser.add_argument('--verbose', '-v', action="store_true",
                    help="enable more errors")

args = parser.parse_args()

logger.setLevel(logging.DEBUG if args.verbose else logging.WARNING)

# Bot credentials (create them via Special:BotPasswords) come from a JSON file
# shaped like no_credentials.json: {"user": "...", "password": "..."}.
with open(args.credentials) as fp:
    credentials = json.load(fp)
username = credentials['user']
password = credentials['password']
|
||||||
|
|
||||||
|
|
||||||
|
def get_session():
    """Open a requests session and log the bot into the wiki API.

    Reads the module-level ``args.url``, ``username`` and ``password``.
    Returns the logged-in session; raises Exception if the login fails.
    """
    session = requests.Session()
    api_url = args.url

    # Step 1: retrieve a login token.
    token_query = {
        'action': "query",
        'meta': "tokens",
        'type': "login",
        'format': "json",
    }
    reply = session.get(url=api_url, params=token_query).json()
    logger.debug(reply)

    login_token = reply['query']['tokens']['logintoken']
    logger.debug(login_token)

    # Step 2: POST the login itself. Using the main account for login is not
    # supported — obtain credentials via Special:BotPasswords
    # (https://www.mediawiki.org/wiki/Special:BotPasswords) for lgname & lgpassword.
    login_form = {
        'action': "login",
        'lgname': username,
        'lgpassword': password,
        'lgtoken': login_token,
        'format': "json",
    }
    reply = session.post(api_url, data=login_form).json()
    logger.debug(reply)

    if reply['login']['result'] != 'Success':
        raise Exception("Failed logging in")

    return session
|
||||||
|
|
||||||
|
|
||||||
|
def renderPage(data):
    """Render one record as wikitext: a template call, quoted body text,
    and a bulleted list of additional semantic annotations."""
    global args

    # Template call: {{Type|key=value|...}}; list values are comma-joined.
    parts = ["{{" + data['@type']]
    for prop_name, prop_value in data['properties'].items():
        if isinstance(prop_value, list):
            rendered = ', '.join(prop_value)
        else:
            rendered = prop_value
        parts.append(f"\n|{prop_name}={rendered}")
    parts.append("}}\n\n")

    # Body paragraphs become blockquotes tagged with the CLI-supplied CiteRef.
    for paragraph in data['body']:
        if paragraph and len(paragraph):
            parts.append(
                f"<blockquote>{paragraph} [[CiteRef::{args.citeref}]]</blockquote>\n\n")

    # Any remaining properties are emitted as [[key::value]] annotations.
    if len(data['additionalProperties']):
        parts.append("=== Additional properties ===\n\n")
        for prop_name, prop_value in data['additionalProperties'].items():
            values = prop_value if isinstance(prop_value, list) else [prop_value]
            for item in values:
                if item:
                    parts.append(f"* {prop_name} [[{prop_name}::{item}]]\n")

    return ''.join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
def saveIfNotExists(data, page, session, token):
    """Create a wiki page titled data['title'] with the given wikitext.

    Uses 'createonly', so an already-existing page is left untouched (the API
    answers with a warning instead of overwriting).

    :param data: record holding the target page 'title'
    :param page: rendered wikitext body
    :param session: logged-in requests session (see get_session())
    :param token: CSRF edit token (see getEditToken())
    """
    # https://en.wikipedia.org/w/api.php?action=query&prop=info&titles=New%20York%20Yankeesdfsdf
    # baseurl = f"{args.url}?action=query&list=categorymembers&cmtitle=Category:{category}&format=json"
    params = {
        'action': 'edit',
        'createonly': '1',
        'title': data['title'],
        'contentformat': 'text/x-wiki',
        'text': page,
        'format': 'json',
        'token': token,
    }
    # Fix: logging takes ONE format string; the original logger.debug(args.url, params)
    # used the URL as a %-template with params as its argument, which breaks at emit time.
    logger.debug("%s %s", args.url, params)
    logger.warning(f"Creating {data['title']}")
    response = session.post(args.url, data=params)
    resp = response.json()

    if 'warnings' in resp:
        logger.warning(resp)

    logger.debug(resp)
    # print(responseData)
|
||||||
|
|
||||||
|
|
||||||
|
def getEditToken(session):
    """Fetch a CSRF token from the API; required for every edit request."""
    token_request = {
        'action': "query",
        'meta': "tokens",
        'type': "csrf",
        'format': "json",
    }
    reply = session.get(args.url, params=token_request)
    payload = reply.json()
    logger.debug(payload)
    return payload['query']['tokens']['csrftoken']
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # logger.setLevel(logging.DEBUG)
    session = get_session()
    token = getEditToken(session)

    # TODO also ask for the Cited references property, so references that have
    # been deleted from a page get cleared as well.
    results = session.get(args.url, params={
        # /api.php?action=ask&query=[[Modification date::%2B]]|%3FModification date|sort%3DModification date|order%3Ddesc&format=jsonfm
        "action": "ask",
        "query": "[[Citation reference::+]]|?Citation reference|limit=5000",
        "format": "json"
    })  # fix: the original had a stray '-' after this call — a syntax error

    parsed = results.json()

    # Maps a citation key to the page id of its [[Citation key::...]] page,
    # so each key is resolved via the API at most once.
    citationCache = {}

    print(f"Fetched {len(parsed['query']['results'])} items...")
    for name, page in parsed['query']['results'].items():
        # print(name, page['printouts']['Citation reference'])
        refSources = []
        for ref in page['printouts']['Citation reference']:
            if ref not in citationCache:
                logger.info(f'lookup {ref}')
                results = session.get(args.url, params={
                    "action": "ask",
                    "query": f"[[Citation key::{ref}]]",
                    "format": "json"
                })

                refResult = results.json()
                for pageid in refResult['query']['results']:
                    citationCache[ref] = pageid.replace(" ", "_")
            if ref not in citationCache:
                logger.error(f'Skip unknown ref: {ref}')
            else:
                refSources.append(citationCache[ref])

        # Fetch the current wikitext so the bot block can be inserted or
        # replaced in place.
        results = session.get(args.url, params={
            "action": "query",
            "prop": "revisions",
            "titles": name.replace(' ', "_"),
            "rvslots": "*",
            "rvprop": "content",
            "formatversion": "2",
            "format": "json"
        })

        pageResult = results.json()
        content = pageResult['query']['pages'][0]['revisions'][0]['slots']['main']['content']

        reflinks = "\n".join(
            [f"* [[Cited references::{ref}]]" for ref in refSources])
        botblock = f"""
--- By citebot ---

This page uses the following references:

{reflinks}
--- end citebot ---
"""
        logger.debug(botblock)
        if botblock in content:
            logger.debug('nothing changed, ignore')
            continue

        if "By citebot" in content:
            # Replace the existing bot block. re.split with one capture group
            # yields [before, 'By citebot', inner, 'end citebot', after].
            splits = re.split(
                "\n--- (By citebot|end citebot) ---\n", content)
            content = splits[0] + botblock
            if len(splits) > 3:
                content += splits[4]
        else:
            content += botblock

        params = {
            'action': 'edit',
            'nocreate': '1',
            'title': name.replace(' ', "_"),
            'contentformat': 'text/x-wiki',
            'text': content,
            'format': 'json',
            'token': token,
            "summary": "Citebot"
        }
        # Fix: logging takes one format string, not two positional arguments.
        logger.debug("%s %s", args.url, params)
        logger.info(f"Update {name}")
        response = session.post(args.url, data=params)
        resp = response.json()

        if 'warnings' in resp:
            logger.warning(resp)
        else:
            logger.info(f"Updated {name}")

        logger.debug(resp)

    # i = 0
    # with open(args.csv, newline='') as csvfile:
    #     csvreader = csv.DictReader(csvfile, delimiter=',')
    #     for row in csvreader:
    #         data = mapEntry(row)
    #         page = renderPage(data)
    #         saveIfNotExists(data, page, session, token)
    #         i+= 1
    #         # if i > 5:
    #         #     break
|
4
no_credentials.json
Normal file
4
no_credentials.json
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
{
|
||||||
|
"user": "",
|
||||||
|
"password": ""
|
||||||
|
}
|
2
requirements.txt
Normal file
2
requirements.txt
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
requests
|
||||||
|
|
Loading…
Reference in a new issue