3 changed files with 0 additions and 828 deletions
--- a/csv_importer_atlas-of-surveillance.py
+++ b/csv_importer_atlas-of-surveillance.py
@ -1,469 +0,0 @@
-from time import sleep
-from typing import Optional
-import urllib.request
-import json
-import logging
-import requests
-import argparse
-import datetime
-import tqdm
-import csv
-from geopy.geocoders import Nominatim
-
-logger = logging.getLogger('wiki.importer')
-
-default_categories = [
-    'Person',
-    'Institution',
-    'Technology',
-    'Deployments',
-    'Dataset',
-    'City',
-    'Country',
-]
-
-
-geocoder = Nominatim(user_agent="tutorial")
-
-parser = argparse.ArgumentParser(
-    description='Turn wiki into nodes & links, usable by d3-force.')
-parser.add_argument('--categories', metavar='categories', default=default_categories, nargs='+',
-                    help='Categories')
-parser.add_argument('--url',  default="https://www.securityvision.io/wiki/api.php",
-                    help='Wiki API URL')
-parser.add_argument('--output',  default="semantic_data.json",
-                    help='Output JSON file')
-parser.add_argument('--credentials',  default="no_credentials.json",
-                    help="JSON file containing the Bot's credentials")
-parser.add_argument('--csv',  default="Atlas of Surveillance-Gunshot Detection,Face Recognition,Real-Time Crime Center,Video Analytics-20220621.csv",
-                    help="CVS file to import")
-parser.add_argument('--citeref',  default="atlasofsurveillance2022",
-                    help="Bibliography key for imported items")
-parser.add_argument('--dry-run', '-n',  action="store_true",
-                    help="Dry run")
-parser.add_argument('--skip-geolocation',  action="store_true",
-                    help="Skip geolocation fetch, for faster dry-run")
-
-args = parser.parse_args()
-
-if args.skip_geolocation and not args.dry_run:
-    raise Exception("Cannot do a real run without geolocating cities")
-
-with open(args.credentials) as fp:
-    credentials = json.load(fp)
-    username = credentials['user']
-    password = credentials['password']
-
-
-def get_session():
-    S = requests.Session()
-
-    URL = args.url
-
-    # Retrieve login token first
-    PARAMS_0 = {
-        'action': "query",
-        'meta': "tokens",
-        'type': "login",
-        'format': "json"
-    }
-
-    R = S.get(url=URL, params=PARAMS_0)
-    DATA = R.json()
-    logger.debug(DATA)
-    LOGIN_TOKEN = DATA['query']['tokens']['logintoken']
-
-    logger.debug(LOGIN_TOKEN)
-
-    # Send a post request to login. Using the main account for login is not
-    # supported. Obtain credentials via Special:BotPasswords
-    # (https://www.mediawiki.org/wiki/Special:BotPasswords) for lgname & lgpassword
-
-    PARAMS_1 = {
-        'action': "login",
-        'lgname': username,
-        'lgpassword': password,
-        'lgtoken': LOGIN_TOKEN,
-        'format': "json"
-    }
-
-    R = S.post(URL, data=PARAMS_1)
-    DATA = R.json()
-
-    logger.debug(DATA)
-    if DATA['login']['result'] != 'Success':
-        raise Exception("Failed logging in")
-
-    return S
-
-
-# Map columns
-[
-    'AOSNUMBER', 
-    'City', # City_state
-    # 'County',
-    'State',
-    'Agency', # Institution
-    'Type of LEA', # Law enforment agency (Institution type?)
-    'Summary', # body text
-    'Type of Juris', # instution type? (municipal/county/state etc)
-    'Technology', # deployment type? face recognition etc
-    'Vendor', # empty or clearview ai, veritone etc. (Institution)
-    'Link 1',
-    # 'Link 1 Snapshot',
-    'Link 1 Source',
-    # 'Link 1 Type',
-    'Link 1 Date',
-    'Link 2',
-    # 'Link 2 Snapshot',
-    'Link 2 Source',
-    # 'Link 2 Type',
-    'Link 2 Date',
-    'Link 3',
-    # 'Link 3 Snapshot',
-    'Link 3 Source',
-    # 'Link 3 Type',
-    'Link 3 Date',
-    # 'Other Links',
-    'Statewide Network of Agency Photos (SNAP)',  # single deplyment, aggregrate Used by
-    'Face Analysis Comparison & Examination System (FACES)',  # single deplyment, aggregrate Used by
-    'Maryland Image Repository System', # single deplyment, aggregrate Used by
-    #'Clearview AI', # no aggregation
-    #'BriefCam', # no aggregation?
-    'FACE Services', # create link: Input for
-    'Relevant for the Wiki?', # FILTER!!
-]
-
-
-# title: Use of [VENDOR ]TECHNOLOGY by AGENCY
-# City: CITY (STATE) or aggregated
-# Country: USA
-# Software Used: VENDOR TECHNOLOGY
-# Used by: AGENCY
-# Information Certainty: Documented
-# Input for: [FACE Services]
-# body: <blockquote> SUMMARY</blockquote> 
-# Additional properties:
-    # Original Sources: url date (link 1, 2, 3)
-    # AOSNUMBER
-    # CiteRef: args.citeref
-
-# title: AGENCY
-# City: CITY (STATE)
-# Institution Type: Law Enforcement
-
-
-# aggregate agencies when these columns are 'yes'
-aggregates = {
-    'Statewide Network of Agency Photos (SNAP)': [],
-    'Face Analysis Comparison & Examination System (FACES)': [],
-    'Maryland Image Repository System': [],
-}
-
-institutions = {}
-cities = {}
-technologies = {}
-
-def mapEntry(entry) -> Optional[dict]:
-    if entry['Relevant for the Wiki?'] != 'Yes':
-        logger.warning(f'Ignore entry {entry["AOSNUMBER"]}')
-        return None
-    else:
-        hasAggregated = False
-        for field in aggregates.keys():
-            if entry[field] == 'Yes':
-                aggregates[field].append(entry)
-                hasAggregated = True
-
-        if hasAggregated:
-            return None
-
-        return mapDeployment(entry)
-
-def mapTechnology(entry):
-    entry['Vendor'] = entry['Vendor'].strip()
-    tech = {
-        'title': f"{entry['Vendor'] if entry['Vendor'] else 'Unknown'} {entry['Technology']}",
-        "@type": "Products",
-        'properties': {
-            "Developed by": entry['Vendor'],
-        },
-        "additionalProperties": {
-            "Technology Type": entry['Technology'],
-            "Needs processing of the title": "Yes",
-            'CiteRef': args.citeref,
-        }
-    }
-    technologies[tech['title']] = tech
-    return tech
-
-def mapDeployment(entry):
-    global args
-    city = mapCity(entry)
-    mapInstitution(entry) 
-    if entry['Vendor'] and len(entry['Vendor'].strip()) > 0:
-        mapDeveloperInstitution(entry['Vendor']) 
-    tech = mapTechnology(entry)
-    return {
-        'title': f"{entry['Vendor']} {entry['Technology']} used by {entry['Agency']}".replace('  '," ").strip(),
-        '@type': 'Deployments',
-        'properties': {
-            "Keywords": [entry['Technology']],
-            "used by": entry['Agency'],
-            "Software Deployed": tech['title'],
-            "City": city['title'],
-            "Information Certainty": "Documented",
-        },
-        "additionalProperties": {
-            "URL": [
-                "https://atlasofsurveillance.org/es/a/"+entry['AOSNUMBER'],
-                entry['Link 1'],
-                entry['Link 2'],
-                entry['Link 3'],
-                ],
-            'CiteRef': args.citeref, 
-            "Input for": "FACES (FBI) Dataset" if entry['FACE Services'] == 'Yes' else None,
-        },
-        "body": [entry['Summary']]
-    }
-
-
-# Type of LEA: Court gives Institution Type::Government Instititution Sector::Justice, 
-# Police/Sheriff/State Police/District Attorney/Attorney General/Prosecutor/School Police/Constables/DHS/Fusion Center/Juvenile/Security/Transit Police Type::Law Enfrocement Instititution Sector::Security
-# DMV/Emergency Services/Parks/State Agency/Transit  Institution Type::Government Institution Sector::Civil Administration
-# Medical Examiner Institution Type::Government Institution Sector::Health 
-# School District Institution Type::Local Government Institution Sector::Education
-# State-Local Partnership Institution Type::State-Local Partnership Institution Sector::Security
-
-institution_type_sector = {
-    "Court": ("Government", "Justice"),
-
-    "Police": ("Law Enforcement", "Security"),
-    "Sheriff": ("Law Enforcement", "Security"),
-    "State Police": ("Law Enforcement", "Security"),
-    "District Attorney": ("Law Enforcement", "Security"),
-    "Attorney General": ("Law Enforcement", "Security"),
-    "Prosecutor": ("Law Enforcement", "Security"),
-    "School Police": ("Law Enforcement", "Security"),
-    "Constables": ("Law Enforcement", "Security"),
-    "DHS": ("Law Enforcement", "Security"),
-    "Fusion Center": ("Law Enforcement", "Security"),
-    "Juvenile": ("Law Enforcement", "Security"),
-    "Security": ("Law Enforcement", "Security"),
-    "Transit Police": ("Law Enforcement", "Security"),
-    "Corrections": ("Law Enforcement", "Security"),
-    "Clemis": ("Law Enforcement", "Security"),
-
-    "DMV": ("Government", "Civil Administration"),
-    "Emergency Services": ("Government", "Civil Administration"),
-    "Parks": ("Government", "Civil Administration"),
-    "State Agency": ("Government", "Civil Administration"),
-    "Transit": ("Government", "Civil Administration"),
-
-    "Medical Examiner": ("Local Government", "Health"),
-    "School District": ("Local Government", "Education"),
-
-    "State-Local Partnership": ("State-Local Partnership", "Security"),
-}
-
-def mapInstitution(entry):
-    # aggregate agencies as institutions from entries
-    global args
-
-    type, sector = institution_type_sector[entry['Type of LEA']]
-    info = {
-        'title': entry['Agency'],
-        '@type': 'Institution',
-        'properties': {
-            "Institution Type": type,
-            "Institution Sector": sector,
-            'City': mapCity(entry)['title'],
-        },
-        "additionalProperties": {
-            "Type of Juris": entry['Type of Juris'],
-            "URL": [
-                "https://atlasofsurveillance.org/es/a/"+entry['AOSNUMBER'],
-                entry['Link 1'],
-                entry['Link 2'],
-                entry['Link 3'],
-                ],
-            'CiteRef': args.citeref,
-        },
-        "body": [entry['Type of LEA']],
-    }
-
-    if entry['Agency'] in institutions:
-        logger.warning(f'Ignore duplicate {entry["Agency"]}')
-    else:
-        institutions[entry['Agency']] = info
-
-def mapDeveloperInstitution(title):
-    
-    if title in institutions:
-        return
-    
-    institutions[title] = {
-        'title': title,
-        '@type': 'Institution',
-        'properties': {
-        },
-        "additionalProperties": {
-            "Needs content": "Yes",
-            'CiteRef': args.citeref,
-        }
-    }
-
-
-def mapCity(entry):
-    title = f"{entry['City']} ({entry['State']})"
-    if title not in cities:
-        info = {
-            'title': title,
-            '@type': 'City',
-            'properties': {
-                "is in Country": "USA",
-            },
-            "additionalProperties": {
-                'CiteRef': args.citeref,
-            }
-        }
-
-        if not args.skip_geolocation:
-            location_response = geocoder.geocode(f"{entry['City']}, {entry['State']}, USA")
-            sleep(1) # free tier of location geocode requires 1 sec delay
-            if location_response:
-                location = location_response.raw
-                info["properties"]["Has Coordinates"] = f"{location['lat']}, {location['lon']}"
-                info["body"] = [location['display_name']]
-            else:
-                logger.warning(f"No location data for {title} USA")
-
-        cities[title] = info
-    return cities[title]
-
-
-def mapAggregate(title, data):
-    urls = [ "https://atlasofsurveillance.org/es/a/"+entry['AOSNUMBER'] for entry in data]
-    urls.extend([entry['Link 1'] for entry in data])
-    urls.extend([entry['Link 2'] for entry in data])
-    urls.extend([entry['Link 3'] for entry in data])
-    urls = list(dict.fromkeys(urls)) # unique
-    urls = list(filter(lambda url: url and len(url) > 0, urls))
-
-    for entry in data:
-        mapInstitution(entry)
-
-    return {
-        "title": title,
-        '@type': 'Deployments',
-        'properties': {
-            "Information Certainty": "Documented",
-            "used by": [entry['Agency'] for entry in data]
-        },
-        "additionalProperties": {
-            "URL": urls,
-            'CiteRef': args.citeref,
-        }
-    }
-
-def renderPage(data):
-    global args
-
-    page = f"{{{{{data['@type']}"
-    for key, value in data['properties'].items():
-        page += f"\n|{key}=" + (', '.join(value) if isinstance(value, list) else value)
-    page += "}}\n\n"
-
-    if 'body' in data:
-        for b in data['body']:
-            if b and len(b):
-                page += f"<blockquote>{b} [[CiteRef::{args.citeref}]]</blockquote>\n\n"
-    
-    if len(data['additionalProperties']):
-        page += "=== Additional properties ===\n\n"
-    for key, value in data['additionalProperties'].items():
-        if not isinstance(value, list):
-            value = [value]
-        
-        for v in value:
-            if v:
-                page += f"* {key} [[{key}::{v}]]\n"
-    return page
-
-def saveIfNotExists(data, page, session, token):
-    # https://en.wikipedia.org/w/api.php?action=query&prop=info&titles=New%20York%20Yankeesdfsdf
-    # baseurl = f"{args.url}?action=query&list=categorymembers&cmtitle=Category:{category}&format=json"
-    params = {
-        'action': 'edit',
-        'createonly': '1',
-        'title': data['title'].strip(),
-        'contentformat': 'text/x-wiki',
-        'text': page,
-        'format': 'json',
-        'token': token,
-    }
-    logger.debug(args.url, params)
-
-    if not args.dry_run:
-        logger.warning(f"Creating '{data['title'].strip()}' type {data['@type']}")
-        response = session.post(args.url, data=params)
-        resp =  response.json()
-
-        if 'warnings' in resp:
-            logger.warning(resp)
-        
-        logger.debug(resp)
-    else:
-        logger.warning(f"'{data['title'].strip()}' type {data['@type']}")
-    
-
-def getEditToken(session):
-    params = {
-        'action': "query",
-        'meta': "tokens",
-        'type': "csrf",
-        'format': "json"
-    }
-
-    R = session.get(args.url, params=params)
-    DATA = R.json()
-    logger.debug(DATA)
-    return DATA['query']['tokens']['csrftoken']
-
-if __name__ == "__main__":
-    logger.setLevel(logging.DEBUG)
-    session = get_session()
-    token = getEditToken(session)
-
-    parsedData=[]
-    with open(args.csv, newline='') as csvfile:
-        csvreader = csv.DictReader(csvfile, delimiter=',')
-        for row in tqdm.tqdm(csvreader):
-            data = mapEntry(row)
-            if data is None:
-                continue
-            parsedData.append(data)
-           
-    parsedData.extend([mapAggregate(title, a) for title, a in aggregates.items()])
-    parsedData.extend(cities.values())
-    parsedData.extend(technologies.values())
-    parsedData.extend(institutions.values())
-    # print(parsedData)
-
-    for i, data in enumerate(parsedData):
-        page = renderPage(data)
-        # if data['@type'] == 'City': #only for city as to update coordinates
-        saveIfNotExists(data, page, session, token)
-        
-        # if i >  5:
-        #     break
-        
-    print(f"total: {len(parsedData)} items (of which {len(institutions)} institutions, {len(cities)} cities, {len(technologies)} products)")
-    print (len(parsedData) - len(institutions) - len(cities) - len(technologies), "deployments" )
-
-
-
-# 
-# Title: vendor/unknown
-
-# Postprocessing: make sure unknown are numbered, and multiple related deployments individual unknowns are created.
--- a/csv_importer_technopolice.py
+++ b/csv_importer_technopolice.py
@ -1,340 +0,0 @@
-import urllib.request
-import json
-import logging
-import requests
-import argparse
-import datetime
-import tqdm
-import csv
-
-
-logger = logging.getLogger('wiki.importer')
-
-default_categories = [
-    'Person',
-    'Institution',
-    'Technology',
-    'Deployments',
-    'Dataset',
-    'City',
-    'Country',
-]
-
-
-parser = argparse.ArgumentParser(
-    description='Turn wiki into nodes & links, usable by d3-force.')
-parser.add_argument('--categories', metavar='categories', default=default_categories, nargs='+',
-                    help='Categories')
-parser.add_argument('--url',  default="https://www.securityvision.io/wiki/api.php",
-                    help='Wiki API URL')
-parser.add_argument('--output',  default="semantic_data.json",
-                    help='Output JSON file')
-parser.add_argument('--credentials',  default="no_credentials.json",
-                    help="JSON file containing the Bot's credentials")
-parser.add_argument('--csv',  default="test.csv",
-                    help="CVS file to import")
-parser.add_argument('--citeref',  default="technopoliceFrMarch2020",
-                    help="Bibliography key for imported items")
-
-args = parser.parse_args()
-
-with open(args.credentials) as fp:
-    credentials = json.load(fp)
-    username = credentials['user']
-    password = credentials['password']
-
-
-def get_session():
-    S = requests.Session()
-
-    URL = args.url
-
-    # Retrieve login token first
-    PARAMS_0 = {
-        'action': "query",
-        'meta': "tokens",
-        'type': "login",
-        'format': "json"
-    }
-
-    R = S.get(url=URL, params=PARAMS_0)
-    DATA = R.json()
-    logger.debug(DATA)
-    LOGIN_TOKEN = DATA['query']['tokens']['logintoken']
-
-    logger.debug(LOGIN_TOKEN)
-
-    # Send a post request to login. Using the main account for login is not
-    # supported. Obtain credentials via Special:BotPasswords
-    # (https://www.mediawiki.org/wiki/Special:BotPasswords) for lgname & lgpassword
-
-    PARAMS_1 = {
-        'action': "login",
-        'lgname': username,
-        'lgpassword': password,
-        'lgtoken': LOGIN_TOKEN,
-        'format': "json"
-    }
-
-    R = S.post(URL, data=PARAMS_1)
-    DATA = R.json()
-
-    logger.debug(DATA)
-    if DATA['login']['result'] != 'Success':
-        raise Exception("Failed logging in")
-
-    return S
-
-
-# Map columns
-# split on |
-[
-    'Title'  # Title,
-    'Date added'  # -,
-    'Template'  # Category (map name & fields),
-    'Type de document administratif',  # Documents administratifs
-    'Date',  # Lois ou règlements & Documents administratifs
-    'Produit par',  # Documents administratifs & Lois ou règlements
-    'Titre complet',  # Lois ou règlements & Documents administratifs
-    'Projet(s) lié(s)',  # Documents/Lois/Contentieux
-    # "Financement BPI Safe City|Convention d'expérimentation Safe City Nice|Réponse Etablissement Paris La Déf. Projet SafeCity"
-    'Document(s) lié(s)',
-    'Origine du document',
-    'Description',
-    'Sujet(s)',  # set list of items
-    'URL',
-    'Nom complet',  # acteurs:
-    "Type d'acteur",  # Acteurs
-    'Compétences',  # Acteurs: "Police et justice|Technologies, innovation, R&D"
-    'Adresse',  # Acteurs
-    'Fait partie de', # Acteurs: Page
-    'Image',
-    'Application(s)',  # Projet: Set list of items
-    'Enjeu(x) prioritaire(s)',
-    'Durée du projet', # Project: date range, eg me/01/2020~sa/12/2022 -> Jan 1, 2020 ~ Dec 31, 2022
-    'Description du projet', # Project: (body) text
-    "Coût du projet (en millions d'euros)",  # Projet: number * 1000.000 (project)
-    'Commanditaire(s)',  # Projet: Page
-    'Financements publics',  # Projet: Page
-    'Entreprise(s) prestataire(s)',  # Projet Page
-    'Type de document',
-    'Type de loi ou règlement',  # Lois ou règlements
-    "Date d'adoption",
-    'Domaine(s)',
-    "Période d'applicabilité", 'Dernière modification',
-    'Juridiction',
-    'Geolocation',  # geolocation (projet, Acteurs)
-    'Documents',
-    'Attachments',
-    'Published'
-]
-
-[
-    'Title', # Title,
-    'Date added',# -,
-    'Template'  # Category (map name & fields),
-    'Application(s)',  # Projet: Set list of items
-    'Enjeu(x) prioritaire(s)',  # Projet: Set list of items
-    'Durée du projet', # Project: date range, eg me/01/2020~sa/12/2022 -> Jan 1, 2020 ~ Dec 31, 2022
-    'Description du projet',  # Project: (body) text
-    "Coût du projet (en millions d'euros)", # Projet: number * 1000.000 (project)
-    'Commanditaire(s)',  # Projet: Page
-    'Financements publics', # Project: Page
-    'Entreprise(s) prestataire(s)', # Project: Page
-    'Document(s) lié(s)', # -
-    'URL', #  Url
-    'Nom complet', # -
-    "Type d'acteur", # Acteur: set list
-    'Compétences',   # Acteurs: "Police et justice|Technologies, innovation, R&D"
-    'Adresse', # Acteurs text 
-    'Fait partie de',  # Acteurs: Page
-    'Image',
-    'Description', # Body text
-    'Geolocation',  # geolocation (projet, Acteurs)
-    'Documents',
-    'Attachments',
-    'Published'
-]
-
-
-def mapEntry(entry) -> dict:
-    # 'URL', #  Url (split by |)
-    # 'Description', # Body text
-    # 'Geolocation',  # convert to City
-    if entry['Template'] == 'Projets':
-        return mapDeployment(entry)
-    elif entry['Template'] == 'Acteurs':
-        return mapInstitution(entry)
-    else:
-        logger.critical(f"Invalid category/Template for entry: {entry['Template']}")
-
-def parseStrings(*input):
-    items = []
-    for i in input:
-        items.extend(i.split('|'))
-    return items
-
-def parseGeo(loc):
-    if not len(loc):
-        return ''
-    return '°,'.join(loc.split('|')) + '°'
-
-def parseDate(d):
-    # date is in odd format, so we skip the day (which is DoW instead of DoM)
-    # me/01/2020~sa/12/2022 -> Jan 1, 2020 ~ Dec 31, 2022
-    if '/' in d:
-        parts = d.split('/')
-        return f"{parts[1]}/{parts[2]}"
-    return d
-
-
-def mapDeployment(entry):
-    global args
-    return {
-        'title': entry['Title'],
-        '@type': 'Deployments',
-        'properties': {
-            "Keywords": parseStrings(
-                entry['Application(s)'],
-                entry['Enjeu(x) prioritaire(s)'],
-
-            ),
-            "Managed by": parseStrings(entry['Commanditaire(s)']),
-            "Deployment Start Date": parseDate(entry['Durée du projet'].split('~')[0]),
-            "Deployment End Date": parseDate(entry['Durée du projet'].split('~')[1]) if '~' in entry['Durée du projet'] else '',
-        },
-        "additionalProperties": {
-            "Budget": int(entry["Coût du projet (en millions d'euros)"]) * 1000000 if entry["Coût du projet (en millions d'euros)"] else None,
-            "Funded by": parseStrings(entry['Financements publics']),
-            "Provided by": parseStrings(entry['Entreprise(s) prestataire(s)']),
-            "URL": entry['URL'],
-            "Geolocation": parseGeo(entry['Geolocation']),
-            'CiteRef': args.citeref, 
-        },
-        "body": [entry['Description du projet'], entry['Description']]
-    }
-    # Deployments
-    # 'Application(s)': "Keywords": Capteurs audios, Vidéosurveillance automatisée, Fichiers, Statistiques Big Data,Identification biométrique, profilage, 
-    # 'Enjeu(x) prioritaire(s)',  # Projet: Set list of items (Keywords): Technologies, innovation, R&D, Transport, Éducation, Police et justice, 
-    # 'Durée du projet', # Deployment_Start_Date Deployment_End_Date Project: date range, eg me/01/2020~sa/12/2022 -> Jan 1, 2020 ~ Dec 31, 2022
-    # 'Description du projet',  # Project: (body) text
-    # "Coût du projet (en millions d'euros)", # Projet: number * 1000.000 (project)
-    # 'Commanditaire(s)',  # Projet: Page
-    # 'Financements publics', # Project: Page
-    # 'Entreprise(s) prestataire(s)', # Project: Page
-    pass
-
-def parseType(type):
-    typemap = {
-        'Entreprise': 'Company',
-        'Collectivité territoriale': 'Local Government',
-        'Association': 'NGO',
-        'Syndicat': 'Labor union',
-        'Institution ou organisme public': 'Government',
-        'Juridiction ou autorité de régulation': 'Government',
-        'Juridiction': 'Government',
-    }
-    return typemap[type]
-
-def mapInstitution(entry):
-    global args
-    return {
-        'title': entry['Title'],
-        '@type': 'Institution',
-        'properties': {
-            "Keywords": parseStrings(
-                entry['Compétences']
-            ),
-            "Institution Type": parseType(entry["Type d'acteur"]),
-            "Deployment Start Date": parseDate(entry['Durée du projet'].split('~')[0]),
-            "Deployment End Date": parseDate(entry['Durée du projet'].split('~')[1]) if '~' in entry['Durée du projet'] else '',
-            'URL': entry['URL'],
-            'Address': entry['Adresse'],
-            'Related Institutions': parseStrings(entry['Fait partie de'])
-        },
-        "additionalProperties": {
-            "Geolocation": parseGeo(entry['Geolocation']),
-            'CiteRef': args.citeref,
-        },
-        "body": [entry['Description']]
-    }
-    # "Type d'acteur", # Institution_Type: set list: Entreprise, Collectivité territoriale, Association, Syndicat, Institution ou organisme public, Juridiction ou autorité de régulation, Juridiction
-    # 'Compétences',   # Keywords: Droits fondamentaux, Éducation, "Police et justice|Technologies, innovation, R&D"
-    # 'Adresse', # Address text 
-    # 'Fait partie de',  # Acteurs: Page link in Body: [[Collaborates With::NAME]]
-
-def renderPage(data):
-    global args
-
-    page = f"{{{{{data['@type']}"
-    for key, value in data['properties'].items():
-        page += f"\n|{key}=" + (', '.join(value) if isinstance(value, list) else value)
-    page += "}}\n\n"
-
-    for b in data['body']:
-        if b and len(b):
-            page += f"<blockquote>{b} [[CiteRef::{args.citeref}]]</blockquote>\n\n"
-    
-    if len(data['additionalProperties']):
-        page += "=== Additional properties ===\n\n"
-    for key, value in data['additionalProperties'].items():
-        if not isinstance(value, list):
-            value = [value]
-        
-        for v in value:
-            if v:
-                page += f"* {key} [[{key}::{v}]]\n"
-    return page
-
-def saveIfNotExists(data, page, session, token):
-    # https://en.wikipedia.org/w/api.php?action=query&prop=info&titles=New%20York%20Yankeesdfsdf
-    # baseurl = f"{args.url}?action=query&list=categorymembers&cmtitle=Category:{category}&format=json"
-    params = {
-        'action': 'edit',
-        'createonly': '1',
-        'title': data['title'],
-        'contentformat': 'text/x-wiki',
-        'text': page,
-        'format': 'json',
-        'token': token,
-    }
-    logger.debug(args.url, params)
-    logger.warning(f"Creating {data['title']}")
-    response = session.post(args.url, data=params)
-    resp =  response.json()
-
-    if 'warnings' in resp:
-        logger.warning(resp)
-    
-    logger.debug(resp)
-    # print(responseData)
-    
-
-def getEditToken(session):
-    params = {
-        'action': "query",
-        'meta': "tokens",
-        'type': "csrf",
-        'format': "json"
-    }
-
-    R = session.get(args.url, params=params)
-    DATA = R.json()
-    logger.debug(DATA)
-    return DATA['query']['tokens']['csrftoken']
-
-if __name__ == "__main__":
-    logger.setLevel(logging.DEBUG)
-    session = get_session()
-    token = getEditToken(session)
-
-    i = 0
-    with open(args.csv, newline='') as csvfile:
-        csvreader = csv.DictReader(csvfile, delimiter=',')
-        for row in csvreader:
-            data = mapEntry(row)
-            page = renderPage(data)
-            saveIfNotExists(data, page, session, token)
-            i+= 1
-            # if i >  5:
-            #     break
--- a/fix_blockquote.py
+++ b/fix_blockquote.py
@ -1,19 +0,0 @@
-import re, os
-
-regex = r"(?<=(?<=>).)( \[\[CiteRef::atlasofsurveillance2022\]\]<\/blockquote>\n\n<blockquote>)"
-
-files = [f for f in os.listdir('.') if os.path.isfile(f) and f.endswith('.mw')]
-
-for f in files:
-    with open(f, 'r') as fp:
-        contents = fp.read()
-
-    # You can manually specify the number of replacements by changing the 4th argument
-    result = re.sub(regex, "", contents, 0, re.MULTILINE)
-    if contents != result:
-        print (f)
-        with open(f, 'w') as fp:
-            fp.write(result)
-
-
-