From 1ed2ed16c654f0d50c9b86e929b277cdd86e9449 Mon Sep 17 00:00:00 2001 From: Ruben van de Ven Date: Sat, 21 Nov 2020 16:49:05 +0100 Subject: [PATCH] Heatmap of Labeled Faces in the Wild --- .gitignore | 1 + README.md | 28 +++++ accesstoken.example.js | 1 + js/heatmap.min.js | 9 ++ js/leaflet-heatmap.js | 237 +++++++++++++++++++++++++++++++++++++++++ lfw_cities.py | 144 +++++++++++++++++++++++++ lfw_map.html | 99 +++++++++++++++++ lfw_map.php | 119 +++++++++++++++++++++ requirements.txt | 2 + 9 files changed, 640 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 accesstoken.example.js create mode 100644 js/heatmap.min.js create mode 100644 js/leaflet-heatmap.js create mode 100644 lfw_cities.py create mode 100644 lfw_map.html create mode 100644 lfw_map.php create mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..22643f8 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +accesstoken.js diff --git a/README.md b/README.md new file mode 100644 index 0000000..1b3f71a --- /dev/null +++ b/README.md @@ -0,0 +1,28 @@ +This set of scripts generates a map with the birth places of people in the Labeled Faces in the Wild dataset using information available at WikiData. + +It searches WikiData for the names in LFW, gets their birth place if available, and again uses WikiData to find the coordinates of these cities. Obviously, this further skews the bias of the LFW dataset with the selective information that is available through WikiData. + + + +In total there are 3136 people for which a city with coordinates is found on WikiData, and 2587 people for which this is not the case. 
+ + +## Install + +```bash +virtualenv --system-site-packages -p python3 venv +source venv/bin/activate +pip install -r requirements.txt +``` + +Then fetch the names from lfw: + +``` +wget http://vis-www.cs.umass.edu/lfw/lfw-names.txt +``` + +And create/fill the database: + +``` +python lfw_cities.py --csv lfw-names.txt --db lfw-names.db +``` diff --git a/accesstoken.example.js b/accesstoken.example.js new file mode 100644 index 0000000..75da567 --- /dev/null +++ b/accesstoken.example.js @@ -0,0 +1 @@ +var leaflet_access_token = 'YOURLEAFLETACCESSTOKEN'; diff --git a/js/heatmap.min.js b/js/heatmap.min.js new file mode 100644 index 0000000..9004bb3 --- /dev/null +++ b/js/heatmap.min.js @@ -0,0 +1,9 @@ +/* + * heatmap.js v2.0.2 | JavaScript Heatmap Library + * + * Copyright 2008-2016 Patrick Wied - All rights reserved. + * Dual licensed under MIT and Beerware license + * + * :: 2016-02-04 21:25 + */ +(function(a,b,c){if(typeof module!=="undefined"&&module.exports){module.exports=c()}else if(typeof define==="function"&&define.amd){define(c)}else{b[a]=c()}})("h337",this,function(){var a={defaultRadius:40,defaultRenderer:"canvas2d",defaultGradient:{.25:"rgb(0,0,255)",.55:"rgb(0,255,0)",.85:"yellow",1:"rgb(255,0,0)"},defaultMaxOpacity:1,defaultMinOpacity:0,defaultBlur:.85,defaultXField:"x",defaultYField:"y",defaultValueField:"value",plugins:{}};var b=function h(){var b=function d(a){this._coordinator={};this._data=[];this._radi=[];this._min=0;this._max=1;this._xField=a["xField"]||a.defaultXField;this._yField=a["yField"]||a.defaultYField;this._valueField=a["valueField"]||a.defaultValueField;if(a["radius"]){this._cfgRadius=a["radius"]}};var c=a.defaultRadius;b.prototype={_organiseData:function(a,b){var d=a[this._xField];var e=a[this._yField];var f=this._radi;var g=this._data;var h=this._max;var i=this._min;var j=a[this._valueField]||1;var 
k=a.radius||this._cfgRadius||c;if(!g[d]){g[d]=[];f[d]=[]}if(!g[d][e]){g[d][e]=j;f[d][e]=k}else{g[d][e]+=j}if(g[d][e]>h){if(!b){this._max=g[d][e]}else{this.setDataMax(g[d][e])}return false}else{return{x:d,y:e,value:j,radius:k,min:i,max:h}}},_unOrganizeData:function(){var a=[];var b=this._data;var c=this._radi;for(var d in b){for(var e in b[d]){a.push({x:d,y:e,radius:c[d][e],value:b[d][e]})}}return{min:this._min,max:this._max,data:a}},_onExtremaChange:function(){this._coordinator.emit("extremachange",{min:this._min,max:this._max})},addData:function(){if(arguments[0].length>0){var a=arguments[0];var b=a.length;while(b--){this.addData.call(this,a[b])}}else{var c=this._organiseData(arguments[0],true);if(c){this._coordinator.emit("renderpartial",{min:this._min,max:this._max,data:[c]})}}return this},setData:function(a){var b=a.data;var c=b.length;this._data=[];this._radi=[];for(var d=0;d0){this._drawAlpha(a);this._colorize()}},renderAll:function(a){this._clear();if(a.data.length>0){this._drawAlpha(c(a));this._colorize()}},_updateGradient:function(b){this._palette=a(b)},updateConfig:function(a){if(a["gradient"]){this._updateGradient(a)}this._setStyles(a)},setDimensions:function(a,b){this._width=a;this._height=b;this.canvas.width=this.shadowCanvas.width=a;this.canvas.height=this.shadowCanvas.height=b},_clear:function(){this.shadowCtx.clearRect(0,0,this._width,this._height);this.ctx.clearRect(0,0,this._width,this._height)},_setStyles:function(a){this._blur=a.blur==0?0:a.blur||a.defaultBlur;if(a.backgroundColor){this.canvas.style.backgroundColor=a.backgroundColor}this._width=this.canvas.width=this.shadowCanvas.width=a.width||this._width;this._height=this.canvas.height=this.shadowCanvas.height=a.height||this._height;this._opacity=(a.opacity||0)*255;this._maxOpacity=(a.maxOpacity||a.defaultMaxOpacity)*255;this._minOpacity=(a.minOpacity||a.defaultMinOpacity)*255;this._useGradientOpacity=!!a.useGradientOpacity},_drawAlpha:function(a){var c=this._min=a.min;var 
d=this._max=a.max;var a=a.data||[];var e=a.length;var f=1-this._blur;while(e--){var g=a[e];var h=g.x;var i=g.y;var j=g.radius;var k=Math.min(g.value,d);var l=h-j;var m=i-j;var n=this.shadowCtx;var o;if(!this._templates[j]){this._templates[j]=o=b(j,f)}else{o=this._templates[j]}var p=(k-c)/(d-c);n.globalAlpha=p<.01?.01:p;n.drawImage(o,l,m);if(lthis._renderBoundaries[2]){this._renderBoundaries[2]=l+2*j}if(m+2*j>this._renderBoundaries[3]){this._renderBoundaries[3]=m+2*j}}},_colorize:function(){var a=this._renderBoundaries[0];var b=this._renderBoundaries[1];var c=this._renderBoundaries[2]-a;var d=this._renderBoundaries[3]-b;var e=this._width;var f=this._height;var g=this._opacity;var h=this._maxOpacity;var i=this._minOpacity;var j=this._useGradientOpacity;if(a<0){a=0}if(b<0){b=0}if(a+c>e){c=e-a}if(b+d>f){d=f-b}var k=this.shadowCtx.getImageData(a,b,c,d);var l=k.data;var m=l.length;var n=this._palette;for(var o=3;o0){r=g}else{if(p>0;return b},getDataURL:function(){return this.canvas.toDataURL()}};return d}();var d=function j(){var b=false;if(a["defaultRenderer"]==="canvas2d"){b=c}return b}();var e={merge:function(){var a={};var b=arguments.length;for(var c=0;c 0) { + var len = pointOrArray.length; + while(len--) { + this.addData(pointOrArray[len]); + } + } else { + var latField = this.cfg.latField || 'lat'; + var lngField = this.cfg.lngField || 'lng'; + var valueField = this.cfg.valueField || 'value'; + var entry = pointOrArray; + var latlng = new L.LatLng(entry[latField], entry[lngField]); + var dataObj = { latlng: latlng }; + + dataObj[valueField] = entry[valueField]; + this._max = Math.max(this._max, dataObj[valueField]); + this._min = Math.min(this._min, dataObj[valueField]); + + if (entry.radius) { + dataObj.radius = entry.radius; + } + this._data.push(dataObj); + this._draw(); + } + }, + _resetOrigin: function () { + this._origin = this._map.layerPointToLatLng(new L.Point(0, 0)); + + var size = this._map.getSize(); + if (this._width !== size.x || this._height !== 
size.y) { + this._width = size.x; + this._height = size.y; + + this._el.style.width = this._width + 'px'; + this._el.style.height = this._height + 'px'; + } + this._draw(); + } + }); + + HeatmapOverlay.CSS_TRANSFORM = (function() { + var div = document.createElement('div'); + var props = [ + 'transform', + 'WebkitTransform', + 'MozTransform', + 'OTransform', + 'msTransform' + ]; + + for (var i = 0; i < props.length; i++) { + var prop = props[i]; + if (div.style[prop] !== undefined) { + return prop; + } + } + + return props[0]; + })(); + + return HeatmapOverlay; +}); \ No newline at end of file diff --git a/lfw_cities.py b/lfw_cities.py new file mode 100644 index 0000000..23eed0e --- /dev/null +++ b/lfw_cities.py @@ -0,0 +1,144 @@ +import requests +import logging +import argparse +import sqlite3 +import csv +import urllib.parse + +class LocationStore: + def __init__(self, db_filename): + self.conn = sqlite3.connect(db_filename) + + # make sure the table exits. + createSqls = [""" + CREATE TABLE IF NOT EXISTS `people` ( + `name` VARCHAR(255), + `city_id` INTEGER, + `city_name` VARCHAR(255), + `latitude` VARCHAR(255), + `longitude` VARCHAR(255), + `wd_person_id` VARCHAR ( 20 ) UNIQUE + ); + """, + """ + CREATE UNIQUE INDEX IF NOT EXISTS `unique_name` ON `people` ( + `name` + ); + """] + cur = self.conn.cursor() + for sql in createSqls: + cur.execute(sql) + self.conn.commit() + # + # def __enter__(self): + # self.c = self.conn.cursor() + # return self + # + # def __exit__(self, type, value, traceback): + # self.c.close() + + def addVariable(self, name, wd_id, city_name, city_id, latitude, longitude): + c = self.conn.cursor() + logger.info(f"Queing storing of {name} ({wd_id}) in {city_name} ({city_id}) on {latitude}/{longitude}") + if wd_id == -1 or wd_id == '-1': + wd_id = None + c.execute("INSERT OR REPLACE INTO people (name, wd_person_id, city_name, city_id, latitude, longitude) VALUES (?,?,?,?, ?, ?)", (name, wd_id, city_name, city_id, latitude, longitude)) + 
self.conn.commit() + c.close() + + def contains(self, name): + cur = self.conn.cursor() + cur.execute(f"SELECT name FROM people WHERE name = ?", (name,)) + values = [v[0] for v in cur.fetchall()] + cur.close() + if len(values): + return True + return False + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger('cities') + +argParser = argparse.ArgumentParser(description='Get coordinates for the birth places of the LFW people') +argParser.add_argument( + '--db', + type=str, + required=True, + help='' + ) +argParser.add_argument( + '--csv', + type=str, + required=True, + help='' + ) +# argParser.add_argument( +# '--limit', +# type=int, +# default=1000, +# help='Limit of new messages to parse' + # ) +argParser.add_argument( + '--verbose', + '-v', + action='store_true', + help='Debug logging' + ) +args = argParser.parse_args() + +if args.verbose: + logger.setLevel(logging.DEBUG) + +# problem how to search by title? +# query = """ +# SELECT ?city ?geoloc where { +# # wd:Q47526 wdt:P27 ?geoloc . +# wd:Q47526 wdt:P19 ?city . +# ?city wdt:P17 ?country . +# ?city wdt:P625 ?geoloc . +# SERVICE wikibase:label { +# bd:serviceParam wikibase:language "en" . 
+# } +# } +# """ + +with open(args.csv, 'r') as fp: + reader = csv.reader(fp, delimiter='\t') + names = [r[0] for r in reader] + +storage = LocationStore(args.db) + + +for name in names: + logger.debug(f"Name: {name}") + if storage.contains(name): + logger.info(f"Skip {name} - exists already") + continue + + urlName = urllib.parse.quote(name) + searchUrl = f"https://www.wikidata.org/w/api.php?action=wbgetentities&sites=enwiki&titles={urlName}&format=json" + headers = {"Accept" : "application/json"} + response = requests.get(searchUrl, headers=headers) + data = response.json() + for wdId in data['entities']: + try: + city_id = data['entities'][wdId]['claims']['P19'][0]['mainsnak']['datavalue']['value']['id'] + except Exception as e: + logger.warn(f"No city found for '{name}'") + logger.debug(data) + storage.addVariable(name, wdId, '','','','') + continue + + try: + geolocUrl = f"https://www.wikidata.org/w/api.php?action=wbgetentities&ids={city_id}&format=json" + r2 = requests.get(geolocUrl, headers=headers) + d2 = r2.json() + city_name = d2['entities'][city_id]['labels']['en']['value'] + latitude = d2['entities'][city_id]['claims']['P625'][0]['mainsnak']['datavalue']['value']['latitude'] + longitude = d2['entities'][city_id]['claims']['P625'][0]['mainsnak']['datavalue']['value']['longitude'] + storage.addVariable(name, wdId, city_name, city_id, latitude, longitude) + except Exception as e: + logger.warn(f"Error when doing followup query to {city_id} for {name}") + logger.exception(e) + storage.addVariable(name, wdId, '', city_id, '','') + + break diff --git a/lfw_map.html b/lfw_map.html new file mode 100644 index 0000000..482cc87 --- /dev/null +++ b/lfw_map.html @@ -0,0 +1,99 @@ + + + + + + + + + + + + + +
+ + + + + + diff --git a/lfw_map.php b/lfw_map.php new file mode 100644 index 0000000..097ae61 --- /dev/null +++ b/lfw_map.php @@ -0,0 +1,119 @@ + + + + + + + + + + + + + + +
+ + + + + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..3288e92 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +requests +