Heatmap of Labeled Faces in the Wild

This commit is contained in:
Ruben van de Ven 2020-11-21 16:49:05 +01:00
commit 1ed2ed16c6
9 changed files with 640 additions and 0 deletions

1
.gitignore vendored Normal file
View file

@ -0,0 +1 @@
accesstoken.js

28
README.md Normal file
View file

@ -0,0 +1,28 @@
This set of scripts generates a map with the birth places of people in the Labeled Faces in the Wild dataset using information available at WikiData.
It searches WikiData for the names in LFW, gets their birth place if available, and again uses WikiData to find the coordinates of these cities. Obviously, this further skews the bias of the LFW dataset with the selective information that is available through WikiData.
<!-- To be transparent about this, see the full [list of names](list.md) and whether each name is included in the map. -->
In total there are 3136 people for whom a city with coordinates is found on WikiData, and 2587 people for whom this is not the case.
## Install
```bash
virtualenv --system-site-packages -p python3 venv
source venv/bin/activate
pip install -r requirements.txt
```
Then fetch the names from lfw:
```
wget http://vis-www.cs.umass.edu/lfw/lfw-names.txt
```
And create/fill the database:
```
python lfw_cities.py --csv lfw-names.txt --db lfw.db
```

1
accesstoken.example.js Normal file
View file

@ -0,0 +1 @@
// Placeholder for the tile-provider access token. lfw_map.php loads a file
// named accesstoken.js (which is git-ignored) and passes this global as the
// `accessToken` option of its tile layer, so put the real token in that file.
var leaflet_access_token = "YOURLEAFLETACCESSTOKEN";

9
js/heatmap.min.js vendored Normal file

File diff suppressed because one or more lines are too long

237
js/leaflet-heatmap.js Normal file
View file

@ -0,0 +1,237 @@
/*
* Leaflet Heatmap Overlay
*
* Copyright (c) 2014, Patrick Wied (http://www.patrick-wied.at)
* Dual-licensed under the MIT (http://www.opensource.org/licenses/mit-license.php)
* and the Beerware (http://en.wikipedia.org/wiki/Beerware) license.
*/
(function (name, context, factory) {
  // UMD-style export: CommonJS/Node.js first, then AMD, otherwise the
  // factory result is attached to `context` under `name`.
  if (typeof module !== "undefined" && module.exports) {
    module.exports = factory();
  } else if (typeof define === "function" && define.amd) {
    define(factory);
  } else {
    context[name] = factory();
  }
})("HeatmapOverlay", this, function () {
  // Leaflet < 0.8 compatibility
  if (typeof L.Layer === 'undefined') {
    L.Layer = L.Class;
  }

  /**
   * Leaflet layer that draws a heatmap.js instance on top of the map.
   *
   * Data points are kept as lat/lng values and re-projected to container
   * pixels on every redraw; only points inside the current map bounds are
   * handed to heatmap.js.
   */
  var HeatmapOverlay = L.Layer.extend({
    /**
     * @param {Object} [config] heatmap.js configuration, extended with
     *   `latField`, `lngField`, `valueField`, `scaleRadius` and
     *   `useLocalExtrema`. The object is used as-is (not copied) and gets
     *   a `container` property pointing at the layer's element.
     */
    initialize: function (config) {
      // Tolerate a missing config instead of throwing on `.container` below.
      this.cfg = config || {};
      this._el = L.DomUtil.create('div', 'leaflet-zoom-hide');
      this._data = [];
      this._max = 1;
      this._min = 0;
      this.cfg.container = this._el;
    },

    /**
     * Called by Leaflet when the layer is added to a map: sizes the
     * container to the map, creates the heatmap.js instance (once) and
     * starts listening for map moves.
     */
    onAdd: function (map) {
      var size = map.getSize();
      var h337 = typeof require !== 'undefined' ? require('heatmap.js') : window.h337;

      this._map = map;
      this._width = size.x;
      this._height = size.y;

      this._el.style.width = size.x + 'px';
      this._el.style.height = size.y + 'px';
      this._el.style.position = 'absolute';

      // Note: this triggers a draw before the heatmap instance exists;
      // _update() returns early in that case.
      this._resetOrigin();

      map.getPanes().overlayPane.appendChild(this._el);

      if (!this._heatmap) {
        this._heatmap = h337.create(this.cfg);
      }

      // this resets the origin and redraws whenever
      // the zoom changed or the map has been moved
      map.on('moveend', this._resetOrigin, this);
      this._draw();
    },

    /** Adds the layer to `map` and returns the layer for chaining. */
    addTo: function (map) {
      map.addLayer(this);
      return this;
    },

    /** Called by Leaflet on removal: detaches the element and the listener. */
    onRemove: function (map) {
      map.getPanes().overlayPane.removeChild(this._el);
      map.off('moveend', this._resetOrigin, this);
    },

    /**
     * Moves the container so it stays aligned with the visible map area,
     * then regenerates the heatmap data. Does nothing while the layer is
     * not attached to a map.
     */
    _draw: function () {
      if (!this._map) { return; }

      var mapPane = this._map.getPanes().mapPane;
      var point = L.DomUtil.getPosition(mapPane);

      // Counter the map pane's own translation.
      this._el.style[HeatmapOverlay.CSS_TRANSFORM] = 'translate(' +
        -Math.round(point.x) + 'px,' +
        -Math.round(point.y) + 'px)';

      this._update();
    },

    /**
     * Projects the stored points that lie inside the current bounds to
     * container pixels and passes them to heatmap.js.
     */
    _update: function () {
      var generatedData = { max: this._max, min: this._min, data: [] };

      // The heatmap instance is only created in onAdd(); data set before
      // that is rendered by the draw at the end of onAdd().
      if (!this._heatmap) { return; }

      if (this._data.length === 0) {
        this._heatmap.setData(generatedData);
        return;
      }

      var bounds = this._map.getBounds();
      var zoom = this._map.getZoom();
      var scale = Math.pow(2, zoom);

      var latLngPoints = [];
      var radiusMultiplier = this.cfg.scaleRadius ? scale : 1;
      // Both extrema start at 0, so the local minimum is never above 0 and
      // the local maximum is never below 0 (kept as in the original).
      var localMax = 0;
      var localMin = 0;
      // Same default as setData()/addData(), which store values under it.
      var valueField = this.cfg.valueField || 'value';
      var len = this._data.length;

      while (len--) {
        var entry = this._data[len];
        var value = entry[valueField];
        var latlng = entry.latlng;

        // we don't wanna render points that are not even on the map ;-)
        if (!bounds.contains(latlng)) {
          continue;
        }

        // local extrema are computed within the current bounds only
        localMax = Math.max(value, localMax);
        localMin = Math.min(value, localMin);

        var point = this._map.latLngToContainerPoint(latlng);
        var latlngPoint = { x: Math.round(point.x), y: Math.round(point.y) };
        latlngPoint[valueField] = value;

        // A per-point radius wins over the configured one (default 2).
        var radius;
        if (entry.radius) {
          radius = entry.radius * radiusMultiplier;
        } else {
          radius = (this.cfg.radius || 2) * radiusMultiplier;
        }
        latlngPoint.radius = radius;
        latLngPoints.push(latlngPoint);
      }

      if (this.cfg.useLocalExtrema) {
        generatedData.max = localMax;
        generatedData.min = localMin;
      }

      generatedData.data = latLngPoints;
      this._heatmap.setData(generatedData);
    },

    /**
     * Converts a raw data entry into the internal representation
     * `{ latlng, <valueField>, [radius] }`.
     */
    _toDataPoint: function (entry) {
      var latField = this.cfg.latField || 'lat';
      var lngField = this.cfg.lngField || 'lng';
      var valueField = this.cfg.valueField || 'value';

      var dataObj = { latlng: new L.LatLng(entry[latField], entry[lngField]) };
      dataObj[valueField] = entry[valueField];
      if (entry.radius) {
        dataObj.radius = entry.radius;
      }
      return dataObj;
    },

    /**
     * Replaces all data points and redraws.
     *
     * @param {Object} data `{ max, min, data: [...] }`; `max` and `min`
     *   are optional and keep their previous value when omitted.
     */
    setData: function (data) {
      // A falsy max (including 0) keeps the previous maximum, as before.
      this._max = data.max || this._max;
      // An explicit min of 0 must be honoured, so only skip null/undefined.
      this._min = data.min != null ? data.min : this._min;

      var points = data.data || [];
      var len = points.length;
      var converted = [];

      // Iterate backwards to keep the original storage order.
      while (len--) {
        converted.push(this._toDataPoint(points[len]));
      }

      this._data = converted;
      this._draw();
    },

    /**
     * Appends a point, or every point of a (possibly nested) array,
     * without redrawing. Widens the stored min/max to include the values.
     */
    _appendData: function (pointOrArray) {
      if (Array.isArray(pointOrArray) || pointOrArray.length > 0) {
        var len = pointOrArray.length;
        while (len--) {
          this._appendData(pointOrArray[len]);
        }
        return;
      }

      var valueField = this.cfg.valueField || 'value';
      var dataObj = this._toDataPoint(pointOrArray);
      this._max = Math.max(this._max, dataObj[valueField]);
      this._min = Math.min(this._min, dataObj[valueField]);
      this._data.push(dataObj);
    },

    // experimential... not ready.
    /**
     * Adds one point or an array of points and redraws once.
     * An empty array is a no-op apart from the redraw.
     */
    addData: function (pointOrArray) {
      this._appendData(pointOrArray);
      this._draw();
    },

    /**
     * Stores the lat/lng of layer point (0, 0), resizes the container if
     * the map size changed, and redraws.
     */
    _resetOrigin: function () {
      this._origin = this._map.layerPointToLatLng(new L.Point(0, 0));

      var size = this._map.getSize();
      if (this._width !== size.x || this._height !== size.y) {
        this._width = size.x;
        this._height = size.y;

        this._el.style.width = this._width + 'px';
        this._el.style.height = this._height + 'px';
      }
      this._draw();
    }
  });

  // Name of the CSS transform style property supported by this browser;
  // falls back to the unprefixed name when none is detected.
  HeatmapOverlay.CSS_TRANSFORM = (function () {
    var div = document.createElement('div');
    var props = [
      'transform',
      'WebkitTransform',
      'MozTransform',
      'OTransform',
      'msTransform'
    ];

    for (var i = 0; i < props.length; i++) {
      var prop = props[i];
      if (div.style[prop] !== undefined) {
        return prop;
      }
    }
    return props[0];
  })();

  return HeatmapOverlay;
});

144
lfw_cities.py Normal file
View file

@ -0,0 +1,144 @@
import requests
import logging
import argparse
import sqlite3
import csv
import urllib.parse
class LocationStore:
    """SQLite-backed store with one row per person name.

    Each row holds the person's WikiData id plus the id, name and
    coordinates of the associated city. Names are unique; storing a name
    again replaces the existing row.
    """

    def __init__(self, db_filename):
        """Open (or create) the database and make sure the schema exists."""
        self.conn = sqlite3.connect(db_filename)

        # make sure the table exists.
        createSqls = ["""
            CREATE TABLE IF NOT EXISTS `people` (
                `name` VARCHAR(255),
                `city_id` INTEGER,
                `city_name` VARCHAR(255),
                `latitude` VARCHAR(255),
                `longitude` VARCHAR(255),
                `wd_person_id` VARCHAR ( 20 ) UNIQUE
            );
            """,
            """
            CREATE UNIQUE INDEX IF NOT EXISTS `unique_name` ON `people` (
                `name`
            );
            """]
        cur = self.conn.cursor()
        try:
            for sql in createSqls:
                cur.execute(sql)
            self.conn.commit()
        finally:
            cur.close()

    #
    # def __enter__(self):
    #     self.c = self.conn.cursor()
    #     return self
    #
    # def __exit__(self, type, value, traceback):
    #     self.c.close()

    def addVariable(self, name, wd_id, city_name, city_id, latitude, longitude):
        """Insert or replace the row for `name` and commit immediately.

        A `wd_id` of -1 (int or str) is stored as NULL.
        """
        logger.info(f"Queing storing of {name} ({wd_id}) in {city_name} ({city_id}) on {latitude}/{longitude}")
        if wd_id == -1 or wd_id == '-1':
            # NULL keeps the UNIQUE constraint on wd_person_id from
            # colliding between several people that share the -1 id.
            wd_id = None

        c = self.conn.cursor()
        try:
            c.execute(
                "INSERT OR REPLACE INTO people (name, wd_person_id, city_name, city_id, latitude, longitude) VALUES (?,?,?,?, ?, ?)",
                (name, wd_id, city_name, city_id, latitude, longitude),
            )
            self.conn.commit()
        finally:
            c.close()

    def contains(self, name):
        """Return True when a row for `name` exists, False otherwise."""
        cur = self.conn.cursor()
        try:
            # Existence check only; no need to fetch every matching row.
            cur.execute("SELECT 1 FROM people WHERE name = ? LIMIT 1", (name,))
            return cur.fetchone() is not None
        finally:
            cur.close()
# Log at INFO by default; --verbose lowers the script's logger to DEBUG.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('cities')

argParser = argparse.ArgumentParser(description='Get coordinates for the birth places of the LFW people')
argParser.add_argument(
    '--db',
    type=str,
    required=True,
    help='Path to the SQLite database file (created if it does not exist)'
)
argParser.add_argument(
    '--csv',
    type=str,
    required=True,
    help='Tab-separated file with a name in the first column of each line'
)
# argParser.add_argument(
#     '--limit',
#     type=int,
#     default=1000,
#     help='Limit of new messages to parse'
# )
argParser.add_argument(
    '--verbose',
    '-v',
    action='store_true',
    help='Debug logging'
)
args = argParser.parse_args()

if args.verbose:
    logger.setLevel(logging.DEBUG)
# problem how to search by title?
# query = """
# SELECT ?city ?geoloc where {
# # wd:Q47526 wdt:P27 ?geoloc .
# wd:Q47526 wdt:P19 ?city .
# ?city wdt:P17 ?country .
# ?city wdt:P625 ?geoloc .
# SERVICE wikibase:label {
# bd:serviceParam wikibase:language "en" .
# }
# }
# """
# Seconds to wait for WikiData before giving up on a single request.
REQUEST_TIMEOUT = 30

with open(args.csv, 'r', newline='') as fp:
    reader = csv.reader(fp, delimiter='\t')
    # csv.reader yields an empty list for blank lines; skip those.
    names = [r[0] for r in reader if r]

storage = LocationStore(args.db)
headers = {"Accept": "application/json"}

for name in names:
    logger.debug(f"Name: {name}")

    # Names already in the database are not queried again, which makes the
    # script resumable.
    if storage.contains(name):
        logger.info(f"Skip {name} - exists already")
        continue

    # Step 1: look up the WikiData entity behind the enwiki page title.
    urlName = urllib.parse.quote(name)
    searchUrl = f"https://www.wikidata.org/w/api.php?action=wbgetentities&sites=enwiki&titles={urlName}&format=json"
    try:
        response = requests.get(searchUrl, headers=headers, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        data = response.json()
        entities = data['entities']
    except (requests.RequestException, ValueError, KeyError) as e:
        # Nothing is stored, so the name is retried on the next run.
        logger.warning(f"Lookup failed for '{name}': {e}")
        continue

    for wdId in entities:
        # P19 is the claim read as the person's birth place.
        try:
            city_id = entities[wdId]['claims']['P19'][0]['mainsnak']['datavalue']['value']['id']
        except (KeyError, IndexError, TypeError):
            logger.warning(f"No city found for '{name}'")
            logger.debug(data)
            storage.addVariable(name, wdId, '', '', '', '')
            continue

        # Step 2: fetch the city's English label and its P625 coordinates.
        try:
            geolocUrl = f"https://www.wikidata.org/w/api.php?action=wbgetentities&ids={city_id}&format=json"
            r2 = requests.get(geolocUrl, headers=headers, timeout=REQUEST_TIMEOUT)
            r2.raise_for_status()
            d2 = r2.json()
            city = d2['entities'][city_id]
            city_name = city['labels']['en']['value']
            coordinates = city['claims']['P625'][0]['mainsnak']['datavalue']['value']
            latitude = coordinates['latitude']
            longitude = coordinates['longitude']
            storage.addVariable(name, wdId, city_name, city_id, latitude, longitude)
        except requests.RequestException as e:
            # Transport/HTTP failure: do not record the person as lacking
            # coordinates; leave the name unstored so a later run retries.
            logger.warning(f"Request failed when doing followup query to {city_id} for {name}: {e}")
        except Exception as e:
            logger.warning(f"Error when doing followup query to {city_id} for {name}")
            logger.exception(e)
            storage.addVariable(name, wdId, '', city_id, '', '')

        # Stop at the first entity that has a birth place claim.
        break

99
lfw_map.html Normal file

File diff suppressed because one or more lines are too long

119
lfw_map.php Normal file
View file

@ -0,0 +1,119 @@
<!DOCTYPE html>
<html lang="en" dir="ltr">
<head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>Heatmap of Labeled Faces in the Wild</title>
    <!-- Leaflet 1.5.1 from unpkg, pinned with subresource-integrity hashes. -->
    <link rel="stylesheet" href="https://unpkg.com/leaflet@1.5.1/dist/leaflet.css"
      integrity="sha512-xwE/Az9zrjBIphAcBb3F6JVqxf46+CDLwfLMHloNu6KEQCAWi6HcDUbeOfBIptF7tcCzusKFjFw2yuvEpDL9wQ=="
      crossorigin=""/>
    <script src="https://unpkg.com/leaflet@1.5.1/dist/leaflet.js"
      integrity="sha512-GffPMF3RvMeYyc1LWMHtK8EbPv0iNZ8/oTtHPx9/cc2ILxQ+u905qIwdpULaqDkyBKgOaB57QTMg7ztg8Jm2Og=="
      crossorigin=""></script>
    <!-- Defines the global leaflet_access_token used by the tile layer. -->
    <script src="accesstoken.js"></script>
    <style media="screen">
      /* Let the map fill the whole viewport. */
      html, body, #map{
        width:100%;height:100%;
        padding:0;
        margin:0;
      }
      .leaflet-container{
        background:black /*purple*/;
      }
    </style>
</head>
<body>
<div id="map"></div>
<script src="js/heatmap.min.js"></script>
<script src="js/leaflet-heatmap.js"></script>
<script type="text/javascript">
// Heatmap layer options. The first three are passed through to heatmap.js,
// the rest are read by HeatmapOverlay itself.
var cfg = {
  // With scaleRadius enabled the radius is multiplied by 2^zoom on every
  // redraw, so it has to be small; otherwise it is a fixed pixel radius.
  radius: 2.5,
  maxOpacity: 0.8,
  scaleRadius: true,
  // false: colour against the global max/min passed to setData();
  // true: colour against the extrema of the points currently in view.
  useLocalExtrema: false,
  // Property names used by the data points handed to setData().
  latField: 'lat',
  lngField: 'lng',
  valueField: 'count'
};

// Mapbox base tiles. Currently NOT added to the map (see `layers` below),
// so the heatmap is shown on the plain container background.
var tileLayer = new L.TileLayer(
  'https://api.tiles.mapbox.com/v4/{id}/{z}/{x}/{y}.png?access_token={accessToken}',
  {
    attribution: 'Map data &copy; <a href="https://www.openstreetmap.org/">OpenStreetMap</a> contributors, <a href="https://creativecommons.org/licenses/by-sa/2.0/">CC-BY-SA</a>, Imagery © <a href="https://www.mapbox.com/">Mapbox</a>',
    maxZoom: 18,
    id: 'mapbox.light',
    accessToken: leaflet_access_token
  }
);

var heatmapLayer = new HeatmapOverlay(cfg);

// World view centred on 0/0 with only the heatmap layer attached.
var map = L.map('map', {
  center: L.latLng(0, 0),
  zoom: 3,
  layers: [
    // tileLayer,
    heatmapLayer
  ]
});
// let r = new Request(`world.geo.json`);
// fetch(r)
// .then(response => response.json())
// .then(response => {
// L.geoJson(response, { // initialize layer with data
// style: function (feature) { // Style option
// return {
// 'weight': 1,
// 'color': 'black',
// 'fillColor': 'yellow'
// }
// }
// }).addTo(map); // Add layer to map
// }).catch(function(e){
// console.error(e);
// });
<?php
// Emit the heatmap input as a JavaScript variable: one point with count 1
// for every person row that has coordinates stored.
$dsn = 'sqlite:./lfw.db';
$dbh = new PDO($dsn);
// Fail loudly on SQL errors instead of continuing with a false statement.
$dbh->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

$sql = "SELECT latitude, longitude FROM people WHERE latitude != '';";
$stmt = $dbh->prepare($sql);
// The query has no placeholders, so there are no parameters to bind.
$stmt->execute();

$items = [];
while ($person = $stmt->fetch(PDO::FETCH_ASSOC)) {
    // Coordinates are stored as text; skip anything that is not a number
    // so a bad row cannot break (or inject into) the generated script.
    if (!is_numeric($person['latitude']) || !is_numeric($person['longitude'])) {
        continue;
    }
    $items[] = [
        'lat' => (float) $person['latitude'],
        'lng' => (float) $person['longitude'],
        'count' => 1,
    ];
}

// max is the value heatmap.js treats as full intensity.
echo "var lfwData = " . json_encode(['max' => 18, 'data' => $items]) . ";";
?>
// Hand the points generated by the PHP block above (lfwData) to the heatmap layer.
heatmapLayer.setData(lfwData);
</script>
</body>
</html>

2
requirements.txt Normal file
View file

@ -0,0 +1,2 @@
requests