Heatmap of Labeled Faces in the Wild
This commit is contained in:
commit
1ed2ed16c6
9 changed files with 640 additions and 0 deletions
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
accesstoken.js
|
28
README.md
Normal file
28
README.md
Normal file
|
@ -0,0 +1,28 @@
|
|||
This set of scripts generates a map with the birth places of people in the Labeled Faces in the Wild dataset using information available at WikiData.
|
||||
|
||||
It searches WikiData for the names in LFW, gets their birth place if available, and again uses WikiData to find the coordinates of these cities. Obviously, this further skews the bias of the LFW dataset with the selective information that is available through WikiData.
|
||||
|
||||
<!-- To be transparent about this, see the full [list of names](list.md) and whether they are included in the map. -->
|
||||
|
||||
In total, there are 3136 people for which a city with coordinates is found on WikiData, and 2587 people for which this is not the case.
|
||||
|
||||
|
||||
## Install
|
||||
|
||||
```bash
|
||||
virtualenv --system-site-packages -p python3 venv
|
||||
source venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
Then fetch the names from lfw:
|
||||
|
||||
```
|
||||
wget http://vis-www.cs.umass.edu/lfw/lfw-names.txt
|
||||
```
|
||||
|
||||
And create/fill the database:
|
||||
|
||||
```
|
||||
python lfw_cities.py --csv lfw-names.txt --db lfw.db
|
||||
```
|
1
accesstoken.example.js
Normal file
1
accesstoken.example.js
Normal file
|
@ -0,0 +1 @@
|
|||
// Template for accesstoken.js (git-ignored, loaded by lfw_map.php): copy this
// file to accesstoken.js and replace the placeholder with your Mapbox access token.
var leaflet_access_token = 'YOURLEAFLETACCESSTOKEN';
|
9
js/heatmap.min.js
vendored
Normal file
9
js/heatmap.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
237
js/leaflet-heatmap.js
Normal file
237
js/leaflet-heatmap.js
Normal file
|
@ -0,0 +1,237 @@
|
|||
/*
|
||||
* Leaflet Heatmap Overlay
|
||||
*
|
||||
* Copyright (c) 2014, Patrick Wied (http://www.patrick-wied.at)
|
||||
* Dual-licensed under the MIT (http://www.opensource.org/licenses/mit-license.php)
|
||||
* and the Beerware (http://en.wikipedia.org/wiki/Beerware) license.
|
||||
*/
|
||||
|
||||
(function (name, context, factory) {
|
||||
|
||||
// Supports UMD. AMD, CommonJS/Node.js and browser context
|
||||
if (typeof module !== "undefined" && module.exports) {
|
||||
module.exports = factory();
|
||||
} else if (typeof define === "function" && define.amd) {
|
||||
define(factory);
|
||||
} else {
|
||||
context[name] = factory();
|
||||
}
|
||||
|
||||
})("HeatmapOverlay", this, function () {
|
||||
|
||||
// Leaflet < 0.8 compatibility
|
||||
if (typeof L.Layer === 'undefined') {
|
||||
L.Layer = L.Class;
|
||||
}
|
||||
|
||||
var HeatmapOverlay = L.Layer.extend({
|
||||
|
||||
initialize: function (config) {
|
||||
this.cfg = config;
|
||||
this._el = L.DomUtil.create('div', 'leaflet-zoom-hide');
|
||||
this._data = [];
|
||||
this._max = 1;
|
||||
this._min = 0;
|
||||
this.cfg.container = this._el;
|
||||
},
|
||||
|
||||
onAdd: function (map) {
|
||||
var size = map.getSize();
|
||||
var h337 = typeof require !== 'undefined' ? require('heatmap.js') : window.h337;
|
||||
|
||||
this._map = map;
|
||||
|
||||
this._width = size.x;
|
||||
this._height = size.y;
|
||||
|
||||
this._el.style.width = size.x + 'px';
|
||||
this._el.style.height = size.y + 'px';
|
||||
this._el.style.position = 'absolute';
|
||||
|
||||
this._resetOrigin();
|
||||
|
||||
map.getPanes().overlayPane.appendChild(this._el);
|
||||
|
||||
if (!this._heatmap) {
|
||||
this._heatmap = h337.create(this.cfg);
|
||||
}
|
||||
|
||||
// this resets the origin and redraws whenever
|
||||
// the zoom changed or the map has been moved
|
||||
map.on('moveend', this._resetOrigin, this);
|
||||
this._draw();
|
||||
},
|
||||
|
||||
addTo: function (map) {
|
||||
map.addLayer(this);
|
||||
return this;
|
||||
},
|
||||
|
||||
onRemove: function (map) {
|
||||
// remove layer's DOM elements and listeners
|
||||
map.getPanes().overlayPane.removeChild(this._el);
|
||||
|
||||
map.off('moveend', this._resetOrigin, this);
|
||||
},
|
||||
_draw: function() {
|
||||
if (!this._map) { return; }
|
||||
|
||||
var mapPane = this._map.getPanes().mapPane;
|
||||
var point = mapPane._leaflet_pos;
|
||||
|
||||
// reposition the layer
|
||||
this._el.style[HeatmapOverlay.CSS_TRANSFORM] = 'translate(' +
|
||||
-Math.round(point.x) + 'px,' +
|
||||
-Math.round(point.y) + 'px)';
|
||||
|
||||
this._update();
|
||||
},
|
||||
_update: function() {
|
||||
var bounds, zoom, scale;
|
||||
var generatedData = { max: this._max, min: this._min, data: [] };
|
||||
|
||||
bounds = this._map.getBounds();
|
||||
zoom = this._map.getZoom();
|
||||
scale = Math.pow(2, zoom);
|
||||
|
||||
if (this._data.length == 0) {
|
||||
if (this._heatmap) {
|
||||
this._heatmap.setData(generatedData);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
var latLngPoints = [];
|
||||
var radiusMultiplier = this.cfg.scaleRadius ? scale : 1;
|
||||
var localMax = 0;
|
||||
var localMin = 0;
|
||||
var valueField = this.cfg.valueField;
|
||||
var len = this._data.length;
|
||||
|
||||
while (len--) {
|
||||
var entry = this._data[len];
|
||||
var value = entry[valueField];
|
||||
var latlng = entry.latlng;
|
||||
|
||||
|
||||
// we don't wanna render points that are not even on the map ;-)
|
||||
if (!bounds.contains(latlng)) {
|
||||
continue;
|
||||
}
|
||||
// local max is the maximum within current bounds
|
||||
localMax = Math.max(value, localMax);
|
||||
localMin = Math.min(value, localMin);
|
||||
|
||||
var point = this._map.latLngToContainerPoint(latlng);
|
||||
var latlngPoint = { x: Math.round(point.x), y: Math.round(point.y) };
|
||||
latlngPoint[valueField] = value;
|
||||
|
||||
var radius;
|
||||
|
||||
if (entry.radius) {
|
||||
radius = entry.radius * radiusMultiplier;
|
||||
} else {
|
||||
radius = (this.cfg.radius || 2) * radiusMultiplier;
|
||||
}
|
||||
latlngPoint.radius = radius;
|
||||
latLngPoints.push(latlngPoint);
|
||||
}
|
||||
if (this.cfg.useLocalExtrema) {
|
||||
generatedData.max = localMax;
|
||||
generatedData.min = localMin;
|
||||
}
|
||||
|
||||
generatedData.data = latLngPoints;
|
||||
|
||||
this._heatmap.setData(generatedData);
|
||||
},
|
||||
setData: function(data) {
|
||||
this._max = data.max || this._max;
|
||||
this._min = data.min || this._min;
|
||||
var latField = this.cfg.latField || 'lat';
|
||||
var lngField = this.cfg.lngField || 'lng';
|
||||
var valueField = this.cfg.valueField || 'value';
|
||||
|
||||
// transform data to latlngs
|
||||
var data = data.data;
|
||||
var len = data.length;
|
||||
var d = [];
|
||||
|
||||
while (len--) {
|
||||
var entry = data[len];
|
||||
var latlng = new L.LatLng(entry[latField], entry[lngField]);
|
||||
var dataObj = { latlng: latlng };
|
||||
dataObj[valueField] = entry[valueField];
|
||||
if (entry.radius) {
|
||||
dataObj.radius = entry.radius;
|
||||
}
|
||||
d.push(dataObj);
|
||||
}
|
||||
this._data = d;
|
||||
|
||||
this._draw();
|
||||
},
|
||||
// experimential... not ready.
|
||||
addData: function(pointOrArray) {
|
||||
if (pointOrArray.length > 0) {
|
||||
var len = pointOrArray.length;
|
||||
while(len--) {
|
||||
this.addData(pointOrArray[len]);
|
||||
}
|
||||
} else {
|
||||
var latField = this.cfg.latField || 'lat';
|
||||
var lngField = this.cfg.lngField || 'lng';
|
||||
var valueField = this.cfg.valueField || 'value';
|
||||
var entry = pointOrArray;
|
||||
var latlng = new L.LatLng(entry[latField], entry[lngField]);
|
||||
var dataObj = { latlng: latlng };
|
||||
|
||||
dataObj[valueField] = entry[valueField];
|
||||
this._max = Math.max(this._max, dataObj[valueField]);
|
||||
this._min = Math.min(this._min, dataObj[valueField]);
|
||||
|
||||
if (entry.radius) {
|
||||
dataObj.radius = entry.radius;
|
||||
}
|
||||
this._data.push(dataObj);
|
||||
this._draw();
|
||||
}
|
||||
},
|
||||
_resetOrigin: function () {
|
||||
this._origin = this._map.layerPointToLatLng(new L.Point(0, 0));
|
||||
|
||||
var size = this._map.getSize();
|
||||
if (this._width !== size.x || this._height !== size.y) {
|
||||
this._width = size.x;
|
||||
this._height = size.y;
|
||||
|
||||
this._el.style.width = this._width + 'px';
|
||||
this._el.style.height = this._height + 'px';
|
||||
}
|
||||
this._draw();
|
||||
}
|
||||
});
|
||||
|
||||
// Name of the style property to use for CSS transforms: the first of the
// standard / vendor-prefixed names that exists on an element's style object,
// or the unprefixed 'transform' when none does.
HeatmapOverlay.CSS_TRANSFORM = (function () {
  var probe = document.createElement('div');
  var candidates = [
    'transform',
    'WebkitTransform',
    'MozTransform',
    'OTransform',
    'msTransform'
  ];

  var supported = candidates.filter(function (name) {
    return probe.style[name] !== undefined;
  });

  return supported.length > 0 ? supported[0] : candidates[0];
})();
|
||||
|
||||
return HeatmapOverlay;
|
||||
});
|
144
lfw_cities.py
Normal file
144
lfw_cities.py
Normal file
|
@ -0,0 +1,144 @@
|
|||
import requests
|
||||
import logging
|
||||
import argparse
|
||||
import sqlite3
|
||||
import csv
|
||||
import urllib.parse
|
||||
|
||||
class LocationStore:
    """SQLite-backed store of people and the coordinates of their birth place.

    Opens (or creates) the database file and makes sure the ``people`` table
    and its unique index on ``name`` exist. The connection is kept in
    ``self.conn`` for the lifetime of the object.
    """

    def __init__(self, db_filename):
        """Connect to ``db_filename`` and create the schema if it is missing."""
        self.conn = sqlite3.connect(db_filename)

        # make sure the table exists.
        create_sqls = ["""
            CREATE TABLE IF NOT EXISTS `people` (
                `name` VARCHAR(255),
                `city_id` INTEGER,
                `city_name` VARCHAR(255),
                `latitude` VARCHAR(255),
                `longitude` VARCHAR(255),
                `wd_person_id` VARCHAR ( 20 ) UNIQUE
            );
            """,
            """
            CREATE UNIQUE INDEX IF NOT EXISTS `unique_name` ON `people` (
                `name`
            );
            """]
        cur = self.conn.cursor()
        try:
            for sql in create_sqls:
                cur.execute(sql)
        finally:
            cur.close()
        self.conn.commit()

    def addVariable(self, name, wd_id, city_name, city_id, latitude, longitude):
        """Insert or replace the row for ``name`` and commit.

        A ``wd_id`` of ``-1`` / ``'-1'`` is stored as NULL. Because the
        statement is ``INSERT OR REPLACE`` and both ``name`` and
        ``wd_person_id`` are unique, an existing row with the same name or
        the same WikiData id is replaced.
        """
        # Looked up by name so the class does not rely on a module-level
        # `logger` being defined; it is the same 'cities' logger object.
        logging.getLogger('cities').info(
            f"Queing storing of {name} ({wd_id}) in {city_name} ({city_id}) on {latitude}/{longitude}")
        if wd_id == -1 or wd_id == '-1':
            wd_id = None
        c = self.conn.cursor()
        try:
            c.execute(
                "INSERT OR REPLACE INTO people (name, wd_person_id, city_name, city_id, latitude, longitude) VALUES (?,?,?,?, ?, ?)",
                (name, wd_id, city_name, city_id, latitude, longitude))
            self.conn.commit()
        finally:
            # release the cursor even when the insert fails
            c.close()

    def contains(self, name):
        """Return True when a row with exactly this ``name`` exists."""
        cur = self.conn.cursor()
        try:
            cur.execute("SELECT 1 FROM people WHERE name = ? LIMIT 1", (name,))
            return cur.fetchone() is not None
        finally:
            cur.close()
|
||||
|
||||
# Script body: reads the LFW names, looks each one up on WikiData, resolves
# the coordinates of its place of birth (P19 -> P625) and stores the result.
# Names already present in the database are skipped, so the script can be
# re-run to continue an interrupted import.

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('cities')

argParser = argparse.ArgumentParser(description='Get coordinates for the birth places of the LFW people')
argParser.add_argument(
    '--db',
    type=str,
    required=True,
    help='SQLite database file to create/fill with the lookup results'
)
argParser.add_argument(
    '--csv',
    type=str,
    required=True,
    help='Tab-separated file with the name in the first column of each row (e.g. lfw-names.txt)'
)
# argParser.add_argument(
#     '--limit',
#     type=int,
#     default=1000,
#     help='Limit of new messages to parse'
# )
argParser.add_argument(
    '--verbose',
    '-v',
    action='store_true',
    help='Debug logging'
)
args = argParser.parse_args()

if args.verbose:
    logger.setLevel(logging.DEBUG)

# problem how to search by title?
# query = """
# SELECT ?city ?geoloc where {
#   # wd:Q47526 wdt:P27 ?geoloc .
#   wd:Q47526 wdt:P19 ?city .
#   ?city wdt:P17 ?country .
#   ?city wdt:P625 ?geoloc .
#   SERVICE wikibase:label {
#     bd:serviceParam wikibase:language "en" .
#   }
# }
# """

WIKIDATA_API = "https://www.wikidata.org/w/api.php"
HEADERS = {"Accept": "application/json"}
# seconds; without a timeout a stalled connection would hang the import forever
REQUEST_TIMEOUT = 30


def _fetch_entities(params):
    """Call wbgetentities with ``params`` and return its ``entities`` mapping.

    Raises requests.RequestException on network/HTTP errors, ValueError when
    the body is not JSON and KeyError when the reply has no ``entities``.
    """
    query = {'action': 'wbgetentities', 'format': 'json'}
    query.update(params)
    response = requests.get(WIKIDATA_API, params=query, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.json()['entities']


with open(args.csv, 'r', newline='') as fp:
    reader = csv.reader(fp, delimiter='\t')
    # skip blank lines instead of failing on r[0]
    names = [r[0] for r in reader if r and r[0].strip()]

storage = LocationStore(args.db)

for name in names:
    logger.debug(f"Name: {name}")
    if storage.contains(name):
        logger.info(f"Skip {name} - exists already")
        continue

    try:
        entities = _fetch_entities({'sites': 'enwiki', 'titles': name})
    except (requests.RequestException, ValueError, KeyError) as e:
        # nothing is stored, so the name is retried on the next run
        logger.warning(f"Lookup of '{name}' failed, skipping for now: {e}")
        continue

    for wdId in entities:
        try:
            # P19 = place of birth
            city_id = entities[wdId]['claims']['P19'][0]['mainsnak']['datavalue']['value']['id']
        except (KeyError, IndexError, TypeError):
            logger.warning(f"No city found for '{name}'")
            logger.debug(entities)
            storage.addVariable(name, wdId, '', '', '', '')
            continue

        try:
            city = _fetch_entities({'ids': city_id})[city_id]
            city_name = city['labels']['en']['value']
            # P625 = coordinate location
            coordinates = city['claims']['P625'][0]['mainsnak']['datavalue']['value']
            latitude = coordinates['latitude']
            longitude = coordinates['longitude']
            storage.addVariable(name, wdId, city_name, city_id, latitude, longitude)
        except Exception as e:
            logger.warning(f"Error when doing followup query to {city_id} for {name}")
            logger.exception(e)
            storage.addVariable(name, wdId, '', city_id, '', '')

        # only the first entity with a birth place is used
        break
|
99
lfw_map.html
Normal file
99
lfw_map.html
Normal file
File diff suppressed because one or more lines are too long
119
lfw_map.php
Normal file
119
lfw_map.php
Normal file
|
@ -0,0 +1,119 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en" dir="ltr">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title></title>
|
||||
<link rel="stylesheet" href="//unpkg.com/leaflet@1.5.1/dist/leaflet.css"
|
||||
integrity="sha512-xwE/Az9zrjBIphAcBb3F6JVqxf46+CDLwfLMHloNu6KEQCAWi6HcDUbeOfBIptF7tcCzusKFjFw2yuvEpDL9wQ=="
|
||||
crossorigin=""/>
|
||||
<script src="//unpkg.com/leaflet@1.5.1/dist/leaflet.js"
|
||||
integrity="sha512-GffPMF3RvMeYyc1LWMHtK8EbPv0iNZ8/oTtHPx9/cc2ILxQ+u905qIwdpULaqDkyBKgOaB57QTMg7ztg8Jm2Og=="
|
||||
crossorigin=""></script>
|
||||
<script src="accesstoken.js"></script>
|
||||
|
||||
<style media="screen">
|
||||
html, body, #map{
|
||||
width:100%;height:100%;
|
||||
padding:0;
|
||||
margin:0;
|
||||
}
|
||||
.leaflet-container{
|
||||
background:black /*purple*/;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
|
||||
<div id="map"></div>
|
||||
<script src="js/heatmap.min.js"></script>
|
||||
<script src="js/leaflet-heatmap.js"></script>
|
||||
<script type="text/javascript">
|
||||
|
||||
|
||||
// var map = L.map('map').setView([0,0], 0);
|
||||
// Mapbox tile layer. It is currently not added to the map (see `layers`
// below), so a missing accesstoken.js must not abort the whole script:
// fall back to an empty token instead of throwing a ReferenceError.
var tileLayer = L.tileLayer('https://api.tiles.mapbox.com/v4/{id}/{z}/{x}/{y}.png?access_token={accessToken}', {
    attribution: 'Map data © <a href="https://www.openstreetmap.org/">OpenStreetMap</a> contributors, <a href="https://creativecommons.org/licenses/by-sa/2.0/">CC-BY-SA</a>, Imagery © <a href="https://www.mapbox.com/">Mapbox</a>',
    maxZoom: 18,
    id: 'mapbox.light',
    accessToken: typeof leaflet_access_token !== 'undefined' ? leaflet_access_token : ''
});

// Heatmap overlay configuration.
var cfg = {
    // radius should be small ONLY if scaleRadius is true (or small radius is intended)
    // if scaleRadius is false it will be the constant radius used in pixels
    "radius": 2.5,
    "maxOpacity": .8,
    // scales the radius based on map zoom
    "scaleRadius": true,
    // if set to false the heatmap uses the global maximum for colorization
    // if activated: uses the data maximum within the current map boundaries
    // (there will always be a red spot with useLocalExtrema true)
    "useLocalExtrema": false,
    // which field name in your data represents the latitude - default "lat"
    latField: 'lat',
    // which field name in your data represents the longitude - default "lng"
    lngField: 'lng',
    // which field name in your data represents the data value - default "value"
    valueField: 'count'
};

var heatmapLayer = new HeatmapOverlay(cfg);

// World view centred on (0, 0); only the heatmap is shown, on the page's
// plain background.
var map = new L.Map('map', {
    center: new L.LatLng(0, 0),
    zoom: 3,
    layers: [
        // tileLayer,
        heatmapLayer
    ]
});
|
||||
|
||||
|
||||
|
||||
// let r = new Request(`world.geo.json`);
|
||||
// fetch(r)
|
||||
// .then(response => response.json())
|
||||
// .then(response => {
|
||||
// L.geoJson(response, { // initialize layer with data
|
||||
// style: function (feature) { // Style option
|
||||
// return {
|
||||
// 'weight': 1,
|
||||
// 'color': 'black',
|
||||
// 'fillColor': 'yellow'
|
||||
// }
|
||||
// }
|
||||
// }).addTo(map); // Add layer to map
|
||||
// }).catch(function(e){
|
||||
// console.error(e);
|
||||
// });
|
||||
|
||||
|
||||
<?php
// Emits `var lfwData = {max: ..., data: [...]};` for the surrounding script:
// one {lat, lng, count: 1} point per person that has coordinates stored.

$dsn = 'sqlite:./lfw.db';
$dbh = new PDO($dsn);
$sql = "SELECT latitude, longitude FROM people WHERE latitude != '';";

$stmt = $dbh->prepare($sql);
// the query has no placeholders, so nothing is bound
$stmt->execute();

$items = [];
while ($person = $stmt->fetch(PDO::FETCH_ASSOC)) {
    // Coordinates are stored as text; skip anything that is not a number so
    // a bad row can neither break nor inject into the generated JavaScript.
    if (!is_numeric($person['latitude']) || !is_numeric($person['longitude'])) {
        continue;
    }
    $items[] = [
        'lat' => (float) $person['latitude'],
        'lng' => (float) $person['longitude'],
        'count' => 1,
    ];
}

// json_encode produces a valid JavaScript literal for the whole structure
echo "var lfwData = " . json_encode(['max' => 18, 'data' => $items]) . ";";
?>
|
||||
|
||||
heatmapLayer.setData(lfwData);
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
2
requirements.txt
Normal file
2
requirements.txt
Normal file
|
@ -0,0 +1,2 @@
|
|||
requests
|
||||
|
Loading…
Reference in a new issue