guest_worker/sorteerhoed/webserver.py

500 lines
19 KiB
Python
Raw Normal View History

import json
import logging
import os
2019-09-11 18:16:33 +02:00
import tornado.ioloop
import tornado.web
import tornado.websocket
from urllib.parse import urlparse
2019-10-30 15:19:32 +01:00
import magic
2019-10-23 10:56:28 +02:00
from threading import Thread, Event
from queue import Queue, Empty
2019-10-23 10:56:28 +02:00
import asyncio
2019-10-23 22:33:37 +02:00
from sorteerhoed import HITStore
from sorteerhoed.Signal import Signal
import httpagentparser
import geoip2.database
2019-10-30 15:19:32 +01:00
import queue
2019-11-01 19:09:20 +01:00
import datetime
import html
2019-10-23 10:56:28 +02:00
logger = logging.getLogger("sorteerhoed").getChild("webserver")
2019-09-11 18:16:33 +02:00
class StaticFileWithHeaderHandler(tornado.web.StaticFileHandler):
    """Static file handler that adds extra headers based on the file extension."""

    def set_extra_headers(self, path):
        """Add extra headers to the response for ``path``.

        * ``.html`` files get ``Access-Control-Allow-Origin: *``.
        * ``.svg`` files get the ``image/svg+xml`` content type.
        * ``.png`` files whose detected mime type is ``image/svg+xml`` are
          served with the ``image/svg+xml`` content type as well.

        :param path: path of the requested file, joined onto ``self.root``
            when the file itself has to be inspected.
        """
        if path.endswith('.html'):
            self.set_header("Access-Control-Allow-Origin", "*")
        elif path.endswith('.svg'):
            self.set_header("Content-Type", "image/svg+xml")
        elif path.endswith('.png'):
            # in testing, without scanner, images are saved as svg
            mime = magic.from_file(os.path.join(self.root, path), mime=True)
            # Log through the module logger instead of printing to stdout.
            logger.debug(f"Detected mime type for {path}: {mime}")
            if mime == 'image/svg+xml':
                self.set_header("Content-Type", "image/svg+xml")
2019-09-11 18:16:33 +02:00
class WebSocketHandler(tornado.websocket.WebSocketHandler):
    """Websocket over which a client sends the strokes it draws for a HIT.

    Incoming ``move`` points are forwarded to ``plotterQ``; lifecycle and
    client information is reported as ``Signal`` objects on ``eventQ``.
    """

    # Hosts allowed to open the socket. An entry starting with a dot matches
    # any host ending in it; other entries match the host itself and its
    # subdomains.
    CORS_ORIGINS = ['localhost', '.mturk.com', 'here.rubenvandeven.com', 'guest.rubenvandeven.com']

    # All currently open handlers of this class.
    connections = set()

    def initialize(self, config, plotterQ: Queue, eventQ: Queue, store: HITStore):
        """Store the collaborators passed in from the route definition."""
        self.config = config
        self.plotterQ = plotterQ
        self.eventQ = eventQ
        self.store = store
        # Set by open() once the connection is accepted for a HIT.
        self.hit = None
        self.timeout = None
        self.strokes = []

    @staticmethod
    def _host_matches(hostname, allowed):
        """Return True when ``hostname`` is covered by the ``allowed`` entry."""
        if allowed.startswith('.'):
            return hostname.endswith(allowed)
        # Require a label boundary so that e.g. 'evillocalhost' is rejected.
        return hostname == allowed or hostname.endswith('.' + allowed)

    def check_origin(self, origin):
        """Accept the connection only when the Origin host is in CORS_ORIGINS.

        An origin without a parseable hostname is rejected instead of
        raising.
        """
        # parsed_origin.netloc.lower() gives localhost:3333
        hostname = urlparse(origin).hostname
        if not hostname:
            return False
        return any(self._host_matches(hostname, allowed) for allowed in self.CORS_ORIGINS)

    # the client connected
    def open(self, p=None):
        """Bind the connection to the store's current HIT.

        The connection is closed when the ``id`` query argument is missing,
        not an integer, or differs from the current HIT's id.

        :raises Exception: when the current HIT has already been submitted.
        """
        self.__class__.connections.add(self)

        try:
            hit_id = int(self.get_query_argument('id'))
        except (tornado.web.MissingArgumentError, ValueError):
            logger.warning("Websocket opened without a valid HIT id")
            self.close()
            return

        current_hit = self.store.currentHit
        if current_hit is None or hit_id != current_hit.id:
            self.close()
            return

        self.hit = current_hit
        self.timeout = datetime.datetime.now() + datetime.timedelta(seconds=self.store.getHitTimeout())

        if self.hit.submit_hit_at:
            raise Exception("Opening websocket for already submitted hit")

        #logger.info(f"New client connected: {self.request.remote_ip} for {self.hit.id}/{self.hit.hit_id}")
        self.eventQ.put(Signal('server.open', dict(hit_id=self.hit.id)))
        self.strokes = []

        # Gather some initial information:
        ua = self.request.headers.get('User-Agent', None)
        if ua:
            ua_info = httpagentparser.detect(ua)
            # Tolerate a missing 'os' or 'browser' entry rather than
            # failing the whole connection with a KeyError.
            self.eventQ.put(Signal('hit.info', dict(
                hit_id=self.hit.id,
                os=ua_info.get('os', {}).get('name'),
                browser=ua_info.get('browser', {}).get('name'),
            )))

    # the client sent the message
    def on_message(self, message):
        """Handle one JSON message; its ``action`` key selects the behaviour.

        * ``move``: record the point and forward it to the plotter queue.
        * ``up``: record the point with its third element set to 1.
        * ``submit``: store the drawing and reply with the HIT token.
        * ``down``: ignored.
        * ``info``: report resolution and browser on the event queue.

        Any exception raised while handling the message is logged, not
        propagated.
        """
        logger.debug(f"recieve: {message}")
        if self.hit is None:
            # open() did not accept this connection; nothing to do.
            return

        if datetime.datetime.now() > self.timeout:
            logger.critical("Close websocket after timeout (abandon?)")
            self.close()
            return

        try:
            msg = json.loads(message)
            action = msg['action']
            # TODO: sanitize input: min/max, limit strokes
            if action == 'move':
                # TODO: min/max input
                point = [float(msg['direction'][0]), float(msg['direction'][1]), bool(msg['mouse'])]
                self.strokes.append(point)
                self.plotterQ.put(point)
            elif action == 'up':
                logger.info(f'up: {msg}')
                # Coerce to float, consistent with 'move' points.
                point = [float(msg['direction'][0]), float(msg['direction'][1]), 1]
                self.strokes.append(point)
            elif action == 'submit':
                logger.info(f'submit: {msg}')
                self._handle_submit(msg)
            elif action == 'down':
                # not used, implicit in move?
                pass
            elif action == 'info':
                self.eventQ.put(Signal('hit.info', dict(
                    hit_id=self.hit.id,
                    resolution=msg['resolution'],
                    browser=msg['browser']
                )))
            else:
                # self.send({'alert': 'Unknown request: {}'.format(message)})
                logger.warning('Unknown request: {}'.format(message))
        except Exception as e:
            # self.send({'alert': 'Invalid request: {}'.format(e)})
            logger.exception(e)

    def _handle_submit(self, msg):
        """Submit the strokes, write the client's path as SVG and reply with the token."""
        token = self.submit_strokes()
        if not token:
            self.write_message(json.dumps('error'))
            return

        #store svg:
        d = html.escape(msg['d'])
        scanner = self.config['scanner']
        # NOTE(review): the trailing "0" after width/height makes the viewBox
        # ten times the configured size -- presumably tenths of a mm; confirm.
        svg = f"""<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<svg
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
version="1.0" viewBox="0 0 {scanner['width']}0 {scanner['height']}0" width="{scanner['width']}mm" height="{scanner['height']}mm" preserveAspectRatio="none">
<path d="{d}" style='stroke:gray;stroke-width:2mm;fill:none;' id="stroke" />
</svg>
"""
        # Write to the HIT bound to this socket, not whatever the store's
        # current HIT is by now.
        with open(self.hit.getSvgImagePath(), 'w') as fp:
            fp.write(svg)

        self.write_message(json.dumps({
            'action': 'submitted',
            'msg': f"Submission ok, please copy this token to your HIT at Mechanical Turk: {self.hit.uuid}",
            'code': str(self.hit.uuid)
        }))
        self.close()

    # client disconnected
    def on_close(self):
        """Unregister the connection."""
        self.__class__.rmConnection(self)
        logger.info(f"Client disconnected: {self.request.remote_ip}")

    def submit_strokes(self):
        """Signal that the strokes of this connection are submitted.

        When ``config['dummy_plotter']`` is set, the strokes are also
        written as an SVG to the HIT's image path and a ``hit.scanned``
        signal is emitted.

        :return: ``False`` when no strokes were recorded, otherwise the
            HIT's uuid.
        """
        if not self.strokes:
            return False

        self.eventQ.put(Signal("server.submit", dict(hit_id=self.hit.id)))

        if self.config['dummy_plotter']:
            d = strokes2D(self.strokes)
            svg = f"""<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<svg viewBox="0 0 600 600"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
version="1.1"
>
<path d="{d}" style="stroke:black;stroke-width:2;fill:none;" />
</svg>
"""
            filename = self.hit.getImagePath()
            logger.info(f"Write to {filename}")
            with open(filename, 'w') as fp:
                fp.write(svg)
            # we fake a hit.scanned event
            self.eventQ.put(Signal('hit.scanned', {'hit_id': self.hit.id}))

        return self.hit.uuid

    @classmethod
    def rmConnection(cls, client):
        """Remove ``client`` from the open connections; unknown clients are ignored."""
        cls.connections.discard(client)
2019-10-23 22:33:37 +02:00
class StatusWebSocketHandler(tornado.websocket.WebSocketHandler):
    """Websocket that pushes status page properties to connected clients."""

    # Hosts allowed to open the socket (the host itself and its subdomains).
    CORS_ORIGINS = ['localhost']
    # All currently open handlers of this class.
    connections = set()
    queue = queue.Queue()

    def initialize(self, statusPage):
        """Keep a reference to the status page whose properties are sent on open."""
        self.statusPage = statusPage

    @staticmethod
    def _host_matches(hostname, allowed):
        """Return True when ``hostname`` is covered by the ``allowed`` entry."""
        if allowed.startswith('.'):
            return hostname.endswith(allowed)
        # Require a label boundary so that e.g. 'evillocalhost' is rejected.
        return hostname == allowed or hostname.endswith('.' + allowed)

    @staticmethod
    def _encode(prop, value):
        """Serialize one property update; datetimes become ISO strings (seconds precision)."""
        if isinstance(value, datetime.datetime):
            value = value.isoformat(timespec='seconds')
        return json.dumps({'property': prop, 'value': value})

    def check_origin(self, origin):
        """Accept the connection only when the Origin host is in CORS_ORIGINS.

        An origin without a parseable hostname is rejected instead of
        raising.
        """
        # parsed_origin.netloc.lower() gives localhost:3333
        hostname = urlparse(origin).hostname
        if not hostname:
            return False
        return any(self._host_matches(hostname, allowed) for allowed in self.CORS_ORIGINS)

    # the client connected
    def open(self, p=None):
        """Register the connection and send every current status property."""
        self.__class__.connections.add(self)

        # Iterate over a snapshot so a concurrent attribute addition cannot
        # invalidate the iteration.
        for prop, value in list(vars(self.statusPage).items()):
            self.write_message(self._encode(prop, value))

    # client disconnected
    def on_close(self):
        """Unregister the connection."""
        self.__class__.rmConnection(self)
        logger.info(f"Client disconnected: {self.request.remote_ip}")

    @classmethod
    def rmConnection(cls, client):
        """Remove ``client`` from the open connections; unknown clients are ignored."""
        cls.connections.discard(client)

    @classmethod
    def update_for_all(cls, prop, value):
        """Send one property update to every open connection.

        A connection that turns out to be closed is dropped and does not
        prevent the remaining connections from being updated.
        """
        logger.debug(f"update for all {prop} {value}")
        payload = cls._encode(prop, value)
        # Copy: connections may be removed while iterating.
        for connection in tuple(cls.connections):
            try:
                connection.write_message(payload)
            except tornado.websocket.WebSocketClosedError:
                cls.rmConnection(connection)
2019-10-23 10:56:28 +02:00
def strokes2D(strokes):
    """Convert strokes to the ``d`` attribute of an SVG path.

    The first point is written as an absolute ``M`` command; every later
    point is written relative to its predecessor. When the predecessor's
    third element equals 1 the point is preceded by `` m``, otherwise by
    `` l `` unless the previous command already was ``l``.

    :param strokes: iterable of points, each indexable as
        ``[x, y, flag]``.
    :return: the path data string; empty when ``strokes`` is empty.
    """
    parts = []
    last_stroke = None
    cmd = ""
    for stroke in strokes:
        if last_stroke is None:
            parts.append(f"M{stroke[0]},{stroke[1]} ")
            cmd = 'M'
        else:
            if last_stroke[2] == 1:
                parts.append(" m")
                cmd = 'm'
            elif cmd != 'l':
                parts.append(' l ')
                cmd = 'l'

            dx = stroke[0] - last_stroke[0]
            dy = stroke[1] - last_stroke[1]
            parts.append(f"{dx},{dy} ")
        last_stroke = stroke
    # Join once instead of growing a string with repeated +=.
    return "".join(parts)
2019-10-23 22:33:37 +02:00
class DrawPageHandler(tornado.web.RequestHandler):
    """Serves the drawing page (``index.html`` with placeholders filled in) for the current HIT."""

    def initialize(self, store: HITStore, eventQ: Queue, path: str, width: int, height: int, draw_width: int, draw_height: int, top_padding: int, left_padding: int, geoip_reader: geoip2.database.Reader):
        """Store the collaborators and page dimensions passed in from the route definition."""
        self.store = store
        self.path = path
        self.width = width
        self.height = height
        self.draw_width = draw_width
        self.draw_height = draw_height
        self.top_padding = top_padding
        self.left_padding = left_padding
        self.eventQ = eventQ
        self.geoip_reader = geoip_reader

    def _client_ip(self):
        """Return the client address, preferring the first X-Forwarded-For entry."""
        forwarded = self.request.headers.get('X-Forwarded-For')
        if forwarded:
            # The header may hold a comma separated chain; the first entry
            # is the one usable for a single-address lookup.
            return forwarded.split(',')[0].strip()
        return self.request.remote_ip

    def _report_visitor(self, hit, ip):
        """Put the visitor's ip and GeoIP country on the event queue."""
        self.eventQ.put(Signal('hit.info', dict(hit_id=hit.id, ip=ip)))
        try:
            geoip = self.geoip_reader.country(ip)
            logger.debug(f"Geo {geoip}")
            self.eventQ.put(Signal('hit.info', dict(hit_id=hit.id, location=geoip.country.name)))
        except Exception as e:
            logger.exception(e)
            logger.info("No geo IP possible")
            self.eventQ.put(Signal('hit.info', dict(hit_id=hit.id, location='Unknown')))

    def get(self):
        """Render the drawing page for the HIT given by the ``id`` query argument.

        Writes a short plain-text message instead when the id is missing or
        invalid, does not match the current HIT, or the HIT's page was
        already submitted. With the assignment id
        ``ASSIGNMENT_ID_NOT_AVAILABLE`` the page is rendered as a preview:
        no script, no assignment id, and no visitor info is reported.
        """
        try:
            hit_id = int(self.get_query_argument('id'))
            hit = self.store.currentHit
            if hit_id != hit.id:
                self.write("Invalid HIT")
                return
        except Exception:
            self.write("HIT not found")
            return

        if hit.submit_page_at:
            self.write("HIT already submitted")
            return

        assignmentId = self.get_query_argument('assignmentId', '')
        if len(assignmentId) < 1:
            logger.critical("Accessing page without assignment id. Allowing it for debug purposes... fingers crossed?")

        previewOnly = assignmentId == 'ASSIGNMENT_ID_NOT_AVAILABLE'

        previous_hit = self.store.getLastSubmittedHit()
        if not previous_hit:
            # start with basic svg
            logger.warning("No previous HIT, start from basic svg")
            image = "/basic.svg"
        else:
            image = previous_hit.getSvgImageUrl()

        logger.info(f"Image url: {image}")

        self.set_header("Access-Control-Allow-Origin", "*")

        with open(os.path.join(self.path, 'index.html'), 'r') as fp:
            contents = fp.read()

        if previewOnly:
            script = ''
            assignment = ''
        else:
            script = '<script type="text/javascript" src="/assignment.js"></script>'
            # The assignment id is a GET parameter: escape it before it is
            # inserted into the page.
            assignment = html.escape(str(assignmentId))

        contents = contents.replace("{IMAGE_URL}", image)\
            .replace("{WIDTH}", str(self.width))\
            .replace("{HEIGHT}", str(self.height))\
            .replace("{DRAW_WIDTH}", str(self.draw_width))\
            .replace("{DRAW_HEIGHT}", str(self.draw_height))\
            .replace("{TOP_PADDING}", str(self.top_padding))\
            .replace("{LEFT_PADDING}", str(self.left_padding))\
            .replace("{SCRIPT}", script)\
            .replace("{ASSIGNMENT}", assignment)
        self.write(contents)

        ip = self._client_ip()
        logger.info(f"Request from {ip}")

        if not previewOnly:
            self._report_visitor(hit, ip)
2019-10-23 10:56:28 +02:00
2019-10-31 14:35:24 +01:00
class BackendHandler(tornado.web.RequestHandler):
    """Serves ``backend.html`` with a table row for each HIT returned by the store."""

    def initialize(self, store: HITStore, path: str):
        """Store the HIT store and the directory containing ``backend.html``."""
        self.store = store
        self.path = path

    @staticmethod
    def _format_duration(hit):
        """Return the time between accept and submit as e.g. ``3m07s``, or ``-`` when unknown."""
        if not (hit.submit_hit_at and hit.accept_time):
            return "-"
        seconds = (hit.submit_hit_at - hit.accept_time).total_seconds()
        duration_m = int(seconds / 60)
        duration_s = max(int(seconds % 60), 0)
        return (f"{duration_m}m" if duration_m else "") + f"{duration_s:02d}s"

    @staticmethod
    def _format_fee(hit):
        """Return the fee with two decimals, or ``-`` when there is none."""
        # ".2f" = two decimals; a bare ".2" would mean two significant digits.
        return f"${hit.fee:.2f}" if hit.fee else "-"

    @classmethod
    def _render_row(cls, hit):
        """Render one HIT as a table row, escaping every value for HTML."""
        def cell(value):
            return html.escape(str(value))

        return f"""
<tr><td></td><td>{cell(hit.worker_id)}</td>
<td>{cell(hit.turk_ip)}</td>
<td>{cell(hit.turk_country)}</td>
<td>{cell(cls._format_fee(hit))}</td>
<td>{cell(hit.accept_time)}</td>
<td>{cell(cls._format_duration(hit))}</td><td></td></tr>
"""

    def get(self):
        """Write ``backend.html`` with ``{{TBODY}}`` replaced by the rows of ``store.getHITs(100)``."""
        rows = [self._render_row(hit) for hit in self.store.getHITs(100)]

        with open(os.path.join(self.path, 'backend.html'), 'r') as fp:
            contents = fp.read()

        self.write(contents.replace("{{TBODY}}", "".join(rows)))
2019-10-23 22:33:37 +02:00
class StatusPage():
    """
    Properties for on the status page, which are sent over websockets the
    moment they are altered.

    Every attribute assignment goes through ``__setattr__``, which schedules
    ``StatusWebSocketHandler.update_for_all`` on ``Server.loop`` when the
    value actually changed.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Set every status property to ``None``."""
        logger.info("Resetting status")
        self.hit_id = None
        self.worker_id = None
        self.ip = None
        self.location = None
        self.browser = None
        self.os = None
        self.resolution = None
        self.state = None
        self.fee = None
        self.hit_created = None
        self.hit_opened = None
        self.hit_submitted = None

    def clearAssignment(self):
        """Set the worker related properties and ``hit_created`` to ``None``.

        ``hit_id``, ``state``, ``fee``, ``hit_opened`` and ``hit_submitted``
        are left untouched.
        """
        logger.info("Resetting hit assignment")
        self.worker_id = None
        self.ip = None
        self.location = None
        self.browser = None
        self.os = None
        self.resolution = None
        self.hit_created = None

    def __setattr__(self, name, value):
        """Store ``value`` and broadcast it, unless it equals the current value."""
        if name in self.__dict__ and self.__dict__[name] == value:
            logger.debug(f"Ignore setting status of {name}: it already is set to {value}")
            return

        self.__dict__[name] = value
        logger.info(f"Update status: {name}: {value}")

        if Server.loop:
            # Hand the broadcast over to the server's loop via its
            # thread-safe scheduling call.
            Server.loop.asyncio_loop.call_soon_threadsafe(StatusWebSocketHandler.update_for_all, name, value)
        else:
            logger.warning("Status: no server loop to call update command")

    def set(self, name, value):
        """Set property ``name`` to ``value``; same as attribute assignment."""
        return self.__setattr__(name, value)
2019-10-23 10:56:28 +02:00
class Server:
    """
    Server for HIT -> plotter events
    As well as for the Status interface

    TODO: change to have the HIT_id as param to the page. Load hit from storage with previous image
    """
    # The running tornado IOLoop, set by start(); read by StatusPage to
    # schedule status broadcasts.
    loop = None

    def __init__(self, config, eventQ: Queue, runningEvent: Event, plotterQ: Queue, store: HITStore):
        """Store the collaborators; nothing is started until start() is called."""
        self.isRunning = runningEvent
        self.eventQ = eventQ
        self.config = config
        self.logger = logger

        self.plotterQ = plotterQ  # communicate directly to plotter (skip main thread)

        #self.config['server']['port']
        self.web_root = os.path.join('www')

        self.server_loop = None
        self.store = store
        self.statusPage = StatusPage()

    def _make_application(self):
        """Build the tornado application with all routes of this server."""
        scanner = self.config['scanner']
        return tornado.web.Application([
            (r"/ws(.*)", WebSocketHandler, {
                'config': self.config,
                'plotterQ': self.plotterQ,
                'eventQ': self.eventQ,
                'store': self.store,
            }),
            (r"/status/ws", StatusWebSocketHandler, dict(statusPage=self.statusPage)),
            (r"/draw", DrawPageHandler,
                dict(
                    store=self.store,
                    eventQ=self.eventQ,
                    path=self.web_root,
                    width=scanner['width'],
                    height=scanner['height'],
                    draw_width=scanner['draw_width'],
                    draw_height=scanner['draw_height'],
                    top_padding=scanner['top_padding'],
                    left_padding=scanner['left_padding'],
                    geoip_reader=self.geoip_reader,
                )),
            (r"/backend", BackendHandler,
                dict(
                    store=self.store,
                    path=self.web_root,
                )),
            # Catch-all: must stay last.
            (r"/(.*)", StaticFileWithHeaderHandler,
                {"path": self.web_root}),
        ], debug=True, autoreload=False)

    def start(self):
        """Create an event loop and serve until the loop is stopped.

        The loop is only started when the running event is set. On the way
        out the running event is cleared and the GeoIP reader is closed.

        :raises Exception: when ``GeoLite2-Country.mmdb`` is not present in
            the working directory.
        """
        if not os.path.exists('GeoLite2-Country.mmdb'):
            raise Exception("Please download the GeoLite2 Country database and place the 'GeoLite2-Country.mmdb' file in the project root.")

        self.geoip_reader = geoip2.database.Reader('GeoLite2-Country.mmdb')

        try:
            asyncio.set_event_loop(asyncio.new_event_loop())
            application = self._make_application()
            application.listen(self.config['server']['port'])
            self.server_loop = tornado.ioloop.IOLoop.current()
            Server.loop = self.server_loop
            if self.isRunning.is_set():
                self.server_loop.start()
        finally:
            self.logger.info("Stopping webserver")
            self.isRunning.clear()
            # No handler runs once the loop has returned; release the
            # database file.
            self.geoip_reader.close()

    def stop(self):
        """Ask the server loop, if any, to stop; safe to call from another thread."""
        if self.server_loop:
            self.logger.debug("Got call to stop")
            self.server_loop.asyncio_loop.call_soon_threadsafe(self._stop)

    def _stop(self):
        """Stop the server loop; scheduled on that loop by stop()."""
        self.server_loop.stop()