WIP. assignment has separate table. and prepare for more advanced status display
This commit is contained in:
parent
5d8ef56686
commit
ac04fb6082
4 changed files with 302 additions and 137 deletions
|
@ -42,35 +42,34 @@ class HIT(Base):
|
|||
hit_id = Column(String(255)) # amazon's hit id
|
||||
created_at = Column(DateTime, default=datetime.datetime.utcnow)
|
||||
updated_at = Column(DateTime, default=datetime.datetime.utcnow)
|
||||
uuid = Column(String(32), default=lambda : uuid.uuid4().hex)
|
||||
assignment_id = Column(String(255), default = None)
|
||||
worker_id = Column(String(255), default = None)
|
||||
accept_time = Column(DateTime, default=None)
|
||||
open_page_at = Column(DateTime, default=None)
|
||||
submit_page_at = Column(DateTime, default=None)
|
||||
submit_hit_at = Column(DateTime, default=None)
|
||||
answer = Column(String(255), default=None)
|
||||
turk_ip = Column(String(255), default=None)
|
||||
turk_country = Column(String(255), default=None)
|
||||
turk_screen_width = Column(Integer, default = None)
|
||||
turk_screen_height = Column(Integer, default = None)
|
||||
scanned_at = Column(DateTime, default=None)
|
||||
deleted_at = Column(DateTime, default=None)
|
||||
assignments = relationship("Assignment", back_populates="hit", order_by="Assignment.created_at")
|
||||
fee = Column(Float(precision=2), default=None)
|
||||
abandoned = False
|
||||
|
||||
# previous hit so we can load the corrent image
|
||||
# previous_hit_id = Column(Integer, ForeignKey('hits.id'), default=None)
|
||||
# previous_hit = relationship("HIT")
|
||||
|
||||
def getImagePath(self):
|
||||
return os.path.join('scanimation/interfaces/frames', f"{self.id:06d}.jpg")
|
||||
|
||||
# def getImageUrl(self):
|
||||
# return f"{self.id}.jpg"
|
||||
|
||||
def getSvgImageUrl(self):
|
||||
return f"scans/{self.id:06d}.svg"
|
||||
|
||||
def getSvgImagePath(self):
|
||||
return os.path.join('www', self.getSvgImageUrl())
|
||||
|
||||
def getLastAssignment(self):
|
||||
if not len(self.assignments):
|
||||
return None
|
||||
return self.assignments[-1]
|
||||
|
||||
def getAssignmentById(self, assignmentId):
|
||||
for a in self.assignments:
|
||||
if a.assignment_id == assignmentId:
|
||||
return
|
||||
|
||||
def getStatus(self):
|
||||
if self.scanned_at:
|
||||
return "completed"
|
||||
|
@ -87,7 +86,29 @@ class HIT(Base):
|
|||
return "abandoned by worker"
|
||||
return "awaiting worker"
|
||||
|
||||
class Assignment(Base):
|
||||
__tablename__ = 'assignments'
|
||||
id = Column(Integer, Sequence('assignment_id'), primary_key=True) # our sequential hit id
|
||||
assignment_id = Column(String(255)) # amazon's assignment id
|
||||
hit_id = Column(Integer, ForeignKey('hits.id')) # our sequential hit id
|
||||
hit = relationship("HIT", back_populates="assignments")
|
||||
uuid = Column(String(32), default=lambda : uuid.uuid4().hex)
|
||||
|
||||
created_at = Column(DateTime, default=datetime.datetime.utcnow)
|
||||
updated_at = Column(DateTime, default=datetime.datetime.utcnow)
|
||||
|
||||
assignment_id = Column(String(255), default = None)
|
||||
worker_id = Column(String(255), default = None)
|
||||
accept_at = Column(DateTime, default=None)
|
||||
# open_page_at = Column(DateTime, default=None)
|
||||
submit_page_at = Column(DateTime, default=None) # Submit the page
|
||||
confirmed_at = Column(DateTime, default=None) # validate with UUID when getting Message from Amazon
|
||||
abandoned_at = Column(DateTime, default=None)
|
||||
rejected_at = Column(DateTime, default=None)
|
||||
|
||||
answer = Column(String(255), default=None)
|
||||
turk_ip = Column(String(255), default=None)
|
||||
turk_country = Column(String(255), default=None)
|
||||
|
||||
class Store:
|
||||
def __init__(self, db_filename, logLevel=0):
|
||||
|
@ -129,7 +150,7 @@ class Store:
|
|||
filter(HIT.submit_page_at!=None).\
|
||||
order_by(HIT.submit_page_at.desc()).first()
|
||||
|
||||
def createHIT(self):
|
||||
def createHIT(self) -> HIT:
|
||||
with self.getSession() as s:
|
||||
hit = HIT()
|
||||
s.add(hit)
|
||||
|
@ -138,6 +159,16 @@ class Store:
|
|||
logger.info(f"Created HIT {hit.id}")
|
||||
return hit
|
||||
|
||||
def newAssignment(self, hit: HIT) -> Assignment:
|
||||
with self.getSession() as s:
|
||||
assignment = Assignment()
|
||||
hit.assignments.append(assignment)
|
||||
s.add(assignment)
|
||||
s.flush()
|
||||
s.refresh(hit)
|
||||
logger.info(f"Created Assignment {assignment.id}")
|
||||
return assignment
|
||||
|
||||
def saveHIT(self, hit):
|
||||
with self.getSession() as s:
|
||||
logger.info(f"Updating hit! {hit.id}")
|
||||
|
@ -184,4 +215,3 @@ class Store:
|
|||
#
|
||||
# def getRandomNewsItem(self, session) -> NewsItem:
|
||||
# return session.query(NewsItem).order_by(func.random()).limit(1).first()
|
||||
|
||||
|
|
|
@ -76,6 +76,7 @@ class CentralManagement():
|
|||
self.lastHitTime = None
|
||||
|
||||
self.eventQueue = Queue()
|
||||
self.statusPageQueue = Queue()
|
||||
self.isRunning = threading.Event()
|
||||
self.isScanning = threading.Event()
|
||||
self.scanLock = threading.Lock()
|
||||
|
@ -210,38 +211,67 @@ class CentralManagement():
|
|||
if signal.name == 'start':
|
||||
self.makeHit()
|
||||
self.lastHitTime = datetime.datetime.now()
|
||||
pass
|
||||
elif signal.name == 'hit.scan':
|
||||
if signal.params['id'] != self.currentHit.id:
|
||||
self.logger.info(f"Hit.scanned had wrong id: {signal}")
|
||||
continue
|
||||
self.statusPageQueue.add(dict(hit_id=signal.params['id'], transition='scanning'))
|
||||
|
||||
elif signal.name == 'hit.scanned':
|
||||
# TODO: wrap up hit & make new HIT
|
||||
if signal.params['id'] != self.currentHit.id:
|
||||
self.logger.info(f"Hit.scanned had wrong id: {signal}")
|
||||
continue
|
||||
|
||||
self.currentHit.scanned_at = datetime.datetime.utcnow()
|
||||
self.server.statusPage.set('state', self.currentHit.getStatus())
|
||||
time_diff = datetime.datetime.now() - self.lastHitTime
|
||||
to_wait = 10 - time_diff.total_seconds()
|
||||
self.statusPageQueue.add(dict(hit_id=self.currentHit.id, state='scan'))
|
||||
|
||||
if to_wait > 0:
|
||||
self.logger.warn(f"Sleep until next hit: {to_wait}s")
|
||||
time.sleep(to_wait)
|
||||
else:
|
||||
self.logger.info(f"No need to wait: {to_wait}s")
|
||||
|
||||
self.makeHit()
|
||||
self.lastHitTime = datetime.datetime.now()
|
||||
elif signal.name == 'hit.creating':
|
||||
self.statusPageQueue.add(dict(hit_id=signal.params['id'], transition='create_hit'))
|
||||
elif signal.name == 'hit.created':
|
||||
self.statusPageQueue.add(dict(hit_id=signal.params['id'], remote_id=signal.params['remote_id'], state='hit'))
|
||||
|
||||
elif signal.name == 'scan.start':
|
||||
pass
|
||||
elif signal.name == 'scan.finished':
|
||||
# probably see hit.scanned
|
||||
pass
|
||||
elif signal.name == 'hit.info':
|
||||
if signal.params['hit_id'] != self.currentHit.id:
|
||||
self.logger.warning(f"hit.info hit_id != currenthit.id: {signal}")
|
||||
continue
|
||||
for name, value in signal.params.items():
|
||||
if name == 'hit_id':
|
||||
continue
|
||||
if name == 'ip':
|
||||
self.currentHit.turk_ip = value
|
||||
if name == 'location':
|
||||
self.currentHit.turk_country = value
|
||||
|
||||
self.logger.debug(f'Set status: {name} to {value}')
|
||||
elif signal.name == 'hit.assignment':
|
||||
# Create new assignment
|
||||
if signal.params['hit_id'] != self.currentHit.id:
|
||||
continue
|
||||
|
||||
assignment = self.store.newAssignment(self.currentHit)
|
||||
assignment.assignment_id = signal.params['assignment_id']
|
||||
self.store.saveAssignment(assignment)
|
||||
|
||||
self.statusPageQueue.add(dict(hit_id=self.currentHit.id, assignment_id=assignment.assignment_id, state='assignment'))
|
||||
|
||||
elif signal.name == 'assignment.info':
|
||||
assignment = self.currentHit.getAssignmentById(signal.params['assignment_id'])
|
||||
if not assignment:
|
||||
self.logger.warning(f"assignment.info assignment.id not for current hit assignments: {signal}")
|
||||
|
||||
for name, value in signal.params.items():
|
||||
if name == 'ip':
|
||||
assignment.turk_ip = value
|
||||
if name == 'location':
|
||||
assignment.turk_country = value
|
||||
|
||||
self.logger.debug(f'Set assignment: {name} to {value}')
|
||||
self.server.statusPage.set(name, value)
|
||||
|
||||
elif signal.name == 'server.open':
|
||||
self.currentHit.open_page_at = datetime.datetime.utcnow()
|
||||
self.store.saveHIT(self.currentHit)
|
||||
|
@ -345,12 +375,13 @@ class CentralManagement():
|
|||
def makeHit(self):
|
||||
self.expireCurrentHit() # expire hit if it is there
|
||||
|
||||
self.eventQueue.put(Signal('hit.creating', {'id': self.currentHit.id if self.currentHit else 'start'}))
|
||||
|
||||
self.server.statusPage.reset()
|
||||
self.reloadConfig() # reload new config values if they are set
|
||||
|
||||
# self.notPaused.wait()
|
||||
|
||||
|
||||
self.currentHit = self.store.createHIT()
|
||||
self.store.currentHit = self.currentHit
|
||||
|
||||
|
@ -392,10 +423,12 @@ class CentralManagement():
|
|||
|
||||
self.store.saveHIT(self.currentHit)
|
||||
# TODO: have HITStore/HIT take care of this by emitting a signal
|
||||
self.server.statusPage.set('hit_id', new_hit['HIT']['HITId'])
|
||||
self.server.statusPage.set('hit_created', self.currentHit.created_at)
|
||||
self.server.statusPage.set('fee', f"${self.currentHit.fee:.2f}")
|
||||
self.server.statusPage.set('state', self.currentHit.getStatus())
|
||||
# self.server.statusPage.set('hit_id', new_hit['HIT']['HITId'])
|
||||
# self.server.statusPage.set('hit_created', self.currentHit.created_at)
|
||||
# self.server.statusPage.set('fee', f"${self.currentHit.fee:.2f}")
|
||||
# self.server.statusPage.set('state', self.currentHit.getStatus())
|
||||
|
||||
self.eventQueue.put(Signal('hit.created', {'id': self.currentHit.id, 'remote_id': self.currentHit.hit_id}))
|
||||
|
||||
# mturk.send_test_event_notification()
|
||||
if self.config['amazon']['sqs_url']:
|
||||
|
@ -469,6 +502,7 @@ class CentralManagement():
|
|||
filename = self.currentHit.getImagePath()
|
||||
|
||||
with self.scanLock:
|
||||
self.eventQueue.put(Signal('hit.scan', {'id':self.currentHit.id}))
|
||||
self.eventQueue.put(Signal('scan.start'))
|
||||
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
# opens connection to scanner, but only starts scanning when output becomes ready:
|
||||
|
@ -491,7 +525,7 @@ class CentralManagement():
|
|||
|
||||
time.sleep(5) # sleep a few seconds for scanner to return to start position
|
||||
|
||||
self.eventQueue.put(Signal('hit.scanned', {'hit_id':self.currentHit.id}))
|
||||
self.eventQueue.put(Signal('hit.scanned', {'id':self.currentHit.id}))
|
||||
self.eventQueue.put(Signal('scan.finished'))
|
||||
|
||||
def setLight(self, on):
|
||||
|
|
89
sorteerhoed/statemachine.py
Normal file
89
sorteerhoed/statemachine.py
Normal file
|
@ -0,0 +1,89 @@
|
|||
import datetime
|
||||
|
||||
class State():
|
||||
def __init__(self, hit_id):
|
||||
self.time = datetime.datetime.now()
|
||||
self.hit_id = params['hit_id']
|
||||
|
||||
def transition(self, transitionName, params = {}):
|
||||
raise Exception("Not implemented")
|
||||
|
||||
class StateMachine:
|
||||
def __init__(self, initalState):
|
||||
self.history = [('init',initialState)]
|
||||
|
||||
def current(self):
|
||||
return self.history[-1][1]
|
||||
|
||||
def transition(self, transitionName, params):
|
||||
# TODO: update Store & Interface
|
||||
if transitionName not in self.current().availableTransitions:
|
||||
raise Exception("Invalid transition")
|
||||
|
||||
newState = self.current().transition(transitionName, params)
|
||||
if not newState:
|
||||
raise RuntimeException(f"Invalid transition {transitionName} for {self.current()}")
|
||||
|
||||
self.history.append((transitionName, newState))
|
||||
|
||||
def getStateForHit(self, hit_id, stateCls = None):
|
||||
states = [s for s in self.history if s[1].hit_id == hit_id and (stateCls is None or isinstance(s[1], stateCls))]
|
||||
if len(states < 1):
|
||||
return None
|
||||
return states[-1]
|
||||
|
||||
class HITCreated(State):
|
||||
availableTransitions = ['accept']
|
||||
|
||||
self.state = None
|
||||
self.fee = None
|
||||
self.hit_created = None
|
||||
self.hit_opened = None
|
||||
self.hit_submitted = None
|
||||
def transition(self, transitionName, params = {}):
|
||||
if transitionName == 'accept':
|
||||
return HITAssigned(params['hit_id'], params['assignment_id'])
|
||||
|
||||
class HITAssigned(State):
|
||||
availableTransitions = ['reject', 'abandon', 'submit']
|
||||
|
||||
def __init__(self, hit_id):
|
||||
self.assignment_id = None
|
||||
self.worker_id = None
|
||||
self.ip = None
|
||||
self.location = None
|
||||
self.browser = None
|
||||
self.os = None
|
||||
self.resolution = None
|
||||
|
||||
def transition(self, transitionName, params = {}):
|
||||
if transitionName == 'reject' or transitionName == 'abandon':
|
||||
return HITAbandonedRejected(params['hit_id'])
|
||||
if transitionName == 'submit':
|
||||
return HITSubmitted(params['hit_id'])
|
||||
|
||||
class HITAbandonedRejected(State):
|
||||
availableTransitions = ['accept']
|
||||
def transition(self, transitionName, params = {}):
|
||||
if transitionName == 'accept':
|
||||
return HITAssigned(params['hit_id'])
|
||||
|
||||
class HITSubmitted(State):
|
||||
availableTransitions = ['scan']
|
||||
def transition(self, transitionName, params = {}):
|
||||
if transitionName == 'scan':
|
||||
return Scanning(params['hit_id'])
|
||||
|
||||
class Scanning(State):
|
||||
availableTransitions = ['scan_complete', 'scan_failed']
|
||||
def transition(self, transitionName, params = {}):
|
||||
if transitionName == 'scan_complete':
|
||||
return ImageAvailable(params['hit_id'])
|
||||
if transitionName == 'scan_failed':
|
||||
raise Exception("Scan failed, unknown state")
|
||||
|
||||
class ImageAvailable(State):
|
||||
availableTransitions = ['create_hit']
|
||||
def transition(self, transitionName, params = {}):
|
||||
if transitionName == 'create_hit':
|
||||
return HITCreated(params['hit_id'])
|
|
@ -38,6 +38,9 @@ class StaticFileWithHeaderHandler(tornado.web.StaticFileHandler):
|
|||
|
||||
|
||||
class WebSocketHandler(tornado.websocket.WebSocketHandler):
|
||||
"""
|
||||
Websocket from the workers
|
||||
"""
|
||||
CORS_ORIGINS = ['localhost', '.mturk.com', 'here.rubenvandeven.com', 'guest.rubenvandeven.com']
|
||||
connections = set()
|
||||
|
||||
|
@ -63,27 +66,26 @@ class WebSocketHandler(tornado.websocket.WebSocketHandler):
|
|||
|
||||
self.hit = self.store.currentHit
|
||||
|
||||
|
||||
self.assignment_id = int(self.get_query_argument('assignment_id'))
|
||||
|
||||
self.timeout = datetime.datetime.now() + datetime.timedelta(seconds=self.store.getHitTimeout())
|
||||
|
||||
if self.hit.submit_hit_at:
|
||||
raise Exception("Opening websocket for already submitted hit")
|
||||
|
||||
#logger.info(f"New client connected: {self.request.remote_ip} for {self.hit.id}/{self.hit.hit_id}")
|
||||
self.eventQ.put(Signal('server.open', dict(hit_id=self.hit.id)))
|
||||
self.eventQ.put(Signal('server.open', dict(assignment_id=self.assignment_id)))
|
||||
self.strokes = []
|
||||
|
||||
# Gather some initial information:
|
||||
ua = self.request.headers.get('User-Agent', None)
|
||||
if ua:
|
||||
ua_info = httpagentparser.detect(ua)
|
||||
self.eventQ.put(Signal('hit.info', dict(hit_id=self.hit.id, os=ua_info['os']['name'], browser=ua_info['browser']['name'])))
|
||||
|
||||
# self.write_message("hello!")
|
||||
|
||||
# the client sent the message
|
||||
def on_message(self, message):
|
||||
logger.debug(f"recieve: {message}")
|
||||
|
||||
if self.assignment_id != self.hit.getLastAssignment().assignment_id:
|
||||
logger.critical(f"Skip message for non-last assignment {message}")
|
||||
|
||||
if datetime.datetime.now() > self.timeout:
|
||||
logger.critical("Close websocket after timeout (abandon?)")
|
||||
self.close()
|
||||
|
@ -325,16 +327,24 @@ class DrawPageHandler(tornado.web.RequestHandler):
|
|||
logger.info(f"Request from {ip}")
|
||||
|
||||
if not previewOnly:
|
||||
self.eventQ.put(Signal('hit.info', dict(hit_id=hit.id, ip=ip)))
|
||||
self.eventQ.put(Signal('hit.assignment', dict(
|
||||
hit_id=hit.id, ip=ip, assignment_id=assignmentId
|
||||
)))
|
||||
# self.eventQ.put(Signal('hit.info', dict(hit_id=hit.id, ip=ip)))
|
||||
|
||||
try:
|
||||
geoip = self.geoip_reader.country(ip)
|
||||
logger.debug(f"Geo {geoip}")
|
||||
self.eventQ.put(Signal('hit.info', dict(hit_id=hit.id, location=geoip.country.name)))
|
||||
self.eventQ.put(Signal('assignment.info', dict(assignment_id=assignmentId, location=geoip.country.name)))
|
||||
except Exception as e:
|
||||
logger.exception(e)
|
||||
logger.info("No geo IP possible")
|
||||
self.eventQ.put(Signal('hit.info', dict(hit_id=hit.id, location='Unknown')))
|
||||
self.eventQ.put(Signal('assignment.info', dict(assignment_id=assignmentId, location='Unknown')))
|
||||
|
||||
ua = self.request.headers.get('User-Agent', None)
|
||||
if ua:
|
||||
ua_info = httpagentparser.detect(ua)
|
||||
self.eventQ.put(Signal('assignment.info', dict(assignment_id=assignmentId, os=ua_info['os']['name'], browser=ua_info['browser']['name'])))
|
||||
|
||||
class BackendHandler(tornado.web.RequestHandler):
|
||||
def initialize(self, store: HITStore, path: str):
|
||||
|
@ -418,6 +428,8 @@ class StatusPage():
|
|||
def set(self, name, value):
|
||||
return self.__setattr__(name, value)
|
||||
|
||||
|
||||
|
||||
class Server:
|
||||
"""
|
||||
Server for HIT -> plotter events
|
||||
|
|
Loading…
Reference in a new issue