guest_worker/sorteerhoed/HITStore.py

139 lines
4.2 KiB
Python
Raw Normal View History

2019-10-23 08:56:28 +00:00
import logging
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, DateTime
from sqlalchemy.orm import relationship
from sqlalchemy.sql.schema import ForeignKey, Sequence
from sqlalchemy.engine import create_engine
from sqlalchemy.orm.session import sessionmaker
import datetime
from contextlib import contextmanager
import uuid
import os
import coloredlogs
import argparse
from sqlalchemy.sql.functions import func
# Logger named "sorteerhoed"; the module logger below is derived from it.
mainLogger = logging.getLogger("sorteerhoed")
# Logger used by this module; its full name is "sorteerhoed.store".
logger = mainLogger.getChild("store")
# Declarative base class that the ORM model(s) in this file inherit from.
Base = declarative_base()
"""
HIT lifetime:
    created
    accepted
    (returned!)
    working
    awaiting Amazon confirmation (submitted on page)
    submitted

Actions:
    creating HIT (creating a HIT with the scanned image)
    scanning
"""
class HIT(Base):
    """ORM model for one row of the ``hits`` table.

    A row tracks a single HIT: our own sequential id, the remote (Amazon)
    HIT id, timestamps of the steps the HIT passes through, the submitted
    answer, and some details about the worker's client.
    """

    __tablename__ = 'hits'

    id = Column(Integer, Sequence('hit_id'), primary_key=True)  # our sequential hit id
    hit_id = Column(String(255))  # amazon's hit id

    # Pass the callable, not its result: ``datetime.datetime.now()`` would be
    # evaluated once at import time and every row would get that same value.
    created_at = Column(DateTime, default=datetime.datetime.now)
    # ``onupdate`` refreshes the timestamp whenever the row is UPDATEd.
    updated_at = Column(
        DateTime,
        default=datetime.datetime.now,
        onupdate=datetime.datetime.now,
    )

    # The lambda resolves ``uuid`` in module globals at call time, so it still
    # refers to the ``uuid`` module even though this attribute shares its name.
    uuid = Column(String(32), default=lambda: uuid.uuid4().hex)

    assignment_id = Column(String(255), default=None)
    worker_id = Column(String(255), default=None)

    # Timestamps of the individual steps; NULL until the step happens.
    accept_time = Column(DateTime, default=None)
    open_page_at = Column(DateTime, default=None)
    submit_page_at = Column(DateTime, default=None)
    submit_hit_at = Column(DateTime, default=None)

    answer = Column(String(255), default=None)

    # Details about the worker's client.
    turk_ip = Column(String(255), default=None)
    turk_country = Column(String(255), default=None)
    turk_screen_width = Column(Integer, default=None)
    turk_screen_height = Column(Integer, default=None)

    def getImagePath(self):
        """Return the image path: ``getImageUrl()`` joined under ``www``."""
        return os.path.join('www', self.getImageUrl())

    def getImageUrl(self):
        """Return the relative URL of this HIT's scan, ``scans/<id>.png``."""
        return f"scans/{self.id}.png"
2019-10-23 08:56:28 +00:00
class Store:
    """SQLite-backed persistence layer for :class:`HIT` records.

    One SQLAlchemy session is created in ``__init__`` and shared by every
    method of the instance.
    """

    def __init__(self, db_filename, logLevel=0):
        """Open the database at *db_filename* and create missing tables.

        Args:
            db_filename: Path of the SQLite file; made absolute before use.
            logLevel: A ``logging`` level. When it is ``logging.DEBUG`` or
                lower (this includes the default ``0``), the
                ``sqlalchemy.engine`` logger is set to ``INFO``.
        """
        path = os.path.abspath(db_filename)
        if logLevel <= logging.DEBUG:
            logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)

        # check_same_thread=False disables sqlite3's check that a connection
        # is only used from the thread that created it.
        self.engine = create_engine(
            'sqlite:///' + path,
            echo=False,
            connect_args={'check_same_thread': False},
        )
        Base.metadata.create_all(self.engine)
        self.Session = sessionmaker(bind=self.engine)
        self.session = self.Session()

    @contextmanager
    def getSession(self):
        """Provide a transactional scope around a series of operations.

        Yields the shared session. Commits when the ``with`` body finishes
        normally; on any exception, rolls back and re-raises it.
        """
        try:
            yield self.session
            self.session.commit()
        except BaseException:
            # Same scope as the former bare ``except:``; roll back on
            # anything, including KeyboardInterrupt, then propagate.
            self.session.rollback()
            raise

    def getHits(self, session):
        """Return a query over all HITs, newest ``created_at`` first.

        Args:
            session: Ignored; kept so existing callers keep working. The
                store's own shared session is used instead.
        """
        # Previously queried the undefined name ``Source`` (NameError).
        return self.session.query(HIT).order_by(HIT.created_at.desc())

    def getHitById(self, hitId):
        """Return the HIT whose local ``id`` equals *hitId*.

        Raises:
            sqlalchemy.orm.exc.NoResultFound: No such row exists.
            sqlalchemy.orm.exc.MultipleResultsFound: More than one row matches.
        """
        return self.session.query(HIT).filter(HIT.id == hitId).one()

    def getHitByRemoteId(self, amazonHitId):
        """Return the HIT whose remote ``hit_id`` equals *amazonHitId*.

        Raises:
            sqlalchemy.orm.exc.NoResultFound: No such row exists.
            sqlalchemy.orm.exc.MultipleResultsFound: More than one row matches.
        """
        return self.session.query(HIT).filter(HIT.hit_id == amazonHitId).one()

    def getLastSubmittedHit(self):
        """Return the HIT with the latest ``submit_page_at``, or ``None``."""
        # ``!= None`` is intentional: SQLAlchemy turns it into ``IS NOT NULL``;
        # Python's ``is not None`` would not build a SQL expression.
        submitted = self.session.query(HIT).filter(HIT.submit_page_at != None)  # noqa: E711
        return submitted.order_by(HIT.submit_page_at.desc()).first()

    def createHIT(self):
        """Create, persist and return a new, empty HIT."""
        with self.getSession() as s:
            hit = HIT()
            s.add(hit)
            # Flush and refresh so the generated id and column defaults are
            # loaded before the id is logged.
            s.flush()
            s.refresh(hit)
            logger.info(f"Created HIT {hit.id}")
            return hit

    def saveHIT(self, hit):
        """Commit pending changes of the shared session.

        *hit* itself is not added to the session here.
        NOTE(review): presumably it is expected to be attached already
        (e.g. obtained from this store); confirm against callers.
        """
        with self.getSession():
            logger.info(f"Updating hit! {hit.id}")

    def addHIT(self, hit: HIT):
        """Add an externally constructed *hit* to the database and commit."""
        with self.getSession() as s:
            s.add(hit)
            s.flush()
            s.refresh(hit)
            logger.info(f"Added {hit.id}")
2019-10-23 08:56:28 +00:00
# def rmSource(self, id: int):
# with self.getSession() as session:
# source = session.query(Source).get(id)
# if not source:
# logging.warning(f"Source nr {id} not found")
# else:
# logging.info(f"Deleting source {source.id}: {source.url}")
# session.delete(source)
#
# def getRandomNewsItem(self, session) -> NewsItem:
# return session.query(NewsItem).order_by(func.random()).limit(1).first()