From 5a2f17445eac197d5e13efe36e363120284febcb Mon Sep 17 00:00:00 2001
From: Ruben van de Ven <git@rubenvandeven.com>
Date: Thu, 17 Dec 2020 14:57:03 +0100
Subject: [PATCH] create database scripts

---
 .gitignore       |   3 +-
 README.md        |  23 ++++
 coco/__init__.py |   0
 coco/coco.sql    |  45 +++++++
 coco/storage.py  | 321 +++++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt |   6 +
 tools.py         |  94 ++++++++++++++
 7 files changed, 491 insertions(+), 1 deletion(-)
 create mode 100644 README.md
 create mode 100644 coco/__init__.py
 create mode 100644 coco/coco.sql
 create mode 100644 coco/storage.py
 create mode 100644 requirements.txt
 create mode 100644 tools.py

diff --git a/.gitignore b/.gitignore
index 354a328..4b021e1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
-dataset/
\ No newline at end of file
+dataset/
+*.pyc
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..3a8b1ae
--- /dev/null
+++ b/README.md
@@ -0,0 +1,23 @@
+# Auto-accept : part of the Accept & Work series
+
+## Install:
+
+```bash
+virtualenv --system-site-packages -p  python3 venv
+source venv/bin/activate
+./download_dataset.sh
+python tools.py create 
+```
+
+To use a custom dataset version (defaults to the smaller validation 2017 set)
+
+```bash
+python tools.py create --annotations FILENAME.json --db dataset/NAME.db
+```
+
+
+
+## Server
+
+Python server. Based on work for the [COCO dataset interface](https://git.rubenvandeven.com/plottingdata/coco) for [Plotting Data](http://plottingd.at/a).
+
diff --git a/coco/__init__.py b/coco/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/coco/coco.sql b/coco/coco.sql
new file mode 100644
index 0000000..affd752
--- /dev/null
+++ b/coco/coco.sql
@@ -0,0 +1,45 @@
+BEGIN TRANSACTION;
+CREATE TABLE IF NOT EXISTS "segments" (
+	"id"	INTEGER UNIQUE,
+	"annotation_id"	INTEGER,
+	"points"	TEXT, -- flat coordinate list stored as text: "x1, y1, x2, y2, ..."
+	PRIMARY KEY("id")
+) WITHOUT ROWID;
+CREATE TABLE IF NOT EXISTS "categories" (
+	"id"	INTEGER UNIQUE,
+	"supercategory"	TEXT,
+	"name"	TEXT UNIQUE,
+	PRIMARY KEY("id")
+) WITHOUT ROWID;
+CREATE TABLE IF NOT EXISTS "images" (
+	"id"	INTEGER UNIQUE,
+	"flickr_url"	TEXT,
+	"coco_url"	TEXT,
+	"width"	FLOAT,
+	"height"	FLOAT,
+	"date_captured"	DATETIME,
+	PRIMARY KEY("id")
+) WITHOUT ROWID;
+CREATE TABLE IF NOT EXISTS "annotations" (
+	"id"	INTEGER UNIQUE,
+	"image_id"	INTEGER,
+	"category_id"	INTEGER,
+	"iscrowd"	BOOL,
+	"area"	FLOAT,
+	"bbox_top"	FLOAT,
+	"bbox_left"	FLOAT,
+	"bbox_width"	FLOAT,
+	"bbox_height"	FLOAT,
+	"zerkine_moment"	TEXT DEFAULT NULL, -- space-separated floats; name must match the queries in storage.py
+	PRIMARY KEY("id")
+) WITHOUT ROWID;
+CREATE INDEX IF NOT EXISTS "segments_annotation" ON "segments" (
+	"annotation_id"
+);
+CREATE INDEX IF NOT EXISTS "annotations_image" ON "annotations" (
+	"image_id"
+);
+CREATE INDEX IF NOT EXISTS "annotations_category" ON "annotations" (
+	"category_id"
+);
+COMMIT;
diff --git a/coco/storage.py b/coco/storage.py
new file mode 100644
index 0000000..50e40dc
--- /dev/null
+++ b/coco/storage.py
@@ -0,0 +1,321 @@
+import logging
+import os
+import sqlite3
+import pycocotools.coco
+import ast
+import copy
+import svgwrite
+
+logger = logging.getLogger('coco.storage')
+
+class Annotation:
+    def __init__(self, result, storage):
+        self.storage = storage
+        self.id = result['id']
+        self.image_id = result['image_id']
+        self.category_id = result['category_id']
+        self.iscrowd = bool(result['iscrowd'])
+        self.area = result['area']
+        self.bbox = [result['bbox_left'], result['bbox_top'], result['bbox_width'], result['bbox_height']]
+        self.segments = self.fetchSegments()
+        self.is_normalised = False
+        if type(result['zerkine_moment']) is list:
+            self.zerkine_moment = result['zerkine_moment'] # when normalising, this is already there
+        else:
+            self.zerkine_moment = self.parseZerkineFromDB(result['zerkine_moment']) if result['zerkine_moment'] else None
+
+    @classmethod
+    def parseZerkineFromDB(cls, r):
+        z = r.split(' ')
+        return [float(i) for i in z]
+
+    def fetchSegments(self):
+        try:
+            cur = self.storage.con.cursor()
+            cur.execute("SELECT * FROM segments WHERE annotation_id = :id AND points != 'ount' AND points != 'iz'", {'id': self.id})
+            segments = []
+            for row in cur:
+                segments.append(Segment(row))
+        except Exception as e:
+            logger.critical(f"Invalid segment for annotation {self.id}")
+            logger.exception(e)
+            raise(e)
+        return segments
+
+    def getNormalised(self, width, height) -> 'Annotation':
+        '''
+        center segments in boundig box with given width and height, and on point 0,0
+        '''
+        scale = min(width/self.bbox[2], height/self.bbox[3])
+        logger.debug(f"Normalise from bbox: {self.bbox}")
+        new_width = self.bbox[2] * scale
+        new_height = self.bbox[3] * scale
+
+        dx = (width - new_width) / 2
+        dy = (height - new_height) / 2
+
+        data = self.forJson()
+        data['bbox_left'] = 0
+        data['bbox_top'] = 0
+        data['bbox_width'] = new_width
+        data['bbox_height'] = new_height
+        newAnn = Annotation(data, self.storage)
+        newAnn.is_normalised = True
+        newAnn.bbox_original = self.bbox
+        newAnn.scale = scale
+
+        for i, segment in enumerate(newAnn.segments):
+            newAnn.segments[i].points = [[
+                (p[0]-self.bbox[0]) * scale,
+                (p[1]-self.bbox[1]) * scale
+                ] for p in segment.points]
+
+
+        return newAnn
+
+    def forJson(self):
+        data = self.__dict__.copy()
+        del data['storage']
+        data['image'] = self.storage.getImage(data['image_id'])
+        return data
+
+    def writeToDrawing(self, dwg, **pathSpecs):
+        for segment in self.segments:
+            if len(pathSpecs) == 0:
+                pathSpecs['fill'] = 'white'
+            dwg.add(svgwrite.path.Path(segment.getD(), class_=f"cat_{self.category_id}", **pathSpecs))
+
+    def getTranslationToCenter(self):
+        dimensions = (self.bbox[2], self.bbox[3])
+        targetSize = max(dimensions)
+        dx = (dimensions[0] - targetSize)/2
+        dy = (dimensions[1] - targetSize)/2
+        return (dx, dy)
+    
+    def asSvg(self, filename, square=False, bg=None):
+        dimensions = (self.bbox[2], self.bbox[3])
+        viewbox = copy.copy(self.bbox)
+        if square:
+            targetSize = max(dimensions)
+            dx = (dimensions[0] - targetSize)/2
+            dy = (dimensions[1] - targetSize)/2
+            viewbox[2] = targetSize
+            viewbox[3] = targetSize
+            dimensions = (targetSize, targetSize)
+            viewbox[0] += dx
+            viewbox[1] += dy
+        dwg = svgwrite.Drawing(
+            filename,
+            size=dimensions,
+            viewBox=" ".join([str(s) for s in viewbox])
+            )
+
+        if bg:
+            dwg.add(dwg.rect(
+                (viewbox[0],viewbox[1]),
+                (viewbox[2],viewbox[3]),
+                fill=bg))
+        self.writeToDrawing(dwg)
+        return dwg
+
+class Segment():
+    def __init__(self, result):
+        try:
+            self.points = self.asCoordinates(ast.literal_eval('['+result['points']+']'))
+        except Exception as e:
+            logger.critical(f"Exception loading segment for {result} {result['points']}")
+            raise
+
+    @classmethod
+    def asCoordinates(cls, pointList):
+        points = []
+
+        r = len(pointList) / 2
+        for i in range(int(r)):
+            points.append([
+                pointList[(i)*2],
+                pointList[(i)*2+1]
+                ])
+        return points
+
+    def getD(self):
+        start = self.points[0]
+        d = f'M{start[0]:.4f} {start[1]:.4f} L'
+        for i in range(1, len(self.points)):
+            p = self.points[i]
+            d += f' {p[0]:.4f} {p[1]:.4f}'
+        d += " Z" # segments are always closed
+        return d
+
+    def forJson(self):
+        return self.points
+
+class COCOStorage:
+    def __init__(self, filename):
+        self.logger = logging.getLogger('coco.storage')
+        self.filename = filename
+        if not os.path.exists(self.filename):
+            con = sqlite3.connect(self.filename)
+            cur = con.cursor()
+            d = os.path.dirname(os.path.realpath(__file__))
+            with open(os.path.join(d,'coco.sql'), 'r') as fp:
+                cur.executescript(fp.read())
+            con.close()
+
+        self.con = sqlite3.connect(self.filename)
+        self.con.row_factory = sqlite3.Row
+
+    def propagateFromAnnotations(self, filename):
+        self.logger.info(f"Load {filename}")
+        coco = pycocotools.coco.COCO(filename)
+
+        self.logger.info("Create categories")
+        cur = self.con.cursor()
+        cur.executemany('INSERT OR IGNORE INTO categories(id, supercategory, name) VALUES (:id, :supercategory, :name)', coco.cats.values())
+        self.con.commit()
+
+        self.logger.info("Images...")
+        cur.executemany('''
+            INSERT OR IGNORE INTO images(id, flickr_url, coco_url, width, height, date_captured)
+            VALUES (:id, :flickr_url, :coco_url, :width, :height, :date_captured)
+            ''', coco.imgs.values())
+        self.con.commit()
+
+        self.logger.info("Annotations...")
+
+
+        def annotation_generator():
+            for c in coco.anns.values():
+                ann = c.copy()
+                ann['bbox_top'] = ann['bbox'][1]
+                ann['bbox_left'] = ann['bbox'][0]
+                ann['bbox_width'] = ann['bbox'][2]
+                ann['bbox_height'] = ann['bbox'][3]
+                yield ann
+
+        cur.executemany('''
+            INSERT OR IGNORE INTO annotations(id, image_id, category_id, iscrowd, area, bbox_top, bbox_left, bbox_width, bbox_height)
+            VALUES (:id, :image_id, :category_id, :iscrowd, :area, :bbox_top, :bbox_left, :bbox_width, :bbox_height)
+            ''', annotation_generator())
+        self.con.commit()
+
+
+        self.logger.info("Segments...")
+
+        def segment_generator():
+            for ann in coco.anns.values():
+                for i, seg in enumerate(ann['segmentation']):
+                    yield {
+                        'id': ann['id']*10 + i, # create a uniqe segment id, supports max 10 segments per annotation
+                        'annotation_id': ann['id'],
+                        'points': str(seg)[1:-1],
+                    }
+
+        cur.executemany('''
+            INSERT OR IGNORE INTO segments(id, annotation_id, points)
+            VALUES (:id, :annotation_id, :points)
+            ''', segment_generator())
+        self.con.commit()
+
+
+        self.logger.info("Done...")
+
+    def getCategories(self):
+        if not hasattr(self, 'categories'):
+            cur = self.con.cursor()
+            cur.execute("SELECT * FROM categories ORDER BY id")
+            self.categories = [dict(cat) for cat in cur]
+        return self.categories
+    
+    def getCategory(self, cid):
+        cats = self.getCategories()
+        cat = [c for c in cats if c['id'] == cid]
+        if not len(cat):
+            return None
+        return cat[0]
+
+    def getImage(self, image_id: int):
+        cur = self.con.cursor()
+        cur.execute(f"SELECT * FROM images WHERE id = ? LIMIT 1", (image_id,))
+        img = cur.fetchone()
+        return dict(img)
+
+    def getAnnotationWithoutZerkine(self):
+        cur = self.con.cursor()
+        # annotation 918 and 2206849 have 0 height. Crashing the script... exclude them
+        cur.execute(f"SELECT * FROM annotations WHERE zerkine_moment IS NULL AND area > 0 LIMIT 1")
+        ann = cur.fetchone()
+        if ann:
+            return Annotation(ann, self)
+        else:
+            return None
+
+    def countAnnotationsWithoutZerkine(self):
+        cur = self.con.cursor()
+
+        cur.execute(f"SELECT count(id) FROM annotations WHERE zerkine_moment IS NULL AND area > 0")
+        return int(cur.fetchone()[0])
+
+    def storeZerkineForAnnotation(self, annotation, moments, delayCommit = False):
+        m = ' '.join([str(m) for m in moments])
+        cur = self.con.cursor()
+
+        cur.execute(
+            "UPDATE annotations SET zerkine_moment = :z WHERE id = :id",
+            {'z': m, 'id': annotation.id}
+        )
+        if not delayCommit:
+            self.con.commit()
+        return True
+
+    def getZerkines(self):
+        cur = self.con.cursor()
+        cur.execute(f"SELECT id, zerkine_moment FROM annotations WHERE zerkine_moment IS NOT NULL")
+        return cur.fetchall()
+    
+    def getAllAnnotationPoints(self):
+        cur = self.con.cursor()
+        cur.execute(f"SELECT annotations.id, points FROM annotations INNER JOIN segments ON segments.annotation_id = annotations.id WHERE area > 0")
+        return cur.fetchall()
+
+    def getAnnotationById(self, annotation_id = None, withZerkine = False):
+        if annotation_id == -1:
+            annotation_id = None
+        return self.getRandomAnnotation(annotation_id = annotation_id, withZerkine = withZerkine)
+
+    def getRandomAnnotation(self, annotation_id = None, category_id = None, withZerkine = False):
+        result = self.getRandomAnnotations(annotation_id, category_id, withZerkine, limit=1)
+        return result[0] if len(result) else None
+            
+    def getRandomAnnotations(self, annotation_id = None, category_id = None, withZerkine = False, limit=None):
+        cur = self.con.cursor()
+        where = ""
+        params = []
+        if annotation_id:
+            where = "id = ?"
+            params.append(annotation_id)
+        elif category_id:
+            where = "category_id = ?"
+            params.append(category_id)
+        else:
+            where = "1=1"
+
+        if withZerkine:
+            where += " AND zerkine_moment IS NOT NULL"
+        
+        sqlLimit = ""
+        if limit:
+            sqlLimit = f"LIMIT {int(limit)}"
+
+        cur.execute(f"SELECT * FROM annotations WHERE {where} ORDER BY RANDOM() {sqlLimit}", tuple(params))
+        results = []
+        for ann in cur:
+            results.append(Annotation(ann, self))
+        return results
+#             ann = cur.fetchall()
+#             
+#             if ann:
+#                 return Annotation(ann, self)
+#             else:
+#                 return None
+            
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..f5184af
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,6 @@
+tornado
+coloredlogs
+pycocotools
+numpy
+mahotas
+svgwrite
\ No newline at end of file
diff --git a/tools.py b/tools.py
new file mode 100644
index 0000000..f04de17
--- /dev/null
+++ b/tools.py
@@ -0,0 +1,94 @@
+import argparse
+from coco.storage import COCOStorage
+import logging
+import pycocotools.coco
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger("tools")
+
+
+def create(args):
+    con = args.storage.con
+    cur = con.cursor()
+    cur.executemany('INSERT OR IGNORE INTO categories(id, supercategory, name) VALUES (:id, :supercategory, :name)', args.coco.cats.values())
+    con.commit()
+
+    logger.info("Images...")
+    cur.executemany('''
+        INSERT OR IGNORE INTO images(id, flickr_url, coco_url, width, height, date_captured)
+        VALUES (:id, :flickr_url, :coco_url, :width, :height, :date_captured)
+        ''', args.coco.imgs.values())
+    con.commit()
+
+    logger.info("Annotations...")
+
+
+    def annotation_generator():
+        for c in args.coco.anns.values():
+            ann = c.copy()
+            ann['bbox_top'] = ann['bbox'][1]
+            ann['bbox_left'] = ann['bbox'][0]
+            ann['bbox_width'] = ann['bbox'][2]
+            ann['bbox_height'] = ann['bbox'][3]
+            yield ann
+
+    cur.executemany('''
+        INSERT OR IGNORE INTO annotations(id, image_id, category_id, iscrowd, area, bbox_top, bbox_left, bbox_width, bbox_height)
+        VALUES (:id, :image_id, :category_id, :iscrowd, :area, :bbox_top, :bbox_left, :bbox_width, :bbox_height)
+        ''', annotation_generator())
+    con.commit()
+
+
+    logger.info("Segments...")
+
+    def segment_generator():
+        for ann in args.coco.anns.values():
+            for i, seg in enumerate(ann['segmentation']):
+                yield {
+                    'id': ann['id']*10 + i, # create a uniqe segment id, supports max 10 segments per annotation
+                    'annotation_id': ann['id'],
+                    'points': str(seg)[1:-1],
+                }
+
+    cur.executemany('''
+        INSERT OR IGNORE INTO segments(id, annotation_id, points)
+        VALUES (:id, :annotation_id, :points)
+        ''', segment_generator())
+    con.commit()
+
+
+    logger.info("Done...")
+
+
+if __name__ == "__main__":
+        
+    parser = argparse.ArgumentParser()
+
+    subparsers = parser.add_subparsers(title = 'subcommands', help="Use command -h for specific help")
+
+    parser_create = subparsers.add_parser('create')
+    parser_create.add_argument(
+        '--annotations',
+        metavar="JSON_FILENAME",
+        type=pycocotools.coco.COCO,
+        dest='coco',
+        default='dataset/annotations/instances_val2017.json'
+        )
+    parser_create.add_argument(
+            '--db',
+            type=COCOStorage,
+            metavar='DATABASE',
+            dest='storage',
+            help='SQLite db filename, will be created if not existing',
+            default='dataset/instances_val2017.db'
+        )
+    parser_create.set_defaults(target = create)
+
+    # parser_build = subparsers.add_parser('build')
+
+    args = parser.parse_args()
+    if 'target' in args:
+        args.target(args)
+    else:
+        parser.print_help()
+        exit(1)
\ No newline at end of file