# alleswatikvoel/loader.py (Python; listed as 208 lines, 6.4 KiB)

import os
import argparse
import scipy
import scipy.misc
import scipy.cluster
import numpy as np
import models
import random
from PIL import Image
import colour
import requests
import time
"""
Load images from a csv file into the database and/or process unprocessed images from the database
"""
# Number of clusters requested from k-means in getColoursForImageByClusters.
NUM_CLUSTERS = 64
def loadFile(indexFileName):
    """
    Load the artworks listed in an index file into the database.

    Every non-blank line of the file must hold exactly four comma-separated
    fields: ``filename, name, group, emotion``. Whitespace around each field
    is stripped. One models.Emotion row is saved per distinct emotion, one
    models.Group row per distinct group and one models.Artwork row per line.
    A small '#' histogram of the emotion and group counts is printed along
    the way.

    Raises ValueError when a non-blank line does not have four fields.
    """
    works = []
    with open(indexFileName, 'r') as fp:
        for lineNr, line in enumerate(fp, start=1):
            if not line.strip():
                # Blank lines carry no record; unpacking them would crash.
                continue
            fields = [field.strip() for field in line.split(",")]
            if len(fields) != 4:
                raise ValueError(
                    "%s:%d: expected 4 comma-separated fields, got %d"
                    % (indexFileName, lineNr, len(fields))
                )
            filename, name, group, emotion = fields
            works.append({
                'filename': filename,
                'name': name,
                'group': group,
                'emotion': emotion,
            })

    # Count occurrences; plain dicts keep first-appearance order, which
    # determines the order in which rows are created and printed below.
    emoCounts = {}
    groupCounts = {}
    for work in works:
        emoCounts[work['emotion']] = emoCounts.get(work['emotion'], 0) + 1
        groupCounts[work['group']] = groupCounts.get(work['group'], 0) + 1

    emotions = {}
    for emo, count in emoCounts.items():
        emotion = models.Emotion(name=emo)
        emotion.save()
        emotions[emo] = emotion
        print(emo.center(20), ('(%d) ' % count).rjust(5), '#' * count)

    groups = {}
    for g, count in groupCounts.items():
        group = models.Group(name=g)
        group.save()
        groups[g] = group
        print(g.center(20), ('(%d) ' % count).rjust(5), '#' * count)

    for i, work in enumerate(works, start=1):
        img = models.Artwork()
        img.author = work['name']
        img.group = groups[work['group']]
        img.emotion = emotions[work['emotion']]
        img.filename = work['filename']
        img.save()
        # '\r' rewrites the same terminal line to show progress.
        print("\r%d/%d" % (i, len(works)), end="")
    if works:
        # Finish the progress line so the summary starts on a fresh line.
        print()
    print("Loaded", len(works), "from", indexFileName)
def processWorksFromDb():
    """
    Process every stored artwork that still lacks derived data.

    Selects the models.Artwork rows whose ``colours`` or ``faces`` column is
    NULL and hands each one to processWork.
    """
    # Each comparison needs its own parentheses: `|` binds tighter than `==`,
    # so the unparenthesised form became a chained comparison instead of an
    # OR of two NULL checks.
    # NOTE(review): the original compared `colours` twice; `faces` is assumed
    # to be the intended second column because processWork fills both.
    # Assumes peewee-style expressions where `field == None` means IS NULL.
    unprocessed = models.Artwork.select().where(
        (models.Artwork.colours == None)  # noqa: E711
        | (models.Artwork.faces == None)  # noqa: E711
    )
    for work in unprocessed:
        processWork(work)
def processWork(work : models.Artwork):
    """
    Fill in whatever derived data is still missing for one artwork.

    Three independent steps, each saved as soon as it completes:
    - ``work.colours``: computed by colour.getColoursForImageByMeanShiftClusters
      when it is None.
    - ``work.faces``: fetched with loadEmotionsFromMs when it is None.
    - ``work.width`` / ``work.height`` and the thumbnail file at
      ``work.getThumbPath()``: (re)created when the thumbnail is missing or
      either dimension is None.
    """
    print("Processing", work.id,"by '"+work.author+"' from", work.group.name)

    if work.colours is None:
        # The context manager closes the image file once the colours are known.
        with Image.open(work.filename) as img:
            work.colours = colour.getColoursForImageByMeanShiftClusters(img, 'images/%s' % work.id)
        work.save()

    if work.faces is None:
        work.faces = loadEmotionsFromMs(work.filename)
        work.save()

    if not os.path.exists(work.getThumbPath()) or work.width is None or work.height is None:
        with Image.open(work.filename) as img:
            # Record the original dimensions before the image is shrunk.
            work.width, work.height = img.size
            work.save()
            # thumbnail() resizes in place to fit within 200x200.
            img.thumbnail((200,200))
            img.save(work.getThumbPath())
def loadEmotionsFromMs(filename):
    """
    Post an image file to the Project Oxford emotion endpoint.

    The raw bytes of ``filename`` are sent as an octet stream and the decoded
    JSON body is returned. Per the example recorded with this code, a
    successful response looks like::

        [{
            'faceRectangle': {'width': 153, 'top': 143, 'left': 222, 'height': 153},
            'scores': {'sadness': 0.0135, 'neutral': 0.9352, 'contempt': 0.0012,
                       'disgust': 0.0111, 'anger': 0.0327, 'surprise': 0.0057,
                       'fear': 0.0002, 'happiness': 0.0001}
        }, ...]

    An empty response yields ``[]``.

    Raises Exception carrying the service's error message (or the HTTP status
    and body when no message can be extracted) if the status is not 200.
    """
    headers = {
        'Content-Type':'application/octet-stream',
        # NOTE(review): hard-coded credential committed to source; it should
        # be rotated and supplied via configuration instead.
        'Ocp-Apim-Subscription-Key': '3cb36f05201943679906e6ed68d4318c',
    }
    with open(filename, 'rb') as fp:
        data = fp.read()

    # A timeout keeps a stalled connection from hanging the whole batch.
    r = requests.post(
        'https://api.projectoxford.ai/emotion/v1.0/recognize',
        data=data,
        headers=headers,
        timeout=60,
    )
    time.sleep(4) # avoid limit

    if r.status_code != 200:
        # Error bodies are not guaranteed to be JSON or to have this shape.
        try:
            message = r.json()['error']['message']
        except (ValueError, KeyError, TypeError):
            message = 'HTTP %d: %s' % (r.status_code, r.text)
        raise Exception(message)

    responsedata = r.json()
    if len(responsedata) < 1:
        return []
    return responsedata
def main():
    """
    Command-line entry point.

    Options:
    --file PATH  load the given CSV index file via loadFile
    --reset      truncate the Emotion, Group and Artwork tables first
    --process    run processWorksFromDb on the stored artworks

    The tables are created first when no 'images.db' file exists in the
    current working directory.
    """
    parser = argparse.ArgumentParser(description='Load a CSV file into the database')
    parser.add_argument('--file', type=str, default=None, help='CSV file to load')
    parser.add_argument('--reset', action='store_true', default=False, help='Clear db on load')
    parser.add_argument('--process', action='store_true', default=False, help='Load artworks from the database that have not been processed yet and process those')
    # parser.add_argument('--db', type=str, default="images.db", help='Specify custom db')
    args = parser.parse_args()

    tables = [models.Emotion, models.Group, models.Artwork]
    if not os.path.exists('images.db'):
        models.db.create_tables(tables)
    if args.reset:
        models.db.truncate_tables(tables)
    if args.file is not None:
        loadFile(args.file)
    if args.process:
        processWorksFromDb()
def getColourAsHex(colour):
    """
    Format a colour as a '#'-prefixed hexadecimal string.

    ``colour`` must provide ``astype(int)`` (e.g. a NumPy array of channel
    values). Each element is converted to an integer and written as lowercase
    hex, zero-padded to at least two digits.
    """
    hexParts = ['{:02x}'.format(channel) for channel in colour.astype(int)]
    return '#' + ''.join(hexParts)
def getColoursForImageByClusters(image, numClusters=None):
    """
    Find the dominant colours of an image with k-means clustering.

    Adapted from answers by
    Peter Hansen (http://stackoverflow.com/a/3244061)
    & Johan Mickos (http://stackoverflow.com/a/34140327)

    image: object offering ``copy()`` and ``resize()`` (a PIL image) that
        converts to an array of shape (height, width, channels).
    numClusters: number of clusters to request from k-means; defaults to the
        module-level NUM_CLUSTERS when omitted.

    Returns a list of ``(centre, percentage)`` tuples, one per cluster centre
    returned by k-means, where ``percentage`` is the share of pixels assigned
    to that centre. k-means starts from random points, so results can differ
    between runs.
    """
    if numClusters is None:
        numClusters = NUM_CLUSTERS

    im = image.copy().resize((150, 150))  # optional, to reduce time
    # np.asarray replaces scipy.misc.fromimage, which SciPy no longer ships.
    pixels = np.asarray(im)
    # Flatten to one row per pixel, one column per channel.
    pixels = pixels.reshape(-1, pixels.shape[2]).astype(float)

    codes, _ = scipy.cluster.vq.kmeans(pixels, numClusters)
    assignments, _ = scipy.cluster.vq.vq(pixels, codes)  # assign codes
    # bincount indexes directly by code number, so counts stay aligned with
    # `codes` even if some code receives no pixels.
    counts = np.bincount(assignments, minlength=len(codes))
    percentages = 100 * counts / counts.sum()
    return list(zip(codes, percentages))
def getColoursForImageByPxAvg(image):
    """
    Don't use this one, blunt way to get average pixel colors.

    The image is copied and resized to 8x8; every resulting pixel is returned
    as its own colour with an equal share of the total.

    Returns a list of ``(pixel, percentage)`` tuples.
    """
    im = image.copy().resize((8, 8))
    # np.asarray replaces scipy.misc.fromimage, which SciPy no longer ships.
    # concatenate joins the rows into one flat sequence of pixels.
    pixels = np.concatenate(np.asarray(im))
    percentages = np.full(len(pixels), 100 / len(pixels))
    return list(zip(pixels, percentages))
def getColoursAsHTML(colours):
    """
    Render colours as a space-separated row of HTML ``<span>`` swatches.

    ``colours`` is an iterable of pairs whose first item is passed to
    getColourAsHex and whose second item is shown as the percentage. The hex
    value is used both as the span background and in its label.
    """
    template = '<span style="background:%s">%s - (%s %%)</span>'
    spans = []
    for entry in colours:
        hexCode = getColourAsHex(entry[0])
        spans.append(template % (hexCode, hexCode, entry[1]))
    return " ".join(spans)
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()