alleswatikvoel/loader.py

import os
import argparse
import scipy
import scipy.misc
import scipy.cluster
import numpy as np
import models
import random
from PIL import Image
import colour
import requests
import time

"""
Load images from a CSV file into the database and/or process the images in the database that have not been processed yet.
"""

# Number of clusters requested from scipy.cluster.vq.kmeans in
# getColoursForImageByClusters.
NUM_CLUSTERS = 64

def loadFile(indexFileName):
	"""
	Load the artworks listed in a CSV index file into the database.

	Every non-blank line of the file must hold exactly four comma separated
	fields: filename, author name, group and emotion (surrounding whitespace
	is stripped). One models.Emotion and one models.Group row is saved per
	distinct emotion/group, followed by one models.Artwork row per line.
	A tally per emotion and group and a progress counter are printed.

	:param indexFileName: path of the CSV index file to read
	:raises ValueError: when a non-blank line does not split into four fields
	"""
	works = []

	with open(indexFileName, 'r') as fp:
		for line in fp:
			# Blank lines (typically a trailing newline at the end of the
			# file) hold no record; skip them instead of failing the unpack.
			if not line.strip():
				continue
			filename, name, group, emotion = line.split(",")
			works.append({
				'filename': filename.strip(),
				'name': name.strip(),
				'group': group.strip(),
				'emotion': emotion.strip()
				})

	# Count how often every emotion and group occurs.
	emoCounts = {}
	groupCounts = {}
	for work in works:
		emoCounts[work['emotion']] = emoCounts.get(work['emotion'], 0) + 1
		groupCounts[work['group']] = groupCounts.get(work['group'], 0) + 1

	# Save one row per distinct emotion and print its tally as a bar of '#'.
	emotions = {}
	for emo, count in emoCounts.items():
		emotion = models.Emotion(name=emo)
		emotion.save()
		emotions[emo] = emotion
		print(emo.center(20), ('('+str(count)+') ').rjust(5), '#' * count)

	# Same for the groups.
	groups = {}
	for g, count in groupCounts.items():
		group = models.Group(name=g)
		group.save()
		groups[g] = group
		print(g.center(20), ('('+str(count)+') ').rjust(5), '#' * count)

	for i, work in enumerate(works, 1):
		img = models.Artwork()
		img.author = work['name']
		img.group = groups[work['group']]
		img.emotion = emotions[work['emotion']]
		img.filename = work['filename']
		img.save()
		# '\r' rewrites the same terminal line to show the progress
		print("\r%d/%d" % (i, len(works)), end="")

	if works:
		# finish the progress line so the summary starts on a line of its own
		print()
	print("Loaded",len(works),"from", indexFileName)

def processWorksFromDb():
	"""
	Process every artwork in the database that still lacks derived data.

	Selects the models.Artwork rows whose colours or faces are not set yet
	and hands each of them to processWork.
	"""
	# Both comparisons need their own parentheses: `|` binds tighter than `==`,
	# so without them Python evaluates a chained comparison around
	# `None | models.Artwork.colours` instead of OR-ing two conditions.
	# NOTE(review): assumes `faces` is a column of models.Artwork, as
	# processWork reads and saves work.faces - confirm against models.py.
	unprocessed = models.Artwork.select().where(
		(models.Artwork.colours == None) | (models.Artwork.faces == None)  # noqa: E711
	)
	for work in unprocessed:
		processWork(work)

def processWork(work : models.Artwork):
	"""
	Compute and store whatever derived data is still missing for one artwork.

	Three independent steps, each only executed when its result is absent:
	  * colours - via colour.getColoursForImageByMeanShiftClusters
	  * faces   - via loadEmotionsFromMs
	  * width, height and the thumbnail file at work.getThumbPath()

	The artwork is saved after every step rather than once at the end.

	:param work: the artwork to process
	"""
	print("Processing", work.id,"by '"+work.author+"' from", work.group.name)
	if work.colours is None:
		# The context manager closes the image file once the colours are known.
		with Image.open(work.filename) as img:
			work.colours = colour.getColoursForImageByMeanShiftClusters(img, 'images/%s' % work.id)
		work.save()
	if work.faces is None:
		work.faces = loadEmotionsFromMs(work.filename)
		work.save()
	if not os.path.exists(work.getThumbPath()) or work.width is None or work.height is None:
		with Image.open(work.filename) as img:
			# Record the original dimensions before thumbnail() shrinks the image in place.
			work.width, work.height = img.size
			work.save()

			img.thumbnail((200,200))
			img.save(work.getThumbPath())

def loadEmotionsFromMs(filename):
	"""
	Send an image file to the Project Oxford emotion recognition endpoint.

	The raw bytes of the file are posted to the service and the decoded JSON
	body of the response is returned. Example of a response:

	[ {
			'faceRectangle': {'width': 153, 'top': 143, 'left': 222, 'height': 153},
			'scores': {'sadness': 0.0135554466, 'neutral': 0.935254037, 'contempt': 0.00121790112, 'disgust': 0.0111144362, 'anger': 0.03275946, 'surprise': 0.0057246, 'fear': 0.000241575908, 'happiness': 0.00013255408}
	 }, ...]

	:param filename: path of the image file to upload
	:return: the decoded response, or an empty list when the response is empty
	:raises Exception: with the error message of the service when the
		response status is not 200
	"""
	headers = {
		'Content-Type':'application/octet-stream',
		# NOTE(review): this subscription key is committed in source; consider
		# rotating it and loading it from configuration instead.
		'Ocp-Apim-Subscription-Key': '3cb36f05201943679906e6ed68d4318c',
	}
	# Read through a context manager so the file handle is closed right away.
	with open(filename, 'rb') as fp:
		data = fp.read()
	r = requests.post('https://api.projectoxford.ai/emotion/v1.0/recognize', data=data, headers = headers)
	time.sleep(4) # avoid limit

	responsedata = r.json()

	if r.status_code != 200:
		raise Exception(responsedata['error']['message'])

	if len(responsedata) < 1:
		return []

	return responsedata


def main():
	"""
	Command line entry point.

	Options:
	  --file FILE  load the given CSV index file into the database
	  --reset      truncate the Emotion, Group and Artwork tables first
	  --process    process the artworks in the database that still lack data

	The tables are created when no 'images.db' file exists in the current
	working directory.
	"""
	parser = argparse.ArgumentParser(description='Load a CSV file into the database')
	parser.add_argument('--file', type=str, default=None, help='CSV file to load')
	parser.add_argument('--reset', action='store_true', default=False, help='Clear db on load')
	parser.add_argument('--process', action='store_true', default=False, help='Load artworks from the database that have not been processed yet and process those')

	args = parser.parse_args()

	# NOTE(review): the database file name is fixed here; presumably it is the
	# file models.db is bound to - confirm against models.py.
	if not os.path.exists('images.db'):
		models.db.create_tables([models.Emotion, models.Group, models.Artwork])
	if args.reset:
		models.db.truncate_tables([models.Emotion, models.Group, models.Artwork])

	if args.file is not None:
		loadFile(args.file)

	if args.process:
		processWorksFromDb()

def getColourAsHex(colour):
	"""
	Format a colour as a hexadecimal string prefixed with '#'.

	:param colour: array of channel values (must offer astype); every value
		is converted to int and written as at least two lower case hex digits
	:return: the string, e.g. '#ff0010'
	"""
	channels = colour.astype(int)
	hexPairs = ['{:02x}'.format(channel) for channel in channels]
	return '#' + ''.join(hexPairs)

def getColoursForImageByClusters(image, numClusters=None):
	"""
	Find the dominant colours of an image with k-means clustering.

	Adapted from answers by
	Peter Hansen (http://stackoverflow.com/a/3244061)
	& Johan Mickos (http://stackoverflow.com/a/34140327)

	:param image: image offering copy() and resize(); after resizing it must
		convert to an array with three dimensions (rows, columns, channels)
	:param numClusters: number of clusters to ask k-means for, defaults to
		NUM_CLUSTERS
	:return: list of (cluster centre, percentage) tuples; the percentage is
		the share of pixels assigned to that centre
	"""
	if numClusters is None:
		numClusters = NUM_CLUSTERS

	im = image.copy().resize((150, 150))      # optional, to reduce time
	# np.asarray takes the place of scipy.misc.fromimage, which is no longer
	# part of SciPy.
	ar = np.asarray(im)
	# flatten rows x columns into a single list of pixels
	pixels = ar.reshape(-1, ar.shape[2]).astype(float)

	codes, dist = scipy.cluster.vq.kmeans(pixels, numClusters)

	vecs, dist = scipy.cluster.vq.vq(pixels, codes)         # assign codes
	# bincount keeps counts[i] aligned with codes[i] even when a code receives
	# no pixels; histogram bins only span the smallest to largest used index.
	counts = np.bincount(vecs, minlength=len(codes))        # count occurrences

	percentages = 100 * counts / counts.sum()
	return list(zip(codes, percentages))

def getColoursForImageByPxAvg(image):
	'''
	Don't use this one, blunt way to get average pixel colours.

	The image is resized to 8x8 and every resulting pixel is returned as a
	colour with an equal share of the total.

	:param image: image offering copy() and resize()
	:return: list of (pixel, percentage) tuples, every percentage being
		100 divided by the number of pixels
	'''
	im = image.copy().resize((8, 8))
	# np.asarray takes the place of scipy.misc.fromimage, which is no longer
	# part of SciPy; concatenate joins the rows into one list of pixels.
	pixels = np.concatenate(np.asarray(im))
	percentages = np.full(len(pixels), 100 / len(pixels))
	return list(zip(pixels, percentages))

def getColoursAsHTML(colours):
	"""
	Render colours as a row of HTML <span> swatches.

	:param colours: iterable of entries whose first item is the colour (as
		accepted by getColourAsHex) and whose second item is its percentage
	:return: the spans joined by single spaces; every span uses the hex code
		as background and shows the hex code and the percentage as text
	"""
	spans = []
	for entry in colours:
		hexCode = getColourAsHex(entry[0])
		spans.append('<span style="background:%s">%s - (%s %%)</span>' % (hexCode, hexCode, entry[1]))
	return " ".join(spans)

# Run the command line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
	main()