Add possibility to use segments for better face detection
This commit is contained in:
parent ff4ccd7410
commit 3b0e0eda16
4 changed files with 281 additions and 58 deletions
@@ -7,3 +7,7 @@ gphoto2 to capture images:

The modified 'webcam demo' to analyse and generate json:
`/home/crowd/build/opencv-webcam-demo/opencv-webcam-demo --data /home/crowd/affdex-sdk/data --faceMode 1 --numFaces 80 -o /home/crowd/output-backup/ --draw 0`
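Each frame%06d.json written this way roughly has the shape sketched below. This is a non-authoritative reconstruction: the field names are inferred from what parse_output.py reads and what split_and_merge_output.py writes, and all values are made up.

# Illustrative shape only; inferred from the fields the scripts below read/write.
{
    "t": 12.34,                                    # capture time of the frame
    "faces": [
        {
            "id": 1,
            "rect": {"x": 100, "y": 80, "w": 64, "h": 64},
            "valence": 10.5,
            "smile": 87.2,
            # ... one key per facial parameter listed in parse_output.py (browFurrow, jawDrop, ...)
        },
    ],
    "nr": 1,                                       # only in files remerged by split_and_merge_output.py
    "sectionTimes": [12.30, 12.31, 12.32, 12.33],  # idem: per-segment timestamps
}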
split_and_merge_output.py splits frames into segments so that SMALL_FACES actually detects _really_ small faces. This requires enabling --segments on opencv-webcam-demo (so that it detects/writes segment%06d files).
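For example (directory names are illustrative; judging from the code below, the demo's -o would point at the same directory split_and_merge_output.py uses as --segmentDir):

`/home/crowd/build/opencv-webcam-demo/opencv-webcam-demo --data /home/crowd/affdex-sdk/data --faceMode 1 --numFaces 80 -o /home/crowd/segments/ --draw 0 --segments 1`

`python split_and_merge_output.py -o /home/crowd/output-backup/ --segmentDir /home/crowd/segments/`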
split_and_merge_output.py also remerges these segment%06d.json files into a frame%06d.json file.
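A minimal sketch of the segment/frame numbering this relies on (four segments per frame, matching segmentCount in split_and_merge_output.py below; the helper names here are just for illustration):

segmentCount = 4  # fixed in split_and_merge_output.py for now

def segmentNrsForFrame(frameNr):
    # frame 1 -> segments 1..4, frame 2 -> segments 5..8, ...
    first = (frameNr - 1) * segmentCount + 1
    return range(first, first + segmentCount)

def frameNrForSegment(segmentNr):
    # segments 1..4 -> frame 1, segments 5..8 -> frame 2, ... (integer division)
    return (segmentNr - 1) // segmentCount + 1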
@@ -143,6 +143,7 @@ int main(int argsc, char ** argsv)

int buffer_length = 2;
unsigned int nFaces = 1;
bool draw_display = true;
bool use_segments = false;
int faceDetectorMode = (int)FaceDetectorMode::SMALL_FACES;
boost::filesystem::path imgPath("~/emo_in_file.jpg");
boost::filesystem::path outPath("~/output/");
@@ -167,6 +168,7 @@ int main(int argsc, char ** argsv)

("faceMode", po::value< int >(&faceDetectorMode)->default_value((int)FaceDetectorMode::SMALL_FACES), "Face detector mode (large faces vs small faces).")
("numFaces", po::value< unsigned int >(&nFaces)->default_value(1), "Number of faces to be tracked.")
("draw", po::value< bool >(&draw_display)->default_value(true), "Draw metrics on screen.")
("segments", po::value< bool >(&use_segments)->default_value(use_segments), "Use 'segments' rather than 'frames' (influences detecting frame%06d or segment%06d).")
//~ ("file,f", po::value< boost::filesystem::path >(&imgPath)->default_value(imgPath), "Filename of image that is watched/tracked for changes.")
("frameOutput,o", po::value< boost::filesystem::path >(&outPath)->default_value(outPath), "Directory to store the frame in (and json)")
;
@@ -247,7 +249,7 @@ int main(int argsc, char ** argsv)

int seconds = 1;
while(true){ //(cv::waitKey(20) != -1);
char buff[100];
snprintf(buff, sizeof(buff), "frame%06d.jpg", frameNrIn);
snprintf(buff, sizeof(buff), (use_segments ? "segment%06d.jpg" : "frame%06d.jpg"), frameNrIn);
boost::filesystem::path imgPath = outPath / buff;
if ( !boost::filesystem::exists( imgPath.native() ) || frameNrIn > frameNrOut ) {
// wait for file to appear
@@ -257,7 +259,7 @@ int main(int argsc, char ** argsv)

std::cerr << "Read " << imgPath.native() << std::endl;

char buff[100];
snprintf(buff, sizeof(buff), "frame%06d.json", frameNrIn);
snprintf(buff, sizeof(buff), (use_segments ? "segment%06d.json" : "frame%06d.json"), frameNrIn);
boost::filesystem::path jsonPath = outPath / buff;

// don't redo existing jsons
@@ -300,7 +302,7 @@ int main(int argsc, char ** argsv)

// store json
char buff[100];
snprintf(buff, sizeof(buff), "frame%06d.json", frameNrOut);
snprintf(buff, sizeof(buff), (use_segments ? "segment%06d.json" : "frame%06d.json"), frameNrOut);
boost::filesystem::path targetFilename = outPath / buff;
std::ofstream out(targetFilename.native());
std::cerr << "write " << targetFilename.native() << std::endl;
234 parse_output.py
@@ -1,18 +1,50 @@

import os
from PIL import Image, ImageDraw
from PIL import Image, ImageDraw, ImageTk
import argparse
import json
import time
import glob
import numpy as np

import Tkinter


facialParameters = [
    "smile",
    "innerBrowRaise",
    "browRaise",
    "browFurrow",
    "noseWrinkle",
    "upperLipRaise",
    "lipCornerDepressor",
    "chinRaise",
    "lipPucker",
    "lipPress",
    "lipSuck",
    "mouthOpen",
    "smirk",
    #~ "attention",
    "eyeClosure",
    "eyeWiden",
    "cheekRaise",
    "lidTighten",
    "dimpler",
    "lipStretch",
    "jawDrop",
]

parser = argparse.ArgumentParser(description='Parses opencv-webcam-demo json output files and collects statistics')
parser.add_argument('--frameOutput', '-o', required=True, help='directory to look for frames & json')
parser.add_argument('--status', '-s', action='store_true', help='Keep status of last frame')
parser.add_argument('--status', action='store_true', help='Keep status of last frame')
parser.add_argument('--cutAllFaces', action='store_true', help='Cut out all faces from all frames')
parser.add_argument('--sum', action='store_true', help='Get total scores over all time')
parser.add_argument('--unique', action='store_true', help='Get most unique window')
parser.add_argument('--avg', action='store_true', help='Get most average window')
parser.add_argument('--disonant', action='store_true', help='Get most dissonant faces over time')
parser.add_argument('--window-size', '-s', type=int, default=10, help='The nr of frames to group in one sliding window for analysis')
parser.add_argument("--params", "-p", type=str, nargs='+', default=facialParameters, choices=facialParameters, help="The parameters used to calculate the statistics")

args = parser.parse_args()
@@ -29,30 +61,8 @@ class Face:

        r = self.data['rect']
        return self.frame.getImg().crop((int(r['x']), int(r['y']), int(r['x']+r['w']), int(r['y']+r['h'])))

    def getCharacteristicVector(self):
        self.vector = np.array([
            self.data["smile"],
            self.data["innerBrowRaise"],
            self.data["browRaise"],
            self.data["browFurrow"],
            self.data["noseWrinkle"],
            self.data["upperLipRaise"],
            self.data["lipCornerDepressor"],
            self.data["chinRaise"],
            self.data["lipPucker"],
            self.data["lipPress"],
            self.data["lipSuck"],
            self.data["mouthOpen"],
            self.data["smirk"],
            self.data["eyeClosure"],
            # self.data["attention"],
            self.data["eyeWiden"],
            self.data["cheekRaise"],
            self.data["lidTighten"],
            self.data["dimpler"],
            self.data["lipStretch"],
            self.data["jawDrop"],
        ])
    def getCharacteristicVector(self, params):
        self.vector = [self.data[p] for p in params]
        return self.vector

    def setAnomalyScore(self, score):
@@ -81,8 +91,17 @@ class Frame:

        #~ # no json file yet?
        #~ return None

    def getImg(self):
        return Image.open(self.imgPath)
    def getImg(self, markFaces = True):
        img = Image.open(self.imgPath)
        if not markFaces:
            return img

        draw = ImageDraw.Draw(img)
        for f in self.faces:
            xy1 = (int(f.data['rect']['x']), int(f.data['rect']['y']))
            xy2 = (int(f.data['rect']['x'] + f.data['rect']['w']), int(f.data['rect']['y'] + f.data['rect']['h']))
            draw.rectangle([xy1, xy2], outline="#ff0000")
        return img

    def getFaces(self):
        if self.faces is None:
@@ -108,13 +127,13 @@ class Frame:

        for face in self.getFaces():
            face.disonanceScore = abs(face.data['valence'] - avgValence)

    def getAverageV(self):
        vectors = [face.getCharacteristicVector() for face in self.getFaces()]
    def getAverageV(self, params):
        vectors = [face.getCharacteristicVector(params) for face in self.getFaces()]
        vAvg = np.mean(vectors, axis=0)
        return vAvg

    def updateAnomalyScores(self):
        vAvg = self.getAverageV()
    def updateAnomalyScores(self, params):
        vAvg = self.getAverageV(params)
        for face in self.getFaces():
            face.setAnomalyScore(np.linalg.norm(face.getCharacteristicVector() - vAvg))
@@ -124,6 +143,88 @@ class Frame:


frames = {}

class Window:
    def __init__(self, frameSubset):
        """
        Init a sliding window for the given Frame-s
        """
        self.frames = frameSubset
        self.deviation = None
        self.standardDeviation = None

    def getFaces(self):
        faces = []
        for frame in self.frames:
            faces.extend(frame.getFaces())
        return faces

    def getStdDev(self, params):
        """
        Get the standard deviation of the faces within the window for the given params
        """
        vectors = [f.getCharacteristicVector(params) for f in self.getFaces()]
        return np.std(vectors)

    def getAverageV(self, params):
        vectors = [f.getCharacteristicVector(params) for f in self.getFaces()]
        vAvg = np.mean(vectors, axis=0)
        return vAvg

    @staticmethod
    def createWindows(windowSize, frames):
        """
        Given a full list of frames, turn it into a collection of sliding windows
        """
        frames = sorted(frames.items(), key=lambda f: f[0])
        frames = [f[1] for f in frames]
        windows = []
        windowCount = len(frames) - windowSize + 1
        if windowCount < 1:
            raise Exception("Not enough frames ({}) for a window of size {}".format(len(frames), windowSize))

        for offset in range(0, windowCount):
            frameSubset = frames[offset:offset+windowSize]
            windows.append(Window(frameSubset))

        return windows

class WindowCollection:
    def __init__(self, windowSize, frames):
        self.windows = Window.createWindows(windowSize, frames)
        self.frames = frames
        #~ self.faces = [face for face in frame.getFaces() for frame in frames]

    #~ def getMostWindowsClosestToMedian(self, nr = 5):
        #~ """
        #~ Get windows with the faces closest to the median
        #~ """
        #~ self.faces

    def getWindowVectors(self, params):
        return [window.getAverageV(params) for window in self.windows]

    def getWindowsByDeviation(self, params):
        vectors = self.getWindowVectors(params)
        vAvg = np.mean(vectors, axis=0)
        #~ diffs = [numpy.linalg.norm(v-vAvg) for v in vectors]
        #~ min_index, min_value = min(enumerate(diffs), key=lambda p: p[1])
        #~ max_index, max_value = max(enumerate(diffs), key=lambda p: p[1])

        return sorted(self.windows, key=lambda w: np.linalg.norm(w.getAverageV(params)-vAvg))

    def getUniqueWindows(self, params, nr=5):
        windows = self.getWindowsByDeviation(params)
        return windows[0: nr]

    def getMostAvgWindows(self, params, nr=5):
        windows = self.getWindowsByDeviation(params)
        windows.reverse()
        return windows[0:nr]

    def getMostContrastingWindows(self, params, nr=5):
        sortedWindows = sorted(self.windows, key=lambda w: w.getStdDev(params), reverse=True)
        return sortedWindows[0:nr]

def loadFrames(frameDir):
    global frames
    nr = 2
@@ -185,9 +286,9 @@ def getMostDisonant(nr = 5):

        print("Frame %d, face %d, score %d, valence %d" % (face.frame.nr, face.id, face.disonanceScore, face.data['valence']))
        face.getFaceImg().show()

def getAnomalies(nr = 5):
def getAnomalies(params, nr = 5):
    for frameNr, frame in loadFrames(args.frameOutput).items():
        frame.updateAnomalyScores()
        frame.updateAnomalyScores(params)
    faces.sort(key=lambda x: x.anomalyScore, reverse=True)

    anomalies = faces[:nr]
@@ -196,10 +297,10 @@ def getAnomalies(nr = 5):

        #~ getCharacteristicVector
        face.getFaceImg().show()

def printFrameStats(frame):
def printFrameStats(frame, params):
    os.system('clear')
    print(time.time())
    print( ("Nr: %d" % frame.nr).ljust(40) + ("t: %f" % frame.getJson()['t']) )
    print( ("Nr: %d" % frame.nr).ljust(40) + ("t: {}".format(frame.getJson()['t'])) )
    #~ print
    faces = frame.getFaces()
    print("Faces: %d" % len(faces))
@@ -207,24 +308,16 @@ def printFrameStats(frame):

    if len(faces) < 1:
        return

    params = ['smile', 'browFurrow']
    print " ".ljust(20), "0%".rjust(13), "q1".rjust(13), "median".rjust(13), "q3".rjust(13), "100%".rjust(13)

    q0s = [np.percentile(np.array([f.data[param] for f in faces]),0) for param in params]
    q1s = [np.percentile(np.array([f.data[param] for f in faces]),25) for param in params]
    q2s = [np.percentile(np.array([f.data[param] for f in faces]),50) for param in params]
    q3s = [np.percentile(np.array([f.data[param] for f in faces]),75) for param in params]
    q4s = [np.percentile(np.array([f.data[param] for f in faces]),100) for param in params]

    print " ".ljust(8),
    for p in params:
        print p.center(20),
    print ""
        q0 = np.percentile(np.array([f.data[p] for f in faces]),0)
        q1 = np.percentile(np.array([f.data[p] for f in faces]),25)
        q2 = np.percentile(np.array([f.data[p] for f in faces]),50)
        q3 = np.percentile(np.array([f.data[p] for f in faces]),75)
        q4 = np.percentile(np.array([f.data[p] for f in faces]),100)
        print p.ljust(20), ("%f%%" % q0).rjust(13), ("%f%%" % q1).rjust(13),("%f%%" % q2).rjust(13),("%f%%" % q3).rjust(13),("%f%%" % q4).rjust(13)

    print(" 0% " + "".join([("%f%%" % q).rjust(20) for q in q0s]))
    print(" q1 " + "".join([("%f%%" % q).rjust(20) for q in q1s]))
    print(" median " + "".join([("%f%%" % q).rjust(20) for q in q2s]))
    print(" q3 " + "".join([("%f%%" % q).rjust(20) for q in q3s]))
    print(" 100% " + "".join([("%f%%" % q).rjust(20) for q in q4s]))

    #~ TODO: speaker stats
@@ -233,15 +326,37 @@ def printFrameStats(frame):

    #~ dissonantFace.getFaceImg()


def monitorStatus(frameDir):
def monitorStatus(frameDir, params):
    while True:
        frame = getLastFrame(frameDir)
        if not frame is None:
            printFrameStats(frame)
            printFrameStats(frame, params)

        # don't check too often
        time.sleep(.5)

def playWindowStopmotion(window):
    """
    Play a set of sliding window frames as stop motion video
    """
    root = Tkinter.Tk()
    root.geometry('%dx%d+%d+%d' % (1000,1000,0,0))
    canvas = Tkinter.Canvas(root,width=1000,height=1000)
    canvas.pack()

    old_label_image = None
    for frame in window.frames:
        image = frame.getImg()
        basewidth = 1000
        wpercent = (basewidth / float(image.size[0]))
        hsize = int((float(image.size[1]) * float(wpercent)))
        image = image.resize((basewidth, hsize), Image.ANTIALIAS)

        tkpi = ImageTk.PhotoImage(image)
        canvas.delete("IMG")
        imagesprite = canvas.create_image(500,500,image=tkpi, tags="IMG")
        root.update()
        time.sleep(1)

validateJsonTimes()
@@ -259,6 +374,17 @@ if args.cutAllFaces:

    for frameNr, frame in loadFrames(args.frameOutput).items():
        cutOutFaces(faceDir)

if args.unique:
    collection = WindowCollection(args.window_size, frames)
    windows = collection.getUniqueWindows(args.params)
    #~ print(windows)
    playWindowStopmotion(windows[0])

if args.avg:
    collection = WindowCollection(args.window_size, frames)
    windows = collection.getMostAvgWindows(args.params)
    #~ print(windows)
    playWindowStopmotion(windows[0])

if args.status:
    monitorStatus(args.frameOutput)
    monitorStatus(args.frameOutput, args.params)
91 split_and_merge_output.py (Normal file)
@@ -0,0 +1,91 @@

import argparse
import os
from PIL import Image
import time
import json

parser = argparse.ArgumentParser(description="""
Frames are split into segments so SMALL_FACES actually detects _really_ small faces. This requires enabling --segments on opencv-webcam-demo (so it detects/writes segment%06d files). split_and_merge_output.py also remerges these segment%06d.json files into a frame%06d.json file.
""")
parser.add_argument('--frameOutput', '-o', required=True, help='directory to look for frames & json')
parser.add_argument('--segmentDir', required=True, help='directory to write segment images to (and read their json from)')

args = parser.parse_args()

if not os.path.exists(args.frameOutput):
    raise Exception(args.frameOutput + " does not exist")
if not os.path.exists(args.segmentDir):
    raise Exception(args.segmentDir + " does not exist")

def getSectionsForSize(size):
    """
    Get the four crop boxes (left, upper, right, lower) for an image of the given size (width, height)
    """
    # The boxes overlap slightly (1/1.85 > 1/2.1) so faces near the seams are not cut in half.
    return [
        ( 0, 0, int(size[0]/1.85), int(size[1]/1.85) ),
        ( int(size[0]/2.1), 0, size[0], int(size[1]/1.85) ),
        ( 0, int(size[1]/2.1), int(size[0]/1.85), size[1] ),
        ( int(size[0]/2.1), int(size[1]/2.1), size[0], size[1] ),
    ]
nextInNr = 1
segmentCount = 4 # cannot really be changed for now
nextOutNr = segmentCount
while True:
    nextJpg = os.path.join(args.frameOutput, "frame%06d.jpg" % nextInNr)
    nextInJson = os.path.join(args.frameOutput, "frame%06d.json" % nextInNr)
    while os.path.exists(nextJpg) and not os.path.exists(nextInJson):
        print("(SPLIT) Found: {}".format(nextJpg))
        img = Image.open(nextJpg)

        segments = getSectionsForSize(img.size)

        for i, segment in enumerate(segments):
            segmentImg = img.crop(segment)
            segmentNr = (nextInNr-1) * segmentCount + i + 1 # start at 1, increment from there
            segmentFilename = os.path.join(args.segmentDir, "segment%06d.jpg" % segmentNr)
            segmentImg.save(segmentFilename, quality=90)
            print("(SPLIT) Created: {}".format(segmentFilename))

        nextInNr += 1
        nextJpg = os.path.join(args.frameOutput, "frame%06d.jpg" % nextInNr)
        nextInJson = os.path.join(args.frameOutput, "frame%06d.json" % nextInNr)

    nextJson = os.path.join(args.segmentDir, "segment%06d.json" % nextOutNr)
    while os.path.exists(nextJson):
        print("(MERGE) Found: {}".format(nextJson))
        frameOutNr = (nextOutNr - 1) / segmentCount + 1 # 1,2,3,4 -> 1; 5,6,7,8 -> 2 (uses trick to divide ints: 7/4=1)
        img = Image.open( os.path.join(args.frameOutput, "frame%06d.jpg" % frameOutNr) )
        segments = getSectionsForSize(img.size)
        creationTime = img._getexif()[36867]

        faces = []
        times = []

        for i, segment in enumerate(segments):
            segmentNr = nextOutNr - segmentCount + i + 1 # nextOutNr: 4 => 1,2,3,4
            with open( os.path.join(args.segmentDir, "segment%06d.json" % segmentNr) ) as fp:
                j = json.load(fp)

            for f in j['faces']:
                f['rect']['x'] += segment[0]
                f['rect']['y'] += segment[1]
                f['id'] = "%d%02d" % ((i+1), f['id'])
                faces.append(f)
            times.append(j['t'])

        data = {
            'nr': int(frameOutNr),
            't': creationTime,
            'faces': faces,
            'sectionTimes': times,
        }
        jsonOutFilename = os.path.join(args.frameOutput, "frame%06d.json" % frameOutNr)
        with open(jsonOutFilename, 'w') as fp:
            json.dump(data, fp)
        print("(MERGE) Wrote: {}".format(jsonOutFilename))

        nextOutNr += segmentCount
        nextJson = os.path.join(args.segmentDir, "segment%06d.json" % nextOutNr)

    time.sleep(0.2)