From 3b0e0eda162110ee1850f848121d546d594c23fb Mon Sep 17 00:00:00 2001
From: Ruben
Date: Fri, 24 Nov 2017 12:41:38 +0100
Subject: [PATCH] Add possibility to use segments for better face detection

---
 README.md                                 |   4 +
 opencv-webcam-demo/opencv-webcam-demo.cpp |   8 +-
 parse_output.py                           | 236 +++++++++++++++++-----
 split_and_merge_output.py                 |  91 +++++++++
 4 files changed, 281 insertions(+), 58 deletions(-)
 create mode 100644 split_and_merge_output.py

diff --git a/README.md b/README.md
index 3a27614..aa816dc 100644
--- a/README.md
+++ b/README.md
@@ -7,3 +7,7 @@ gphoto2 to capture images:
 
 The modified 'webcam demo' to analyse and generate json:
 `/home/crowd/build/opencv-webcam-demo/opencv-webcam-demo --data /home/crowd/affdex-sdk/data --faceMode 1 --numFaces 80 -o /home/crowd/output-backup/ --draw 0`
+
+
+Using split_and_merge_output.py, frames are split into segments so that SMALL_FACES mode actually detects _really_ small faces. This requires enabling --segments on opencv-webcam-demo (so it detects/writes segment%06d files instead of frame%06d files).
+split_and_merge_output.py also re-merges the resulting segment%06d.json files into a frame%06d.json file.
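+
+A sketch of the combined pipeline (illustrative; both processes run in parallel, and the paths follow the example above, so adjust them to your setup):
+`mkdir -p /home/crowd/output-backup/segments`
+`python split_and_merge_output.py -o /home/crowd/output-backup/ --segmentDir /home/crowd/output-backup/segments`
+`/home/crowd/build/opencv-webcam-demo/opencv-webcam-demo --data /home/crowd/affdex-sdk/data --faceMode 1 --numFaces 80 -o /home/crowd/output-backup/segments --draw 0 --segments 1`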
"segment%06d.json" : "frame%06d.json"), frameNrOut); boost::filesystem::path targetFilename = outPath / buff; std::ofstream out(targetFilename.native()); std::cerr << "write "<< targetFilename.native() << std::endl; diff --git a/parse_output.py b/parse_output.py index 008c4ea..d18dc15 100644 --- a/parse_output.py +++ b/parse_output.py @@ -1,18 +1,50 @@ import os -from PIL import Image, ImageDraw +from PIL import Image, ImageDraw,ImageTk import argparse import json import time import glob import numpy as np +import Tkinter + + + + +facialParameters = [ + "smile", + "innerBrowRaise", + "browRaise", + "browFurrow", + "noseWrinkle", + "upperLipRaise", + "lipCornerDepressor", + "chinRaise", + "lipPucker", + "lipPress", + "lipSuck", + "mouthOpen", + "smirk", + #~ "attention", + "eyeClosure", + "eyeWiden", + "cheekRaise", + "lidTighten", + "dimpler", + "lipStretch", + "jawDrop", +] parser = argparse.ArgumentParser(description='Parses opencv-webcam-demo json output files and collects statistics') parser.add_argument('--frameOutput', '-o', required=True, help='directory to look for frames & json') -parser.add_argument('--status', '-s', action='store_true', help='Keep status of last frame') +parser.add_argument('--status', action='store_true', help='Keep status of last frame') parser.add_argument('--cutAllFaces', action='store_true', help='Cut out all faces from all frames') parser.add_argument('--sum', action='store_true', help='Get total scores over all time') +parser.add_argument('--unique', action='store_true', help='Get most unique window') +parser.add_argument('--avg', action='store_true', help='Get most average window') parser.add_argument('--disonant', action='store_true', help='Get most disonant faces over time') +parser.add_argument('--window-size', '-s', type=int, default=10, help='The nr of frames to group in one sliding window for analysis') +parser.add_argument("--params", "-p", type=str, nargs='+', default=facialParameters, choices=facialParameters, help="The parameters used to calculate the statistics") args = parser.parse_args() @@ -29,30 +61,8 @@ class Face: r = self.data['rect'] return self.frame.getImg().crop((int(r['x']), int(r['y']), int(r['x']+r['w']), int(r['y']+r['h']))) - def getCharacteristicVector(self): - self.vector = np.array([ - self.data["smile"], - self.data["innerBrowRaise"], - self.data["browRaise"], - self.data["browFurrow"], - self.data["noseWrinkle"], - self.data["upperLipRaise"], - self.data["lipCornerDepressor"], - self.data["chinRaise"], - self.data["lipPucker"], - self.data["lipPress"], - self.data["lipSuck"], - self.data["mouthOpen"], - self.data["smirk"], - self.data["eyeClosure"], - # self.data["attention"], - self.data["eyeWiden"], - self.data["cheekRaise"], - self.data["lidTighten"], - self.data["dimpler"], - self.data["lipStretch"], - self.data["jawDrop"], - ]) + def getCharacteristicVector(self, params): + self.vector = [self.data[p] for p in params] return self.vector def setAnomalyScore(self, score): @@ -81,8 +91,17 @@ class Frame: #~ # no json file yet? 
 
 args = parser.parse_args()
 
@@ -29,30 +61,8 @@ class Face:
         r = self.data['rect']
         return self.frame.getImg().crop((int(r['x']), int(r['y']), int(r['x']+r['w']), int(r['y']+r['h'])))
 
-    def getCharacteristicVector(self):
-        self.vector = np.array([
-            self.data["smile"],
-            self.data["innerBrowRaise"],
-            self.data["browRaise"],
-            self.data["browFurrow"],
-            self.data["noseWrinkle"],
-            self.data["upperLipRaise"],
-            self.data["lipCornerDepressor"],
-            self.data["chinRaise"],
-            self.data["lipPucker"],
-            self.data["lipPress"],
-            self.data["lipSuck"],
-            self.data["mouthOpen"],
-            self.data["smirk"],
-            self.data["eyeClosure"],
-            # self.data["attention"],
-            self.data["eyeWiden"],
-            self.data["cheekRaise"],
-            self.data["lidTighten"],
-            self.data["dimpler"],
-            self.data["lipStretch"],
-            self.data["jawDrop"],
-        ])
+    def getCharacteristicVector(self, params):
+        # vector of the selected metrics, in the order given by params
+        self.vector = np.array([self.data[p] for p in params])
         return self.vector
 
     def setAnomalyScore(self, score):
@@ -81,8 +91,17 @@ class Frame:
             #~ # no json file yet?
             #~ return None
 
-    def getImg(self):
-        return Image.open(self.imgPath)
+    def getImg(self, markFaces = True):
+        img = Image.open(self.imgPath)
+        if not markFaces:
+            return img
+
+        # draw a red rectangle around every detected face
+        draw = ImageDraw.Draw(img)
+        for f in self.getFaces():
+            xy1 = (int(f.data['rect']['x']), int(f.data['rect']['y']))
+            xy2 = (int(f.data['rect']['x'] + f.data['rect']['w']), int(f.data['rect']['y'] + f.data['rect']['h']))
+            draw.rectangle([xy1, xy2], outline="#ff0000")
+        return img
 
     def getFaces(self):
         if self.faces is None:
@@ -108,13 +127,13 @@ class Frame:
         for face in self.getFaces():
             face.disonanceScore = abs(face.data['valence'] - avgValence)
 
-    def getAverageV(self):
-        vectors = [face.getCharacteristicVector() for face in self.getFaces()]
+    def getAverageV(self, params):
+        vectors = [face.getCharacteristicVector(params) for face in self.getFaces()]
         vAvg = np.mean(vectors, axis=0)
         return vAvg
 
-    def updateAnomalyScores(self):
-        vAvg = self.getAverageV()
+    def updateAnomalyScores(self, params):
+        vAvg = self.getAverageV(params)
         for face in self.getFaces():
-            face.setAnomalyScore(np.linalg.norm(face.getCharacteristicVector() - vAvg))
+            face.setAnomalyScore(np.linalg.norm(face.getCharacteristicVector(params) - vAvg))
@@ -124,6 +143,88 @@
 
 frames = {}
 
+class Window:
+    def __init__(self, frameSubset):
+        """
+        Init a sliding window for the given Frame-s
+        """
+        self.frames = frameSubset
+        self.deviation = None
+        self.standardDeviation = None
+
+    def getFaces(self):
+        faces = []
+        for frame in self.frames:
+            faces.extend(frame.getFaces())
+        return faces
+
+    def getStdDev(self, params):
+        """
+        Get the standard deviation of the faces within the window for the given
+        params (a single scalar over all faces and params)
+        """
+        vectors = [f.getCharacteristicVector(params) for f in self.getFaces()]
+        return np.std(vectors)
+
+    def getAverageV(self, params):
+        vectors = [f.getCharacteristicVector(params) for f in self.getFaces()]
+        vAvg = np.mean(vectors, axis=0)
+        return vAvg
+
+    @staticmethod
+    def createWindows(windowSize, frames):
+        """
+        Take the full dict of frames and turn it into a collection of sliding windows
+        """
+        frames = sorted(frames.items(), key=lambda f: f[0])
+        frames = [f[1] for f in frames]
+        windows = []
+        windowCount = len(frames) - windowSize + 1
+        if windowCount < 1:
+            raise Exception("Not enough frames ({}) for a window of size {}".format(len(frames), windowSize))
+
+        for offset in range(0, windowCount):
+            frameSubset = frames[offset:offset+windowSize]
+            windows.append(Window(frameSubset))
+
+        return windows
+
+class WindowCollection:
+    def __init__(self, windowSize, frames):
+        self.windows = Window.createWindows(windowSize, frames)
+        self.frames = frames
+        #~ self.faces = [face for frame in frames for face in frame.getFaces()]
+
+    #~ def getMostWindowsClosestToMedian(self, nr = 5):
+        #~ """
+        #~ Get windows with the faces closest to the median
+        #~ """
+        #~ self.faces
+
+    def getWindowVectors(self, params):
+        return [window.getAverageV(params) for window in self.windows]
+
+    def getWindowsByDeviation(self, params):
+        """
+        All windows, sorted from closest to the all-window average to furthest away
+        """
+        vectors = self.getWindowVectors(params)
+        vAvg = np.mean(vectors, axis=0)
+        #~ diffs = [np.linalg.norm(v-vAvg) for v in vectors]
+        #~ min_index, min_value = min(enumerate(diffs), key=lambda p: p[1])
+        #~ max_index, max_value = max(enumerate(diffs), key=lambda p: p[1])
+        return sorted(self.windows, key=lambda w: np.linalg.norm(w.getAverageV(params) - vAvg))
+
+    def getUniqueWindows(self, params, nr=5):
+        # the windows furthest from the average are the most unique ones
+        windows = self.getWindowsByDeviation(params)
+        windows.reverse()
+        return windows[0:nr]
+
+    def getMostAvgWindows(self, params, nr=5):
+        # the windows closest to the average are the most 'typical' ones
+        windows = self.getWindowsByDeviation(params)
+        return windows[0:nr]
+
+    def getMostContrastingWindows(self, params, nr=5):
+        sortedWindows = sorted(self.windows, key=lambda w: w.getStdDev(params), reverse=True)
+        return sortedWindows[0:nr]
+
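+# Illustrative sketch (assumed numbers): with 25 frames loaded and the default
+# --window-size of 10, createWindows() yields 25 - 10 + 1 = 16 overlapping
+# windows (frames 1-10, 2-11, ..., 16-25), which could be ranked like:
+#~ collection = WindowCollection(10, frames)
+#~ mostTypical = collection.getMostAvgWindows(["smile"], nr=1) # closest to the mean window vector
+#~ mostUnique = collection.getUniqueWindows(["smile"], nr=1)   # furthest from the mean window vector
+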
 def loadFrames(frameDir):
     global frames
     nr = 2
@@ -185,9 +286,9 @@ def getMostDisonant(nr = 5):
             print("Frame %d, face %d, score %d, valence %d" % (face.frame.nr, face.id, face.disonanceScore, face.data['valence']))
             face.getFaceImg().show()
 
-def getAnomalies(nr = 5):
+def getAnomalies(params, nr = 5):
     for frameNr, frame in loadFrames(args.frameOutput).items():
-        frame.updateAnomalyScores()
+        frame.updateAnomalyScores(params)
 
     faces.sort(key=lambda x: x.anomalyScore, reverse=True)
     anomalies = faces[:nr]
@@ -196,10 +297,10 @@
         #~ getCharacteristicVector
         face.getFaceImg().show()
 
-def printFrameStats(frame):
+def printFrameStats(frame, params):
     os.system('clear')
     print(time.time())
-    print( ("Nr: %d" % frame.nr).ljust(40) + ("t: %f" % frame.getJson()['t']) )
+    print( ("Nr: %d" % frame.nr).ljust(40) + ("t: {}".format(frame.getJson()['t'])) )
     #~ print
     faces = frame.getFaces()
     print("Faces: %d" % len(faces))
@@ -207,24 +308,16 @@
     if len(faces) < 1:
         return
 
-    params = ['smile', 'browFurrow']
+    # header row, then one row of percentiles per selected parameter
+    print " ".ljust(20), "0%".rjust(13), "q1".rjust(13), "median".rjust(13), "q3".rjust(13), "100%".rjust(13)
 
-    q0s = [np.percentile(np.array([f.data[param] for f in faces]),0) for param in params]
-    q1s = [np.percentile(np.array([f.data[param] for f in faces]),25) for param in params]
-    q2s = [np.percentile(np.array([f.data[param] for f in faces]),50) for param in params]
-    q3s = [np.percentile(np.array([f.data[param] for f in faces]),75) for param in params]
-    q4s = [np.percentile(np.array([f.data[param] for f in faces]),100) for param in params]
-
-    print " ".ljust(8),
     for p in params:
-        print p.center(20),
-    print ""
+        q0 = np.percentile(np.array([f.data[p] for f in faces]), 0)
+        q1 = np.percentile(np.array([f.data[p] for f in faces]), 25)
+        q2 = np.percentile(np.array([f.data[p] for f in faces]), 50)
+        q3 = np.percentile(np.array([f.data[p] for f in faces]), 75)
+        q4 = np.percentile(np.array([f.data[p] for f in faces]), 100)
+        print p.ljust(20), ("%f%%" % q0).rjust(13), ("%f%%" % q1).rjust(13), ("%f%%" % q2).rjust(13), ("%f%%" % q3).rjust(13), ("%f%%" % q4).rjust(13)
 
-    print(" 0% " + "".join([("%f%%" % q).rjust(20) for q in q0s]))
-    print(" q1 " + "".join([("%f%%" % q).rjust(20) for q in q1s]))
-    print(" median " + "".join([("%f%%" % q).rjust(20) for q in q2s]))
-    print(" q3 " + "".join([("%f%%" % q).rjust(20) for q in q3s]))
-    print(" 100% " + "".join([("%f%%" % q).rjust(20) for q in q4s]))
 
     #~ TODO: speaker stats
@@ -233,15 +326,37 @@
 
     #~ dissonantFace.getFaceImg()
 
-def monitorStatus(frameDir):
+def monitorStatus(frameDir, params):
     while True:
         frame = getLastFrame(frameDir)
         if not frame is None:
-            printFrameStats(frame)
+            printFrameStats(frame, params)
 
         # don't check too often
         time.sleep(.5)
-
+
+def playWindowStopmotion(window):
+    """
+    Play a set of sliding window frames as a stop motion video
+    """
+    root = Tkinter.Tk()
+    root.geometry('%dx%d+%d+%d' % (1000, 1000, 0, 0))
+    canvas = Tkinter.Canvas(root, width=1000, height=1000)
+    canvas.pack()
+
+    for frame in window.frames:
+        image = frame.getImg()
+        # scale to a fixed width of 1000px, keeping the aspect ratio
+        basewidth = 1000
+        wpercent = (basewidth / float(image.size[0]))
+        hsize = int((float(image.size[1]) * float(wpercent)))
+        image = image.resize((basewidth, hsize), Image.ANTIALIAS)
+
+        # tkpi must stay referenced while displayed, or Tkinter drops the image
+        tkpi = ImageTk.PhotoImage(image)
+        canvas.delete("IMG")
+        canvas.create_image(500, 500, image=tkpi, tags="IMG")
+        root.update()
+        time.sleep(1)
 
 validateJsonTimes()
 
@@ -259,6 +374,17 @@
 if args.cutAllFaces:
     for frameNr, frame in loadFrames(args.frameOutput).items():
         cutOutFaces(faceDir)
 
+if args.unique:
+    collection = WindowCollection(args.window_size, frames)
+    windows = collection.getUniqueWindows(args.params)
+    #~ print(windows)
+    playWindowStopmotion(windows[0])
+
+if args.avg:
+    collection = WindowCollection(args.window_size, frames)
+    windows = collection.getMostAvgWindows(args.params)
+    #~ print(windows)
+    playWindowStopmotion(windows[0])
 
 if args.status:
-    monitorStatus(args.frameOutput)
+    monitorStatus(args.frameOutput, args.params)
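+
+#~ Hypothetical extension (sketch, not wired to a CLI flag): the windows with
+#~ the largest internal spread could be played back the same way:
+#~ windows = WindowCollection(args.window_size, frames).getMostContrastingWindows(args.params)
+#~ playWindowStopmotion(windows[0])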
diff --git a/split_and_merge_output.py b/split_and_merge_output.py
new file mode 100644
index 0000000..410c19d
--- /dev/null
+++ b/split_and_merge_output.py
@@ -0,0 +1,91 @@
+import argparse
+import os
+from PIL import Image
+import time
+import json
+
+parser = argparse.ArgumentParser(description="""
+Splits frame%06d.jpg files into overlapping segment%06d.jpg files so that the
+SMALL_FACES detector mode actually detects _really_ small faces, and re-merges
+the resulting segment%06d.json files into one frame%06d.json file per frame.
+This requires enabling --segments on opencv-webcam-demo (so it detects/writes
+segment%06d files).
+""")
+parser.add_argument('--frameOutput', '-o', required=True, help='directory to look for frames & json')
+parser.add_argument('--segmentDir', required=True, help='directory to write segment jpgs to and read segment json from')
+
+args = parser.parse_args()
+
+if not os.path.exists(args.frameOutput):
+    raise Exception(args.frameOutput + " does not exist")
+if not os.path.exists(args.segmentDir):
+    raise Exception(args.segmentDir + " does not exist")
+
+def getSectionsForSize(size):
+    """
+    Get the four crop sections for an image of the given (width, height) size,
+    as (left, upper, right, lower) boxes for PIL's Image.crop. The divisors
+    1.85 and 2.1 make neighbouring sections overlap slightly, so a face on a
+    section border is still fully contained in one of the sections.
+    """
+    return [
+        ( 0, 0, int(size[0]/1.85), int(size[1]/1.85) ),
+        ( int(size[0]/2.1), 0, size[0], int(size[1]/1.85) ),
+        ( 0, int(size[1]/2.1), int(size[0]/1.85), size[1] ),
+        ( int(size[0]/2.1), int(size[1]/2.1), size[0], size[1] ),
+    ]
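+# For illustration: with a 1920x1080 frame, getSectionsForSize returns
+#   (0,   0, 1037,  583)   (914,   0, 1920,  583)
+#   (0, 514, 1037, 1080)   (914, 514, 1920, 1080)
+# so horizontally adjacent sections share a 123px strip and vertically
+# adjacent ones a 69px strip; a small face on a section border is therefore
+# still fully contained in at least one section.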
+
+nextInNr = 1
+segmentCount = 4 # cannot really be changed for now
+nextOutNr = segmentCount
+while True:
+    nextJpg = os.path.join(args.frameOutput, "frame%06d.jpg" % nextInNr)
+    nextInJson = os.path.join(args.frameOutput, "frame%06d.json" % nextInNr)
+    # split any frame jpg that doesn't have a (merged) json yet
+    while os.path.exists(nextJpg) and not os.path.exists(nextInJson):
+        print("(SPLIT) Found: {}".format(nextJpg))
+        img = Image.open(nextJpg)
+
+        segments = getSectionsForSize(img.size)
+
+        for i, segment in enumerate(segments):
+            segmentImg = img.crop(segment)
+            segmentNr = (nextInNr-1) * segmentCount + i + 1 # start at 1, increment from there
+            segmentFilename = os.path.join(args.segmentDir, "segment%06d.jpg" % segmentNr)
+            segmentImg.save(segmentFilename, quality=90)
+            print("(SPLIT) Created: {}".format(segmentFilename))
+
+        nextInNr += 1
+        nextJpg = os.path.join(args.frameOutput, "frame%06d.jpg" % nextInNr)
+        nextInJson = os.path.join(args.frameOutput, "frame%06d.json" % nextInNr)
+
+    nextJson = os.path.join(args.segmentDir, "segment%06d.json" % nextOutNr)
+    while os.path.exists(nextJson):
+        # segment jsons are written in order, so once the json for the last
+        # segment of a frame (nextOutNr) exists, the whole group can be merged
+        print("(MERGE) Found: {}".format(nextJson))
+        frameOutNr = (nextOutNr - 1) // segmentCount + 1 # integer division: segments 1,2,3,4 -> frame 1; 5,6,7,8 -> frame 2
+        img = Image.open( os.path.join(args.frameOutput, "frame%06d.jpg" % frameOutNr) )
+        segments = getSectionsForSize(img.size)
+        creationTime = img._getexif()[36867] # EXIF tag 36867: DateTimeOriginal
+
+        faces = []
+        times = []
+
+        for i, segment in enumerate(segments):
+            segmentNr = nextOutNr - segmentCount + i + 1 # nextOutNr: 4 => 1,2,3,4
+            with open( os.path.join(args.segmentDir, "segment%06d.json" % segmentNr) ) as fp:
+                j = json.load(fp)
+
+            for f in j['faces']:
+                # map the rect back to full-frame coordinates
+                f['rect']['x'] += segment[0]
+                f['rect']['y'] += segment[1]
+                # prefix the id with the section number so ids stay unique across sections
+                f['id'] = "%d%02d" % ((i+1), f['id'])
+                faces.append(f)
+            times.append(j['t'])
+
+        data = {
+            'nr': int(frameOutNr),
+            't': creationTime,
+            'faces': faces,
+            'sectionTimes': times,
+        }
+        jsonOutFilename = os.path.join(args.frameOutput, "frame%06d.json" % frameOutNr)
+        with open(jsonOutFilename, 'w') as fp:
+            json.dump(data, fp)
+        print("(MERGE) Wrote: {}".format(jsonOutFilename))
+
+        nextOutNr += segmentCount
+        nextJson = os.path.join(args.segmentDir, "segment%06d.json" % nextOutNr)
+
+    time.sleep(0.2)
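+
+# Numbering sketch for reference (segmentCount = 4): frame N maps to segments
+# 4N-3 .. 4N, so frame 1 -> segments 1-4 and frame 2 -> segments 5-8. The
+# segment jsons appear in order, so once segment%06d.json for 4N exists the
+# whole group of frame N can be merged; hence nextOutNr starts at segmentCount
+# and advances in steps of segmentCount.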