Add possibility to use segments for better face detection

Ruben 2017-11-24 12:41:38 +01:00
parent ff4ccd7410
commit 3b0e0eda16
4 changed files with 281 additions and 58 deletions

View File

@@ -7,3 +7,7 @@ gphoto2 to capture images:
 The modified 'webcam demo' to analyse and generate json:
 `/home/crowd/build/opencv-webcam-demo/opencv-webcam-demo --data /home/crowd/affdex-sdk/data --faceMode 1 --numFaces 80 -o /home/crowd/output-backup/ --draw 0`
+Using split_and_merge_output, frames are split into segments so SMALL_FACES actually detects _really_ small faces. This requires enabling --segments on opencv-webcam-demo (so it detects/writes segment%06d files).
+split_and_merge_output also remerges these segment%06d.json files into a frame%06d.json file.
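As a rough sketch of the numbering convention this implies (a 2×2 grid, as hard-coded in split_and_merge_output.py below; the helper names here are illustrative, not part of the commit):

```python
segmentCount = 4  # each frame is cut into four (slightly overlapping) quadrants

def segment_numbers(frame_nr):
    # segments written for a frame: frame 1 -> 1..4, frame 2 -> 5..8, ...
    first = (frame_nr - 1) * segmentCount + 1
    return list(range(first, first + segmentCount))

def frame_number(segment_nr):
    # frame a segment belongs to: segments 1..4 -> 1, 5..8 -> 2, ...
    return (segment_nr - 1) // segmentCount + 1

assert segment_numbers(2) == [5, 6, 7, 8]
assert frame_number(7) == 2
```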

View File

@@ -143,6 +143,7 @@ int main(int argsc, char ** argsv)
 int buffer_length = 2;
 unsigned int nFaces = 1;
 bool draw_display = true;
+bool use_segments = false;
 int faceDetectorMode = (int)FaceDetectorMode::SMALL_FACES;
 boost::filesystem::path imgPath("~/emo_in_file.jpg");
 boost::filesystem::path outPath("~/output/");
@@ -167,6 +168,7 @@ int main(int argsc, char ** argsv)
 ("faceMode", po::value< int >(&faceDetectorMode)->default_value((int)FaceDetectorMode::SMALL_FACES), "Face detector mode (large faces vs small faces).")
 ("numFaces", po::value< unsigned int >(&nFaces)->default_value(1), "Number of faces to be tracked.")
 ("draw", po::value< bool >(&draw_display)->default_value(true), "Draw metrics on screen.")
+("segments", po::value< bool >(&use_segments)->default_value(use_segments), "Use 'segments' rather than 'frames' (influences detecting frame%06d or segment%06d).")
 //~ ("file,f", po::value< boost::filesystem::path >(&imgPath)->default_value(imgPath), "Filename of image that is watched/tracked for changes.")
 ("frameOutput,o", po::value< boost::filesystem::path >(&outPath)->default_value(outPath), "Directory to store the frame in (and json)")
 ;
@@ -247,7 +249,7 @@ int main(int argsc, char ** argsv)
 int seconds = 1;
 while(true){ //(cv::waitKey(20) != -1);
 char buff[100];
-snprintf(buff, sizeof(buff), "frame%06d.jpg", frameNrIn);
+snprintf(buff, sizeof(buff), (use_segments ? "segment%06d.jpg" : "frame%06d.jpg"), frameNrIn);
 boost::filesystem::path imgPath = outPath / buff;
 if ( !boost::filesystem::exists( imgPath.native() )|| frameNrIn > frameNrOut ) {
 // wait for file to appear
@@ -257,7 +259,7 @@ int main(int argsc, char ** argsv)
 std::cerr << "Read " << imgPath.native() << std::endl;
 char buff[100];
-snprintf(buff, sizeof(buff), "frame%06d.json", frameNrIn);
+snprintf(buff, sizeof(buff), (use_segments ? "segment%06d.json" : "frame%06d.json"), frameNrIn);
 boost::filesystem::path jsonPath = outPath / buff;
 // don't redo existing jsons
@@ -300,7 +302,7 @@ int main(int argsc, char ** argsv)
 // store json
 char buff[100];
-snprintf(buff, sizeof(buff), "frame%06d.json", frameNrOut);
+snprintf(buff, sizeof(buff), (use_segments ? "segment%06d.json" : "frame%06d.json"), frameNrOut);
 boost::filesystem::path targetFilename = outPath / buff;
 std::ofstream out(targetFilename.native());
 std::cerr << "write "<< targetFilename.native() << std::endl;

View File

@@ -1,18 +1,50 @@
 import os
-from PIL import Image, ImageDraw
+from PIL import Image, ImageDraw, ImageTk
 import argparse
 import json
 import time
 import glob
 import numpy as np
+import Tkinter
+facialParameters = [
+    "smile",
+    "innerBrowRaise",
+    "browRaise",
+    "browFurrow",
+    "noseWrinkle",
+    "upperLipRaise",
+    "lipCornerDepressor",
+    "chinRaise",
+    "lipPucker",
+    "lipPress",
+    "lipSuck",
+    "mouthOpen",
+    "smirk",
+    #~ "attention",
+    "eyeClosure",
+    "eyeWiden",
+    "cheekRaise",
+    "lidTighten",
+    "dimpler",
+    "lipStretch",
+    "jawDrop",
+]
 parser = argparse.ArgumentParser(description='Parses opencv-webcam-demo json output files and collects statistics')
 parser.add_argument('--frameOutput', '-o', required=True, help='directory to look for frames & json')
-parser.add_argument('--status', '-s', action='store_true', help='Keep status of last frame')
+parser.add_argument('--status', action='store_true', help='Keep status of last frame')
 parser.add_argument('--cutAllFaces', action='store_true', help='Cut out all faces from all frames')
 parser.add_argument('--sum', action='store_true', help='Get total scores over all time')
+parser.add_argument('--unique', action='store_true', help='Get most unique window')
+parser.add_argument('--avg', action='store_true', help='Get most average window')
 parser.add_argument('--disonant', action='store_true', help='Get most disonant faces over time')
+parser.add_argument('--window-size', '-s', type=int, default=10, help='The nr of frames to group in one sliding window for analysis')
+parser.add_argument("--params", "-p", type=str, nargs='+', default=facialParameters, choices=facialParameters, help="The parameters used to calculate the statistics")
 args = parser.parse_args()
@@ -29,30 +61,8 @@ class Face:
         r = self.data['rect']
         return self.frame.getImg().crop((int(r['x']), int(r['y']), int(r['x']+r['w']), int(r['y']+r['h'])))
-    def getCharacteristicVector(self):
-        self.vector = np.array([
-            self.data["smile"],
-            self.data["innerBrowRaise"],
-            self.data["browRaise"],
-            self.data["browFurrow"],
-            self.data["noseWrinkle"],
-            self.data["upperLipRaise"],
-            self.data["lipCornerDepressor"],
-            self.data["chinRaise"],
-            self.data["lipPucker"],
-            self.data["lipPress"],
-            self.data["lipSuck"],
-            self.data["mouthOpen"],
-            self.data["smirk"],
-            self.data["eyeClosure"],
-            # self.data["attention"],
-            self.data["eyeWiden"],
-            self.data["cheekRaise"],
-            self.data["lidTighten"],
-            self.data["dimpler"],
-            self.data["lipStretch"],
-            self.data["jawDrop"],
-        ])
+    def getCharacteristicVector(self, params):
+        self.vector = [self.data[p] for p in params]
         return self.vector
     def setAnomalyScore(self, score):
@@ -81,8 +91,17 @@ class Frame:
     #~ # no json file yet?
     #~ return None
-    def getImg(self):
-        return Image.open(self.imgPath)
+    def getImg(self, markFaces = True):
+        img = Image.open(self.imgPath)
+        if not markFaces:
+            return img
+        draw = ImageDraw.Draw(img)
+        for f in self.getFaces():
+            xy1 = (int(f.data['rect']['x']), int(f.data['rect']['y']))
+            xy2 = (int(f.data['rect']['x'] + f.data['rect']['w']), int(f.data['rect']['y'] + f.data['rect']['h']))
+            draw.rectangle([xy1, xy2], outline="#ff0000")
+        return img
     def getFaces(self):
         if self.faces is None:
@@ -108,13 +127,13 @@ class Frame:
         for face in self.getFaces():
             face.disonanceScore = abs(face.data['valence'] - avgValence)
-    def getAverageV(self):
-        vectors = [face.getCharacteristicVector() for face in self.getFaces()]
+    def getAverageV(self, params):
+        vectors = [face.getCharacteristicVector(params) for face in self.getFaces()]
         vAvg = np.mean(vectors, axis=0)
         return vAvg
-    def updateAnomalyScores(self):
-        vAvg = self.getAverageV()
+    def updateAnomalyScores(self, params):
+        vAvg = self.getAverageV(params)
         for face in self.getFaces():
-            face.setAnomalyScore(np.linalg.norm(face.getCharacteristicVector() - vAvg))
+            face.setAnomalyScore(np.linalg.norm(face.getCharacteristicVector(params) - vAvg))
@@ -124,6 +143,88 @@ class Frame:
 frames = {}
+class Window:
+    def __init__(self, frameSubset):
+        """
+        Init a sliding window for the given Frame-s
+        """
+        self.frames = frameSubset
+        self.deviation = None
+        self.standardDeviation = None
+    def getFaces(self):
+        faces = []
+        for frame in self.frames:
+            faces.extend(frame.getFaces())
+        return faces
+    def getStdDev(self, params):
+        """
+        Get the standard deviation of the faces within the window for the given params
+        """
+        vectors = [f.getCharacteristicVector(params) for f in self.getFaces()]
+        return np.std(vectors)
+    def getAverageV(self, params):
+        vectors = [f.getCharacteristicVector(params) for f in self.getFaces()]
+        vAvg = np.mean(vectors, axis=0)
+        return vAvg
+    @staticmethod
+    def createWindows(windowSize, frames):
+        """
+        Given a full dict of frames, turn it into a collection of sliding windows
+        """
+        frames = sorted(frames.items(), key=lambda f: f[0])
+        frames = [f[1] for f in frames]
+        windows = []
+        windowCount = len(frames) - windowSize + 1
+        if windowCount < 1:
+            raise Exception("Not enough frames ({}) for a window of size {}".format(len(frames), windowSize))
+        for offset in range(0, windowCount):
+            frameSubset = frames[offset:offset+windowSize]
+            windows.append(Window(frameSubset))
+        return windows
+class WindowCollection:
+    def __init__(self, windowSize, frames):
+        self.windows = Window.createWindows(windowSize, frames)
+        self.frames = frames
+        #~ self.faces = [face for face in frame.getFaces() for frame in frames]
+    #~ def getMostWindowsClosestToMedian(self, nr = 5):
+        #~ """
+        #~ Get windows with the faces closest to the median
+        #~ """
+        #~ self.faces
+    def getWindowVectors(self, params):
+        return [window.getAverageV(params) for window in self.windows]
+    def getWindowsByDeviation(self, params):
+        # windows sorted by how far their average vector lies from the global average (ascending)
+        vectors = self.getWindowVectors(params)
+        vAvg = np.mean(vectors, axis=0)
+        #~ diffs = [np.linalg.norm(v-vAvg) for v in vectors]
+        #~ min_index, min_value = min(enumerate(diffs), key=lambda p: p[1])
+        #~ max_index, max_value = max(enumerate(diffs), key=lambda p: p[1])
+        return sorted(self.windows, key=lambda w: np.linalg.norm(w.getAverageV(params)-vAvg))
+    def getUniqueWindows(self, params, nr=5):
+        # most deviant windows: the tail of the ascending sort
+        windows = self.getWindowsByDeviation(params)
+        windows.reverse()
+        return windows[0:nr]
+    def getMostAvgWindows(self, params, nr=5):
+        # windows closest to the global average: the head of the ascending sort
+        windows = self.getWindowsByDeviation(params)
+        return windows[0:nr]
+    def getMostContrastingWindows(self, params, nr=5):
+        sortedWindows = sorted(self.windows, key=lambda w: w.getStdDev(params), reverse=True)
+        return sortedWindows[0:nr]
 def loadFrames(frameDir):
     global frames
     nr = 2
@@ -185,9 +286,9 @@ def getMostDisonant(nr = 5):
         print("Frame %d, face %d, score %d, valence %d" % (face.frame.nr, face.id, face.disonanceScore, face.data['valence']))
         face.getFaceImg().show()
-def getAnomalies(nr = 5):
+def getAnomalies(params, nr = 5):
     for frameNr, frame in loadFrames(args.frameOutput).items():
-        frame.updateAnomalyScores()
+        frame.updateAnomalyScores(params)
     faces.sort(key=lambda x: x.anomalyScore, reverse=True)
     anomalies = faces[:nr]
@@ -196,10 +297,10 @@ def getAnomalies(nr = 5):
         #~ getCharacteristicVector
         face.getFaceImg().show()
-def printFrameStats(frame):
+def printFrameStats(frame, params):
     os.system('clear')
     print(time.time())
-    print( ("Nr: %d" % frame.nr).ljust(40) + ("t: %f" % frame.getJson()['t']) )
+    print( ("Nr: %d" % frame.nr).ljust(40) + ("t: {}".format(frame.getJson()['t'])) )
     #~ print
     faces = frame.getFaces()
     print("Faces: %d" % len(faces))
@@ -207,24 +308,16 @@ def printFrameStats(frame):
     if len(faces) < 1:
         return
-    params = ['smile', 'browFurrow']
-    q0s = [np.percentile(np.array([f.data[param] for f in faces]),0) for param in params]
-    q1s = [np.percentile(np.array([f.data[param] for f in faces]),25) for param in params]
-    q2s = [np.percentile(np.array([f.data[param] for f in faces]),50) for param in params]
-    q3s = [np.percentile(np.array([f.data[param] for f in faces]),75) for param in params]
-    q4s = [np.percentile(np.array([f.data[param] for f in faces]),100) for param in params]
-    print " ".ljust(8),
-    for p in params:
-        print p.center(20),
-    print ""
-    print(" 0% " + "".join([("%f%%" % q).rjust(20) for q in q0s]))
-    print(" q1 " + "".join([("%f%%" % q).rjust(20) for q in q1s]))
-    print(" median " + "".join([("%f%%" % q).rjust(20) for q in q2s]))
-    print(" q3 " + "".join([("%f%%" % q).rjust(20) for q in q3s]))
-    print(" 100% " + "".join([("%f%%" % q).rjust(20) for q in q4s]))
+    print " ".ljust(20), "0%".rjust(13), "q1".rjust(13), "median".rjust(13), "q3".rjust(13), "100%".rjust(13)
+    for p in params:
+        q0 = np.percentile(np.array([f.data[p] for f in faces]), 0)
+        q1 = np.percentile(np.array([f.data[p] for f in faces]), 25)
+        q2 = np.percentile(np.array([f.data[p] for f in faces]), 50)
+        q3 = np.percentile(np.array([f.data[p] for f in faces]), 75)
+        q4 = np.percentile(np.array([f.data[p] for f in faces]), 100)
+        print p.ljust(20), ("%f%%" % q0).rjust(13), ("%f%%" % q1).rjust(13), ("%f%%" % q2).rjust(13), ("%f%%" % q3).rjust(13), ("%f%%" % q4).rjust(13)
 #~ TODO: speaker stats
@@ -233,15 +326,37 @@ def printFrameStats(frame):
 #~ dissonantFace.getFaceImg()
-def monitorStatus(frameDir):
+def monitorStatus(frameDir, params):
     while True:
         frame = getLastFrame(frameDir)
         if not frame is None:
-            printFrameStats(frame)
+            printFrameStats(frame, params)
         # don't check too often
         time.sleep(.5)
+def playWindowStopmotion(window):
+    """
+    Play a set of sliding window frames as stop motion video
+    """
+    root = Tkinter.Tk()
+    root.geometry('%dx%d+%d+%d' % (1000, 1000, 0, 0))
+    canvas = Tkinter.Canvas(root, width=1000, height=1000)
+    canvas.pack()
+    old_label_image = None
+    for frame in window.frames:
+        image = frame.getImg()
+        basewidth = 1000
+        wpercent = (basewidth / float(image.size[0]))
+        hsize = int((float(image.size[1]) * float(wpercent)))
+        image = image.resize((basewidth, hsize), Image.ANTIALIAS)
+        tkpi = ImageTk.PhotoImage(image)
+        canvas.delete("IMG")
+        imagesprite = canvas.create_image(500, 500, image=tkpi, tags="IMG")
+        root.update()
+        time.sleep(1)
 validateJsonTimes()
@@ -259,6 +374,17 @@ if args.cutAllFaces:
     for frameNr, frame in loadFrames(args.frameOutput).items():
         cutOutFaces(faceDir)
+if args.unique:
+    collection = WindowCollection(args.window_size, frames)
+    windows = collection.getUniqueWindows(args.params)
+    #~ print(windows)
+    playWindowStopmotion(windows[0])
+if args.avg:
+    collection = WindowCollection(args.window_size, frames)
+    windows = collection.getMostAvgWindows(args.params)
+    #~ print(windows)
+    playWindowStopmotion(windows[0])
 if args.status:
-    monitorStatus(args.frameOutput)
+    monitorStatus(args.frameOutput, args.params)
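For reference, a hypothetical invocation exercising the new window options (`stats.py` is a placeholder, since the script's filename is not shown in this diff; the flags follow the argparse definitions above): `python stats.py -o /home/crowd/output-backup/ --unique --window-size 10 --params smile browFurrow`.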

split_and_merge_output.py (new file, 91 additions)
View File

@@ -0,0 +1,91 @@
import argparse
import os
from PIL import Image
import time
import json
parser = argparse.ArgumentParser(description="""
Frames are split into segments so SMALL_FACES actually detects _really_ small faces. This requires enabling --segments on opencv-webcam-demo (so it detects/writes segment%06d files). split_and_merge_output.py also remerges these segment%06d.json files into a frame%06d.json file.
""")
parser.add_argument('--frameOutput', '-o', required=True, help='directory to look for frames & json')
parser.add_argument('--segmentDir', required=True, help='directory to write segment images to & read their json from')
args = parser.parse_args()
if not os.path.exists(args.frameOutput):
    raise Exception(args.frameOutput + " does not exist")
if not os.path.exists(args.segmentDir):
    raise Exception(args.segmentDir + " does not exist")
def getSectionsForSize(size):
    """
    Get crop sections for an image of the given (width, height) size,
    as PIL crop boxes (left, upper, right, lower)
    """
    return [
        ( 0, 0, int(size[0]/1.85), int(size[1]/1.85) ),
        ( int(size[0]/2.1), 0, size[0], int(size[1]/1.85) ),
        ( 0, int(size[1]/2.1), int(size[0]/1.85), size[1] ),
        ( int(size[0]/2.1), int(size[1]/2.1), size[0], size[1] ),
    ]
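# Worked example (illustrative, not part of the commit): for a 1920x1080 frame
# getSectionsForSize yields (0, 0, 1037, 583), (914, 0, 1920, 583),
# (0, 514, 1037, 1080) and (914, 514, 1920, 1080). The quadrants overlap by
# 123 px horizontally and 69 px vertically (1/1.85 > 1/2.1), so a sufficiently
# small face straddling a middle seam still falls entirely inside one segment.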
nextInNr = 1
segmentCount = 4  # cannot really be changed for now
nextOutNr = segmentCount
while True:
    nextJpg = os.path.join(args.frameOutput, "frame%06d.jpg" % nextInNr)
    nextInJson = os.path.join(args.frameOutput, "frame%06d.json" % nextInNr)
    while os.path.exists(nextJpg) and not os.path.exists(nextInJson):
        print("(SPLIT) Found: {}".format(nextJpg))
        img = Image.open(nextJpg)
        segments = getSectionsForSize(img.size)
        for i, segment in enumerate(segments):
            segmentImg = img.crop(segment)
            segmentNr = (nextInNr-1) * segmentCount + i + 1  # start at 1, increment from there
            segmentFilename = os.path.join(args.segmentDir, "segment%06d.jpg" % segmentNr)
            segmentImg.save(segmentFilename, quality=90)
            print("(SPLIT) Created: {}".format(segmentFilename))
        nextInNr += 1
        nextJpg = os.path.join(args.frameOutput, "frame%06d.jpg" % nextInNr)
        nextInJson = os.path.join(args.frameOutput, "frame%06d.json" % nextInNr)
    nextJson = os.path.join(args.segmentDir, "segment%06d.json" % nextOutNr)
    while os.path.exists(nextJson):
        # nextOutNr always points at the last segment of a frame; segments are
        # written in order, so when this json exists the earlier three do too
        print("(MERGE) Found: {}".format(nextJson))
        frameOutNr = (nextOutNr - 1) / segmentCount + 1  # 1,2,3,4 -> 1; 5,6,7,8 -> 2 (uses trick to divide ints: 7/4=1)
        img = Image.open( os.path.join(args.frameOutput, "frame%06d.jpg" % frameOutNr) )
        segments = getSectionsForSize(img.size)
        creationTime = img._getexif()[36867]  # EXIF tag 36867 = DateTimeOriginal
        faces = []
        times = []
        for i, segment in enumerate(segments):
            segmentNr = nextOutNr - segmentCount + i + 1  # nextOutNr: 4 => 1,2,3,4
            with open( os.path.join(args.segmentDir, "segment%06d.json" % segmentNr) ) as fp:
                j = json.load(fp)
                for f in j['faces']:
                    # map the rect from segment coordinates back to frame coordinates
                    f['rect']['x'] += segment[0]
                    f['rect']['y'] += segment[1]
                    f['id'] = "%d%02d" % ((i+1), f['id'])  # prefix the segment nr so ids stay unique after merging
                    faces.append(f)
                times.append(j['t'])
        data = {
            'nr': int(frameOutNr),
            't': creationTime,
            'faces': faces,
            'sectionTimes': times,
        }
        jsonOutFilename = os.path.join(args.frameOutput, "frame%06d.json" % frameOutNr)
        with open(jsonOutFilename, 'w') as fp:
            json.dump(data, fp)
        print("(MERGE) Wrote: {}".format(jsonOutFilename))
        nextOutNr += segmentCount
        nextJson = os.path.join(args.segmentDir, "segment%06d.json" % nextOutNr)
    time.sleep(0.2)
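Putting the pieces together, a plausible wiring (the segment directory name is illustrative): gphoto2 drops `frame%06d.jpg` into `/home/crowd/output-backup/`; `python split_and_merge_output.py -o /home/crowd/output-backup/ --segmentDir /home/crowd/segments/` cuts each frame into the four overlapping quadrants and later remerges the per-segment json; and opencv-webcam-demo is pointed at the segment directory with `-o /home/crowd/segments/ --segments 1`, so it picks up `segment%06d.jpg` and writes the `segment%06d.json` files the merge step waits for.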