ROI, face distance and demo mode CLNF file.

2016-10-06 16:17:33 -04:00 · 2016-10-06 16:17:33 -04:00 · 8a30e55f4b
commit 8a30e55f4b
parent 99cc06631f
9 changed files with 119 additions and 54 deletions
--- a/gui/OpenFaceDemo/MainWindow.xaml.cs
+++ b/gui/OpenFaceDemo/MainWindow.xaml.cs
@ -86,7 +86,7 @@ namespace OpenFaceDemo

            String root = AppDomain.CurrentDomain.BaseDirectory;

-            clnf_params = new FaceModelParameters(root);
+            clnf_params = new FaceModelParameters(root, true);
            clnf_model = new CLNF(clnf_params);
            face_analyser = new FaceAnalyserManaged(root, true);

@ -259,7 +259,10 @@ namespace OpenFaceDemo
                    confidence = 1;

                List<double> pose = new List<double>();
+
+
                clnf_model.GetCorrectedPoseCamera(pose, fx, fy, cx, cy);
+
                List<double> non_rigid_params = clnf_model.GetNonRigidParams();
                double scale = clnf_model.GetRigidParams()[0];

@ -277,8 +280,8 @@ namespace OpenFaceDemo
                double y_gaze = (Math.Atan2(gaze.Item1.Item2, -gaze.Item1.Item3) + Math.Atan2(gaze.Item2.Item2, -gaze.Item2.Item3)) / 2.0;

                // Scaling for clearer vis.
-                x_gaze *= 2;
-                y_gaze *= 2;
+                x_gaze *= 2.5;
+                y_gaze *= 2.5;

                if (detectionSucceeding)
                {
@ -339,17 +342,7 @@ namespace OpenFaceDemo

                    old_gaze_x = gazeDict[0];
                    old_gaze_y = gazeDict[1];
-
-                    //Dictionary<int, double> valenceDict = new Dictionary<int, double>();
-                    //valenceDict[0] = (face_analyser.GetValence() - 1.0) / 6.5;
-                    //valenceDict[1] = face_analyser.GetArousal();
-                    //valencePlot.AddDataPoint(new DataPoint() { Time = CurrentTime, values = valenceDict, Confidence = confidence });
-
-                    //Dictionary<int, double> avDict = new Dictionary<int, double>();
-                    //avDict[0] = (face_analyser.GetArousal() - 0.5) * 2.0;
-                    //avDict[1] = ((face_analyser.GetValence() - 1.0) / 6.5 - 0.5)*2;
-                    //avPlot.AddDataPoint(new DataPoint() { Time = CurrentTime, values = avDict, Confidence = confidence });
-
+                    
                    if (latest_img == null)
                    {
                        latest_img = frame.CreateWriteableBitmap();
--- a/gui/OpenFaceOffline/MainWindow.xaml.cs
+++ b/gui/OpenFaceOffline/MainWindow.xaml.cs
@ -193,7 +193,7 @@ namespace OpenFaceOffline

            String root = AppDomain.CurrentDomain.BaseDirectory;

-            clnf_params = new FaceModelParameters(root);
+            clnf_params = new FaceModelParameters(root, false);
            clnf_model = new CLNF(clnf_params);
            face_analyser = new FaceAnalyserManaged(root, use_dynamic_models);

--- a/lib/local/CppInerop/LandmarkDetectorInterop.h
+++ b/lib/local/CppInerop/LandmarkDetectorInterop.h
@ -97,7 +97,7 @@ namespace CppInterop {
 		public:

 			// Initialise the parameters
-			FaceModelParameters(System::String^ root)
+			FaceModelParameters(System::String^ root, bool demo)
 			{
 				std::string root_std = msclr::interop::marshal_as<std::string>(root);
 				vector<std::string> args;
@ -105,6 +105,11 @@ namespace CppInterop {

 				params = new ::LandmarkDetector::FaceModelParameters(args);

+				if(demo)
+				{
+					params->model_location = "model/main_clnf_demos.txt";
+				}
+
 				params->track_gaze = true;
 			}

--- a/lib/local/LandmarkDetector/include/LandmarkDetectorModel.h
+++ b/lib/local/LandmarkDetector/include/LandmarkDetectorModel.h
@ -155,6 +155,10 @@ public:
 	// Useful when resetting or initialising the model closer to a specific location (when multiple faces are present)
 	cv::Point_<double> preference_det;

+	// Useful to control where face detections should not occur (for live demos etc.)
+	double detect_Z_max = -1; // Do not detect faces further than this in mm. (-1) refers to detecting all faces
+	cv::Rect_<double> detect_ROI = cv::Rect(0, 0, 1, 1); // the face detection bounds (0,0,1,1) means full image (0.25,0.25,0.5,0.5) would imply region of the face only
+
 	// A default constructor
 	CLNF();

--- a/lib/local/LandmarkDetector/include/LandmarkDetectorUtils.h
+++ b/lib/local/LandmarkDetector/include/LandmarkDetectorUtils.h
@ -143,16 +143,16 @@ namespace LandmarkDetector
 	//============================================================================

 	// Face detection using Haar cascade classifier
-	bool DetectFaces(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity);
-	bool DetectFaces(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, cv::CascadeClassifier& classifier);
+	bool DetectFaces(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, double min_width = -1, cv::Rect_<double> roi = cv::Rect_<double>(0.0, 0.0, 1.0, 1.0));
+	bool DetectFaces(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, cv::CascadeClassifier& classifier, double min_width = -1, cv::Rect_<double> roi = cv::Rect_<double>(0.0, 0.0, 1.0, 1.0));
 	// The preference point allows for disambiguation if multiple faces are present (pick the closest one), if it is not set the biggest face is chosen
-	bool DetectSingleFace(cv::Rect_<double>& o_region, const cv::Mat_<uchar>& intensity, cv::CascadeClassifier& classifier, const cv::Point preference = cv::Point(-1,-1));
+	bool DetectSingleFace(cv::Rect_<double>& o_region, const cv::Mat_<uchar>& intensity, cv::CascadeClassifier& classifier, const cv::Point preference = cv::Point(-1,-1), double min_width = -1, cv::Rect_<double> roi = cv::Rect_<double>(0.0, 0.0, 1.0, 1.0));

 	// Face detection using HOG-SVM classifier
-	bool DetectFacesHOG(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, std::vector<double>& confidences);
-	bool DetectFacesHOG(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, dlib::frontal_face_detector& classifier, std::vector<double>& confidences);
+	bool DetectFacesHOG(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, std::vector<double>& confidences, double min_width = -1, cv::Rect_<double> roi = cv::Rect_<double>(0.0, 0.0, 1.0, 1.0));
+	bool DetectFacesHOG(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, dlib::frontal_face_detector& classifier, std::vector<double>& confidences, double min_width = -1, cv::Rect_<double> roi = cv::Rect_<double>(0.0, 0.0, 1.0, 1.0));
 	// The preference point allows for disambiguation if multiple faces are present (pick the closest one), if it is not set the biggest face is chosen
-	bool DetectSingleFaceHOG(cv::Rect_<double>& o_region, const cv::Mat_<uchar>& intensity, dlib::frontal_face_detector& classifier, double& confidence, const cv::Point preference = cv::Point(-1,-1));
+	bool DetectSingleFaceHOG(cv::Rect_<double>& o_region, const cv::Mat_<uchar>& intensity, dlib::frontal_face_detector& classifier, double& confidence, const cv::Point preference = cv::Point(-1,-1), double min_width = -1, cv::Rect_<double> roi = cv::Rect_<double>(0.0,0.0,1.0,1.0));

 	//============================================================================
 	// Matrix reading functionality
--- a/lib/local/LandmarkDetector/model/main_clnf_demos.txt
+++ b/lib/local/LandmarkDetector/model/main_clnf_demos.txt
@ -0,0 +1,5 @@
+LandmarkDetector clnf_general.txt
+LandmarkDetector_part model_eye/main_clnf_synth_left.txt left_eye_28 36 8 37 10 38 12 39 14 40 16 41 18
+LandmarkDetector_part model_eye/main_clnf_synth_right.txt right_eye_28 42 8 43 10 44 12 45 14 46 16 47 18
+DetectionValidator detection_validation/validator_general_68.txt
+DetectorConstaints placeholder 700 0.05 0.05 0.9 0.9
--- a/lib/local/LandmarkDetector/src/LandmarkDetectorFunc.cpp
+++ b/lib/local/LandmarkDetector/src/LandmarkDetectorFunc.cpp
@ -350,14 +350,27 @@ bool LandmarkDetector::DetectLandmarksInVideo(const cv::Mat_<uchar> &grayscale_i
 		}

 		bool face_detection_success;
+
+		// For pruning face detections
+		double min_size;
+		if (clnf_model.detect_Z_max == -1)
+		{
+			min_size = -1;
+		}
+		else
+		{
+			double fx_est = 500.0 * (grayscale_image.cols / 640.0);
+			min_size = fx_est * 150.0 / clnf_model.detect_Z_max;
+		}
+
 		if(params.curr_face_detector == FaceModelParameters::HOG_SVM_DETECTOR)
 		{
 			double confidence;
-			face_detection_success = LandmarkDetector::DetectSingleFaceHOG(bounding_box, grayscale_image, clnf_model.face_detector_HOG, confidence, preference_det);
+			face_detection_success = LandmarkDetector::DetectSingleFaceHOG(bounding_box, grayscale_image, clnf_model.face_detector_HOG, confidence, preference_det, min_size, clnf_model.detect_ROI);
 		}
 		else if(params.curr_face_detector == FaceModelParameters::HAAR_DETECTOR)
 		{
-			face_detection_success = LandmarkDetector::DetectSingleFace(bounding_box, grayscale_image, clnf_model.face_detector_HAAR, preference_det);
+			face_detection_success = LandmarkDetector::DetectSingleFace(bounding_box, grayscale_image, clnf_model.face_detector_HAAR, preference_det, min_size, clnf_model.detect_ROI);
 		}

 		// Attempt to detect landmarks using the detected face (if unseccessful the detection will be ignored)
--- a/lib/local/LandmarkDetector/src/LandmarkDetectorModel.cpp
+++ b/lib/local/LandmarkDetector/src/LandmarkDetectorModel.cpp
@ -93,7 +93,7 @@ CLNF::CLNF(string fname)
 CLNF::CLNF(const CLNF& other): pdm(other.pdm), params_local(other.params_local.clone()), params_global(other.params_global), detected_landmarks(other.detected_landmarks.clone()),
 	landmark_likelihoods(other.landmark_likelihoods.clone()), patch_experts(other.patch_experts), landmark_validator(other.landmark_validator), face_detector_location(other.face_detector_location),
 	hierarchical_mapping(other.hierarchical_mapping), hierarchical_models(other.hierarchical_models), hierarchical_model_names(other.hierarchical_model_names),
-	hierarchical_params(other.hierarchical_params), eye_model(other.eye_model)
+	hierarchical_params(other.hierarchical_params), eye_model(other.eye_model), detect_Z_max(other.detect_Z_max), detect_ROI(other.detect_ROI)
 {
 	this->detection_success = other.detection_success;
 	this->tracking_initialised = other.tracking_initialised;
@ -173,6 +173,9 @@ CLNF & CLNF::operator= (const CLNF& other)
 		this->hierarchical_models = other.hierarchical_models;
 		this->hierarchical_model_names = other.hierarchical_model_names;
 		this->hierarchical_params = other.hierarchical_params;
+
+		this->detect_Z_max = other.detect_Z_max;
+		this->detect_ROI = detect_ROI;
 	}

 	face_detector_HOG = dlib::get_frontal_face_detector();
@ -213,6 +216,8 @@ CLNF::CLNF(const CLNF&& other)

 	this->eye_model = other.eye_model;

+	this->detect_Z_max = other.detect_Z_max;
+	this->detect_ROI = detect_ROI;
 }

 // Assignment operator for rvalues
@ -247,6 +252,9 @@ CLNF & CLNF::operator= (const CLNF&& other)
 	this->hierarchical_params = other.hierarchical_params;

 	this->eye_model = other.eye_model;
+	
+	this->detect_Z_max = other.detect_Z_max;
+	this->detect_ROI = detect_ROI;

 	return *this;
 }
@ -365,6 +373,10 @@ void CLNF::Read(string main_location)
 	
 	// The other module locations should be defined as relative paths from the main model
 	boost::filesystem::path root = boost::filesystem::path(main_location).parent_path();	
+	
+	// Initializing defaults that could be overwriten
+	detect_Z_max = -1;
+	detect_ROI = cv::Rect_<double>(0, 0, 1, 1);

 	// The main file contains the references to other files
 	while (!locations.eof())
@ -527,6 +539,16 @@ void CLNF::Read(string main_location)
 			landmark_validator.Read(location);
 			cout << "Done" << endl;
 		}
+		else if (module.compare("DetectorConstaints") == 0)
+		{
+			cout << "Reading detectir constraints...";
+			lineStream >> detect_Z_max;
+			lineStream >> detect_ROI.x;
+			lineStream >> detect_ROI.y;
+			lineStream >> detect_ROI.width;
+			lineStream >> detect_ROI.height;
+			cout << "Done" << endl;
+		}
 	}
 
 	detected_landmarks.create(2 * pdm.NumberOfPoints(), 1);
@ -551,6 +573,7 @@ void CLNF::Read(string main_location)
 	preference_det.x = -1;
 	preference_det.y = -1;

+
 }

 // Resetting the model (for a new video, or complet reinitialisation
--- a/lib/local/LandmarkDetector/src/LandmarkDetectorUtils.cpp
+++ b/lib/local/LandmarkDetector/src/LandmarkDetectorUtils.cpp
@ -1295,7 +1295,7 @@ cv::Vec3d RotationMatrix2AxisAngle(const cv::Matx33d& rotation_matrix)
 //============================================================================
 // Face detection helpers
 //============================================================================
-bool DetectFaces(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity)
+bool DetectFaces(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, double min_width, cv::Rect_<double> roi)
 {
 	cv::CascadeClassifier classifier("./classifiers/haarcascade_frontalface_alt.xml");
 	if(classifier.empty())
@ -1305,47 +1305,60 @@ bool DetectFaces(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& i
 	}
 	else
 	{
-		return DetectFaces(o_regions, intensity, classifier);
+		return DetectFaces(o_regions, intensity, classifier, min_width, roi);
 	}

 }

-bool DetectFaces(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, cv::CascadeClassifier& classifier)
+bool DetectFaces(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, cv::CascadeClassifier& classifier, double min_width, cv::Rect_<double> roi)
 {
 		
 	vector<cv::Rect> face_detections;
-	classifier.detectMultiScale(intensity, face_detections, 1.2, 2, 0, cv::Size(50, 50));
+	if(min_width == -1)
+	{
+		classifier.detectMultiScale(intensity, face_detections, 1.2, 2, 0, cv::Size(50, 50));
+	}
+	else
+	{
+		classifier.detectMultiScale(intensity, face_detections, 1.2, 2, 0, cv::Size(min_width, min_width));
+	}

 	// Convert from int bounding box do a double one with corrections
-	o_regions.resize(face_detections.size());
-
 	for( size_t face = 0; face < o_regions.size(); ++face)
 	{
 		// OpenCV is overgenerous with face size and y location is off
 		// CLNF detector expects the bounding box to encompass from eyebrow to chin in y, and from cheeck outline to cheeck outline in x, so we need to compensate

 		// The scalings were learned using the Face Detections on LFPW, Helen, AFW and iBUG datasets, using ground truth and detections from openCV
-
+		cv::Rect_<double> region;
 		// Correct for scale
-		o_regions[face].width = face_detections[face].width * 0.8924; 
-		o_regions[face].height = face_detections[face].height * 0.8676;
+		region.width = face_detections[face].width * 0.8924;
+		region.height = face_detections[face].height * 0.8676;

 		// Move the face slightly to the right (as the width was made smaller)
-		o_regions[face].x = face_detections[face].x + 0.0578 * face_detections[face].width;
+		region.x = face_detections[face].x + 0.0578 * face_detections[face].width;
 		// Shift face down as OpenCV Haar Cascade detects the forehead as well, and we're not interested
-		o_regions[face].y = face_detections[face].y + face_detections[face].height * 0.2166;
-		
+		region.y = face_detections[face].y + face_detections[face].height * 0.2166;
 		
+		if (min_width != -1)
+		{
+			if (region.width < min_width || region.x < ((double)intensity.cols) * roi.x || region.y < ((double)intensity.cols) * roi.y ||
+				region.x + region.width >((double)intensity.cols) * (roi.x + roi.width) || region.y + region.height >((double)intensity.rows) * (roi.y + roi.height))
+				continue;
+		}
+
+
+		o_regions.push_back(region);
 	}
 	return o_regions.size() > 0;
 }

-bool DetectSingleFace(cv::Rect_<double>& o_region, const cv::Mat_<uchar>& intensity_image, cv::CascadeClassifier& classifier, cv::Point preference)
+bool DetectSingleFace(cv::Rect_<double>& o_region, const cv::Mat_<uchar>& intensity_image, cv::CascadeClassifier& classifier, cv::Point preference, double min_width, cv::Rect_<double> roi)
 {
 	// The tracker can return multiple faces
 	vector<cv::Rect_<double> > face_detections;
 				
-	bool detect_success = LandmarkDetector::DetectFaces(face_detections, intensity_image, classifier);
+	bool detect_success = LandmarkDetector::DetectFaces(face_detections, intensity_image, classifier, min_width, roi);
 					
 	if(detect_success)
 	{
@ -1399,15 +1412,15 @@ bool DetectSingleFace(cv::Rect_<double>& o_region, const cv::Mat_<uchar>& intens
 	return detect_success;
 }

-bool DetectFacesHOG(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, std::vector<double>& confidences)
+bool DetectFacesHOG(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, std::vector<double>& confidences, double min_width, cv::Rect_<double> roi)
 {
 	dlib::frontal_face_detector detector = dlib::get_frontal_face_detector();

-	return DetectFacesHOG(o_regions, intensity, detector, confidences);
+	return DetectFacesHOG(o_regions, intensity, detector, confidences, min_width, roi);

 }

-bool DetectFacesHOG(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, dlib::frontal_face_detector& detector, std::vector<double>& o_confidences)
+bool DetectFacesHOG(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, dlib::frontal_face_detector& detector, std::vector<double>& o_confidences, double min_width, cv::Rect_<double> roi)
 {
 		
 	cv::Mat_<uchar> upsampled_intensity;
@ -1422,37 +1435,46 @@ bool DetectFacesHOG(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>
 	detector(cv_grayscale, face_detections, -0.2);

 	// Convert from int bounding box do a double one with corrections
-	o_regions.resize(face_detections.size());
-	o_confidences.resize(face_detections.size());
+	//o_regions.resize(face_detections.size());
+	//o_confidences.resize(face_detections.size());

-	for( size_t face = 0; face < o_regions.size(); ++face)
+	for( size_t face = 0; face < face_detections.size(); ++face)
 	{
 		// CLNF expects the bounding box to encompass from eyebrow to chin in y, and from cheeck outline to cheeck outline in x, so we need to compensate

-		// The scalings were learned using the Face Detections on LFPW and Helen using ground truth and detections from the HOG detector
-
+		cv::Rect_<double> region;
 		// Move the face slightly to the right (as the width was made smaller)
-		o_regions[face].x = (face_detections[face].rect.get_rect().tl_corner().x() + 0.0389 * face_detections[face].rect.get_rect().width())/scaling;
+		region.x = (face_detections[face].rect.get_rect().tl_corner().x() + 0.0389 * face_detections[face].rect.get_rect().width()) / scaling;
 		// Shift face down as OpenCV Haar Cascade detects the forehead as well, and we're not interested
-		o_regions[face].y = (face_detections[face].rect.get_rect().tl_corner().y() + 0.1278 * face_detections[face].rect.get_rect().height())/scaling;
+		region.y = (face_detections[face].rect.get_rect().tl_corner().y() + 0.1278 * face_detections[face].rect.get_rect().height()) / scaling;

 		// Correct for scale
-		o_regions[face].width = (face_detections[face].rect.get_rect().width() * 0.9611)/scaling; 
-		o_regions[face].height = (face_detections[face].rect.get_rect().height() * 0.9388)/scaling;
+		region.width = (face_detections[face].rect.get_rect().width() * 0.9611) / scaling;
+		region.height = (face_detections[face].rect.get_rect().height() * 0.9388) / scaling;

-		o_confidences[face] = face_detections[face].detection_confidence;
+		// The scalings were learned using the Face Detections on LFPW and Helen using ground truth and detections from the HOG detector
+		if (min_width != -1)
+		{
+			if (region.width < min_width || region.x < ((double)intensity.cols) * roi.x || region.y < ((double)intensity.cols) * roi.y ||
+				region.x + region.width > ((double)intensity.cols) * (roi.x+roi.width) || region.y + region.height >((double)intensity.rows) * (roi.y + roi.height))
+				continue;
+		}
+
+
+		o_regions.push_back(region);
+		o_confidences.push_back(face_detections[face].detection_confidence);
 		
 		
 	}
 	return o_regions.size() > 0;
 }

-bool DetectSingleFaceHOG(cv::Rect_<double>& o_region, const cv::Mat_<uchar>& intensity_img, dlib::frontal_face_detector& detector, double& confidence, cv::Point preference)
+bool DetectSingleFaceHOG(cv::Rect_<double>& o_region, const cv::Mat_<uchar>& intensity_img, dlib::frontal_face_detector& detector, double& confidence, cv::Point preference, double min_width, cv::Rect_<double> roi)
 {
 	// The tracker can return multiple faces
 	vector<cv::Rect_<double> > face_detections;
 	vector<double> confidences;
-	bool detect_success = LandmarkDetector::DetectFacesHOG(face_detections, intensity_img, detector, confidences);
+	bool detect_success = LandmarkDetector::DetectFacesHOG(face_detections, intensity_img, detector, confidences, min_width, roi);

 	// In case of multiple faces pick the biggest one
 	bool use_size = true;