From 8a30e55f4bcf224eba607f7934013312960b227a Mon Sep 17 00:00:00 2001
From: Tadas Baltrusaitis
Date: Thu, 6 Oct 2016 16:17:33 -0400
Subject: [PATCH] ROI, face distance and demo mode CLNF file.

---
 gui/OpenFaceDemo/MainWindow.xaml.cs           | 21 ++---
 gui/OpenFaceOffline/MainWindow.xaml.cs        |  2 +-
 lib/local/CppInerop/LandmarkDetectorInterop.h |  7 +-
 .../include/LandmarkDetectorModel.h           |  4 +
 .../include/LandmarkDetectorUtils.h           | 12 +--
 .../model/main_clnf_demos.txt                 |  5 ++
 .../src/LandmarkDetectorFunc.cpp              | 17 +++-
 .../src/LandmarkDetectorModel.cpp             | 25 +++++-
 .../src/LandmarkDetectorUtils.cpp             | 80 ++++++++++++-------
 9 files changed, 119 insertions(+), 54 deletions(-)
 create mode 100644 lib/local/LandmarkDetector/model/main_clnf_demos.txt

diff --git a/gui/OpenFaceDemo/MainWindow.xaml.cs b/gui/OpenFaceDemo/MainWindow.xaml.cs
index 526565e..8c399ea 100644
--- a/gui/OpenFaceDemo/MainWindow.xaml.cs
+++ b/gui/OpenFaceDemo/MainWindow.xaml.cs
@@ -86,7 +86,7 @@ namespace OpenFaceDemo
 
             String root = AppDomain.CurrentDomain.BaseDirectory;
 
-            clnf_params = new FaceModelParameters(root);
+            clnf_params = new FaceModelParameters(root, true);
             clnf_model = new CLNF(clnf_params);
             face_analyser = new FaceAnalyserManaged(root, true);
 
@@ -259,7 +259,10 @@ namespace OpenFaceDemo
                     confidence = 1;
 
                 List<double> pose = new List<double>();
+
+                clnf_model.GetCorrectedPoseCamera(pose, fx, fy, cx, cy);
+
                 List<double> non_rigid_params = clnf_model.GetNonRigidParams();
                 double scale = clnf_model.GetRigidParams()[0];
 
@@ -277,8 +280,8 @@ namespace OpenFaceDemo
                 double y_gaze = (Math.Atan2(gaze.Item1.Item2, -gaze.Item1.Item3) + Math.Atan2(gaze.Item2.Item2, -gaze.Item2.Item3)) / 2.0;
 
                 // Scaling for clearer vis.
-                x_gaze *= 2;
-                y_gaze *= 2;
+                x_gaze *= 2.5;
+                y_gaze *= 2.5;
 
                 if (detectionSucceeding)
                 {
@@ -339,17 +342,7 @@ namespace OpenFaceDemo
 
                 old_gaze_x = gazeDict[0];
                 old_gaze_y = gazeDict[1];
-
-                //Dictionary<int, double> valenceDict = new Dictionary<int, double>();
-                //valenceDict[0] = (face_analyser.GetValence() - 1.0) / 6.5;
-                //valenceDict[1] = face_analyser.GetArousal();
-                //valencePlot.AddDataPoint(new DataPoint() { Time = CurrentTime, values = valenceDict, Confidence = confidence });
-
-                //Dictionary<int, double> avDict = new Dictionary<int, double>();
-                //avDict[0] = (face_analyser.GetArousal() - 0.5) * 2.0;
-                //avDict[1] = ((face_analyser.GetValence() - 1.0) / 6.5 - 0.5) * 2;
-                //avPlot.AddDataPoint(new DataPoint() { Time = CurrentTime, values = avDict, Confidence = confidence });
-
+
                 if (latest_img == null)
                 {
                     latest_img = frame.CreateWriteableBitmap();
diff --git a/gui/OpenFaceOffline/MainWindow.xaml.cs b/gui/OpenFaceOffline/MainWindow.xaml.cs
index b13660a..3a94c4f 100644
--- a/gui/OpenFaceOffline/MainWindow.xaml.cs
+++ b/gui/OpenFaceOffline/MainWindow.xaml.cs
@@ -193,7 +193,7 @@ namespace OpenFaceOffline
 
             String root = AppDomain.CurrentDomain.BaseDirectory;
 
-            clnf_params = new FaceModelParameters(root);
+            clnf_params = new FaceModelParameters(root, false);
             clnf_model = new CLNF(clnf_params);
             face_analyser = new FaceAnalyserManaged(root, use_dynamic_models);
 
diff --git a/lib/local/CppInerop/LandmarkDetectorInterop.h b/lib/local/CppInerop/LandmarkDetectorInterop.h
index 7ada1dd..c3cb895 100644
--- a/lib/local/CppInerop/LandmarkDetectorInterop.h
+++ b/lib/local/CppInerop/LandmarkDetectorInterop.h
@@ -97,7 +97,7 @@ namespace CppInterop {
 	public:
 
 		// Initialise the parameters
-		FaceModelParameters(System::String^ root)
+		FaceModelParameters(System::String^ root, bool demo)
 		{
 			std::string root_std = msclr::interop::marshal_as<std::string>(root);
 			vector<std::string> args;
@@ -105,6 +105,11 @@ namespace CppInterop {
 
 			params = new ::LandmarkDetector::FaceModelParameters(args);
 
+			if(demo)
+			{
+				params->model_location = "model/main_clnf_demos.txt";
+			}
+
 			params->track_gaze = true;
 		}
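Note: the new demo flag only changes which model-definition file the native loader reads; demo mode points it at main_clnf_demos.txt, which additionally carries detector constraints (see the new model file below). A minimal native-side sketch of the same override, assuming the LandmarkDetector headers and a working directory containing the model/ folder; the snippet is illustrative and not part of the patch:

    // Select the demo model definition before constructing the CLNF model
    #include "LandmarkDetectorModel.h"
    #include "LandmarkDetectorParameters.h"

    int main()
    {
        LandmarkDetector::FaceModelParameters det_params;
        det_params.model_location = "model/main_clnf_demos.txt"; // demo mode
        det_params.track_gaze = true;

        // CLNF::Read picks up the DetectorConstraints line from the demo file
        LandmarkDetector::CLNF clnf_model(det_params.model_location);
        return 0;
    }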
diff --git a/lib/local/LandmarkDetector/include/LandmarkDetectorModel.h b/lib/local/LandmarkDetector/include/LandmarkDetectorModel.h
index b5fb8e4..456cb9d 100644
--- a/lib/local/LandmarkDetector/include/LandmarkDetectorModel.h
+++ b/lib/local/LandmarkDetector/include/LandmarkDetectorModel.h
@@ -155,6 +155,10 @@ public:
 	// Useful when resetting or initialising the model closer to a specific location (when multiple faces are present)
 	cv::Point_<double> preference_det;
 
+	// Useful to control where face detections should not occur (for live demos etc.)
+	double detect_Z_max = -1; // Do not detect faces further away than this (in mm); -1 means detect faces at any distance
+	cv::Rect_<double> detect_ROI = cv::Rect_<double>(0, 0, 1, 1); // Normalised face detection bounds: (0,0,1,1) is the full image, (0.25,0.25,0.5,0.5) would restrict detection to the centre of the image
+
 	// A default constructor
 	CLNF();
 
diff --git a/lib/local/LandmarkDetector/include/LandmarkDetectorUtils.h b/lib/local/LandmarkDetector/include/LandmarkDetectorUtils.h
index cdd6ab3..450c1e1 100644
--- a/lib/local/LandmarkDetector/include/LandmarkDetectorUtils.h
+++ b/lib/local/LandmarkDetector/include/LandmarkDetectorUtils.h
@@ -143,16 +143,16 @@ namespace LandmarkDetector
 	//============================================================================
 	// Face detection using Haar cascade classifier
-	bool DetectFaces(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity);
-	bool DetectFaces(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, cv::CascadeClassifier& classifier);
+	bool DetectFaces(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, double min_width = -1, cv::Rect_<double> roi = cv::Rect_<double>(0.0, 0.0, 1.0, 1.0));
+	bool DetectFaces(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, cv::CascadeClassifier& classifier, double min_width = -1, cv::Rect_<double> roi = cv::Rect_<double>(0.0, 0.0, 1.0, 1.0));
 	// The preference point allows for disambiguation if multiple faces are present (pick the closest one), if it is not set the biggest face is chosen
-	bool DetectSingleFace(cv::Rect_<double>& o_region, const cv::Mat_<uchar>& intensity, cv::CascadeClassifier& classifier, const cv::Point preference = cv::Point(-1,-1));
+	bool DetectSingleFace(cv::Rect_<double>& o_region, const cv::Mat_<uchar>& intensity, cv::CascadeClassifier& classifier, const cv::Point preference = cv::Point(-1,-1), double min_width = -1, cv::Rect_<double> roi = cv::Rect_<double>(0.0, 0.0, 1.0, 1.0));
 
 	// Face detection using HOG-SVM classifier
-	bool DetectFacesHOG(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, std::vector<double>& confidences);
-	bool DetectFacesHOG(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, dlib::frontal_face_detector& classifier, std::vector<double>& confidences);
+	bool DetectFacesHOG(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, std::vector<double>& confidences, double min_width = -1, cv::Rect_<double> roi = cv::Rect_<double>(0.0, 0.0, 1.0, 1.0));
+	bool DetectFacesHOG(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, dlib::frontal_face_detector& classifier, std::vector<double>& confidences, double min_width = -1, cv::Rect_<double> roi = cv::Rect_<double>(0.0, 0.0, 1.0, 1.0));
 	// The preference point allows for disambiguation if multiple faces are present (pick the closest one), if it is not set the biggest face is chosen
-	bool DetectSingleFaceHOG(cv::Rect_<double>& o_region, const cv::Mat_<uchar>& intensity, dlib::frontal_face_detector& classifier, double& confidence, const cv::Point preference = cv::Point(-1,-1));
+	bool DetectSingleFaceHOG(cv::Rect_<double>& o_region, const cv::Mat_<uchar>& intensity, dlib::frontal_face_detector& classifier, double& confidence, const cv::Point preference = cv::Point(-1,-1), double min_width = -1, cv::Rect_<double> roi = cv::Rect_<double>(0.0, 0.0, 1.0, 1.0));
 
 	//============================================================================
 	// Matrix reading functionality
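Note: the two new optional parameters mean the same thing in all six declarations: min_width is the smallest acceptable face width in pixels (-1 disables the check), and roi is a rectangle in normalised image coordinates that a detection must lie inside. A self-contained sketch of the containment test these functions apply; the function name and sample values are illustrative:

    #include <iostream>
    #include <opencv2/core/core.hpp>

    // Does a candidate face box (in pixels) lie inside a normalised ROI?
    // Mirrors the checks added to DetectFaces/DetectFacesHOG in this patch.
    bool inside_roi(const cv::Rect_<double>& region, const cv::Rect_<double>& roi,
                    int img_cols, int img_rows)
    {
        return region.x >= img_cols * roi.x &&
               region.y >= img_rows * roi.y &&
               region.x + region.width <= img_cols * (roi.x + roi.width) &&
               region.y + region.height <= img_rows * (roi.y + roi.height);
    }

    int main()
    {
        cv::Rect_<double> roi(0.05, 0.05, 0.9, 0.9); // central 90% of the frame
        cv::Rect_<double> face(600, 400, 120, 120);  // candidate detection in px

        // Prints 0: the box spills past 640 * 0.95 = 608 px on the right edge
        std::cout << inside_roi(face, roi, 640, 480) << std::endl;
        return 0;
    }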
diff --git a/lib/local/LandmarkDetector/model/main_clnf_demos.txt b/lib/local/LandmarkDetector/model/main_clnf_demos.txt
new file mode 100644
index 0000000..22e365e
--- /dev/null
+++ b/lib/local/LandmarkDetector/model/main_clnf_demos.txt
@@ -0,0 +1,5 @@
+LandmarkDetector clnf_general.txt
+LandmarkDetector_part model_eye/main_clnf_synth_left.txt left_eye_28 36 8 37 10 38 12 39 14 40 16 41 18
+LandmarkDetector_part model_eye/main_clnf_synth_right.txt right_eye_28 42 8 43 10 44 12 45 14 46 16 47 18
+DetectionValidator detection_validation/validator_general_68.txt
+DetectorConstraints placeholder 700 0.05 0.05 0.9 0.9
\ No newline at end of file
diff --git a/lib/local/LandmarkDetector/src/LandmarkDetectorFunc.cpp b/lib/local/LandmarkDetector/src/LandmarkDetectorFunc.cpp
index 0325531..0bd820d 100644
--- a/lib/local/LandmarkDetector/src/LandmarkDetectorFunc.cpp
+++ b/lib/local/LandmarkDetector/src/LandmarkDetectorFunc.cpp
@@ -350,14 +350,27 @@ bool LandmarkDetector::DetectLandmarksInVideo(const cv::Mat_<uchar> &grayscale_image
 	}
 
 	bool face_detection_success;
+
+	// For pruning face detections
+	double min_size;
+	if (clnf_model.detect_Z_max == -1)
+	{
+		min_size = -1;
+	}
+	else
+	{
+		double fx_est = 500.0 * (grayscale_image.cols / 640.0);
+		min_size = fx_est * 150.0 / clnf_model.detect_Z_max;
+	}
+
 	if(params.curr_face_detector == FaceModelParameters::HOG_SVM_DETECTOR)
 	{
 		double confidence;
-		face_detection_success = LandmarkDetector::DetectSingleFaceHOG(bounding_box, grayscale_image, clnf_model.face_detector_HOG, confidence, preference_det);
+		face_detection_success = LandmarkDetector::DetectSingleFaceHOG(bounding_box, grayscale_image, clnf_model.face_detector_HOG, confidence, preference_det, min_size, clnf_model.detect_ROI);
 	}
 	else if(params.curr_face_detector == FaceModelParameters::HAAR_DETECTOR)
 	{
-		face_detection_success = LandmarkDetector::DetectSingleFace(bounding_box, grayscale_image, clnf_model.face_detector_HAAR, preference_det);
+		face_detection_success = LandmarkDetector::DetectSingleFace(bounding_box, grayscale_image, clnf_model.face_detector_HAAR, preference_det, min_size, clnf_model.detect_ROI);
 	}
 
 	// Attempt to detect landmarks using the detected face (if unsuccessful the detection will be ignored)
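Note: min_size above is a pinhole-camera estimate. The focal length is assumed to be roughly 500 px for a 640 px wide image (scaled linearly with the actual width), and a face is taken to be about 150 mm across, so a face at depth Z projects to about fx * 150 / Z pixels; anything narrower is treated as being beyond detect_Z_max. A worked sketch using the 700 mm cap from main_clnf_demos.txt; the constants come from the patch, the rest is illustrative:

    #include <iostream>

    int main()
    {
        double image_width = 640.0;  // px
        double detect_Z_max = 700.0; // mm, as set in main_clnf_demos.txt

        // Assumed focal length: ~500 px at 640 px width, scaled with width
        double fx_est = 500.0 * (image_width / 640.0);

        // A ~150 mm wide face at depth Z projects to fx * 150 / Z pixels,
        // so anything narrower than this is further away than Z_max
        double min_size = fx_est * 150.0 / detect_Z_max;

        std::cout << "Minimum face width: " << min_size << " px" << std::endl; // ~107 px
        return 0;
    }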
diff --git a/lib/local/LandmarkDetector/src/LandmarkDetectorModel.cpp b/lib/local/LandmarkDetector/src/LandmarkDetectorModel.cpp
index 88cc8a7..1e49812 100644
--- a/lib/local/LandmarkDetector/src/LandmarkDetectorModel.cpp
+++ b/lib/local/LandmarkDetector/src/LandmarkDetectorModel.cpp
@@ -93,7 +93,7 @@ CLNF::CLNF(string fname)
 CLNF::CLNF(const CLNF& other): pdm(other.pdm), params_local(other.params_local.clone()), params_global(other.params_global), detected_landmarks(other.detected_landmarks.clone()),
 	landmark_likelihoods(other.landmark_likelihoods.clone()), patch_experts(other.patch_experts), landmark_validator(other.landmark_validator), face_detector_location(other.face_detector_location),
 	hierarchical_mapping(other.hierarchical_mapping), hierarchical_models(other.hierarchical_models), hierarchical_model_names(other.hierarchical_model_names),
-	hierarchical_params(other.hierarchical_params), eye_model(other.eye_model)
+	hierarchical_params(other.hierarchical_params), eye_model(other.eye_model), detect_Z_max(other.detect_Z_max), detect_ROI(other.detect_ROI)
 {
 	this->detection_success = other.detection_success;
 	this->tracking_initialised = other.tracking_initialised;
@@ -173,6 +173,9 @@ CLNF & CLNF::operator= (const CLNF& other)
 		this->hierarchical_models = other.hierarchical_models;
 		this->hierarchical_model_names = other.hierarchical_model_names;
 		this->hierarchical_params = other.hierarchical_params;
+
+		this->detect_Z_max = other.detect_Z_max;
+		this->detect_ROI = other.detect_ROI;
 	}
 
 	face_detector_HOG = dlib::get_frontal_face_detector();
@@ -213,6 +216,8 @@ CLNF::CLNF(const CLNF&& other)
 
 	this->eye_model = other.eye_model;
 
+	this->detect_Z_max = other.detect_Z_max;
+	this->detect_ROI = other.detect_ROI;
 }
 
 // Assignment operator for rvalues
@@ -247,6 +252,9 @@ CLNF & CLNF::operator= (const CLNF&& other)
 	this->hierarchical_params = other.hierarchical_params;
 
 	this->eye_model = other.eye_model;
+
+	this->detect_Z_max = other.detect_Z_max;
+	this->detect_ROI = other.detect_ROI;
 
 	return *this;
 }
@@ -365,6 +373,10 @@ void CLNF::Read(string main_location)
 
 	// The other module locations should be defined as relative paths from the main model
 	boost::filesystem::path root = boost::filesystem::path(main_location).parent_path();
+
+	// Initializing defaults that could be overwritten
+	detect_Z_max = -1;
+	detect_ROI = cv::Rect_<double>(0, 0, 1, 1);
 
 	// The main file contains the references to other files
 	while (!locations.eof())
@@ -527,6 +539,16 @@ void CLNF::Read(string main_location)
 			landmark_validator.Read(location);
 			cout << "Done" << endl;
 		}
+		else if (module.compare("DetectorConstraints") == 0)
+		{
+			cout << "Reading detector constraints...";
+			lineStream >> detect_Z_max;
+			lineStream >> detect_ROI.x;
+			lineStream >> detect_ROI.y;
+			lineStream >> detect_ROI.width;
+			lineStream >> detect_ROI.height;
+			cout << "Done" << endl;
+		}
 	}
 
 	detected_landmarks.create(2 * pdm.NumberOfPoints(), 1);
@@ -551,6 +573,7 @@ void CLNF::Read(string main_location)
 
 	preference_det.x = -1;
 	preference_det.y = -1;
+
 }
 
 // Resetting the model (for a new video, or complete reinitialisation)
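Note: CLNF::Read extracts a module token and a location token from each line before dispatching on the module name, which is why the DetectorConstraints entry in main_clnf_demos.txt carries the dummy "placeholder" token ahead of its five numeric fields (Z_max in mm, then ROI x, y, width, height). A standalone sketch of that parse, assuming the module/location extraction order used by the rest of Read:

    #include <iostream>
    #include <sstream>
    #include <string>
    #include <opencv2/core/core.hpp>

    int main()
    {
        // Line from main_clnf_demos.txt; "placeholder" fills the location
        // slot that every other module type uses for a file path
        std::string line = "DetectorConstraints placeholder 700 0.05 0.05 0.9 0.9";

        std::stringstream lineStream(line);
        std::string module, location;
        lineStream >> module >> location;

        double detect_Z_max = -1;
        cv::Rect_<double> detect_ROI(0, 0, 1, 1);
        if (module.compare("DetectorConstraints") == 0)
        {
            lineStream >> detect_Z_max;
            lineStream >> detect_ROI.x >> detect_ROI.y >> detect_ROI.width >> detect_ROI.height;
        }

        std::cout << "Z max: " << detect_Z_max << " mm, ROI: " << detect_ROI.x << " "
                  << detect_ROI.y << " " << detect_ROI.width << " " << detect_ROI.height << std::endl;
        return 0;
    }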
diff --git a/lib/local/LandmarkDetector/src/LandmarkDetectorUtils.cpp b/lib/local/LandmarkDetector/src/LandmarkDetectorUtils.cpp
index e074e7f..60a2d65 100644
--- a/lib/local/LandmarkDetector/src/LandmarkDetectorUtils.cpp
+++ b/lib/local/LandmarkDetector/src/LandmarkDetectorUtils.cpp
@@ -1295,7 +1295,7 @@ cv::Vec3d RotationMatrix2AxisAngle(const cv::Matx33d& rotation_matrix)
 //============================================================================
 // Face detection helpers
 //============================================================================
-bool DetectFaces(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity)
+bool DetectFaces(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, double min_width, cv::Rect_<double> roi)
 {
 	cv::CascadeClassifier classifier("./classifiers/haarcascade_frontalface_alt.xml");
 	if(classifier.empty())
 	{
@@ -1305,47 +1305,60 @@ bool DetectFaces(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity)
 	}
 	else
 	{
-		return DetectFaces(o_regions, intensity, classifier);
+		return DetectFaces(o_regions, intensity, classifier, min_width, roi);
 	}
 }
 
-bool DetectFaces(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, cv::CascadeClassifier& classifier)
+bool DetectFaces(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, cv::CascadeClassifier& classifier, double min_width, cv::Rect_<double> roi)
 {
 	vector<cv::Rect> face_detections;
-	classifier.detectMultiScale(intensity, face_detections, 1.2, 2, 0, cv::Size(50, 50));
+	if(min_width == -1)
+	{
+		classifier.detectMultiScale(intensity, face_detections, 1.2, 2, 0, cv::Size(50, 50));
+	}
+	else
+	{
+		classifier.detectMultiScale(intensity, face_detections, 1.2, 2, 0, cv::Size(min_width, min_width));
+	}
 
 	// Convert from int bounding box to a double one with corrections
-	o_regions.resize(face_detections.size());
-
-	for( size_t face = 0; face < o_regions.size(); ++face)
+	for( size_t face = 0; face < face_detections.size(); ++face)
 	{
 		// OpenCV is overgenerous with face size and y location is off
 		// CLNF detector expects the bounding box to encompass from eyebrow to chin in y, and from cheek outline to cheek outline in x, so we need to compensate
 		// The scalings were learned using the Face Detections on LFPW, Helen, AFW and iBUG datasets, using ground truth and detections from openCV
 
+		cv::Rect_<double> region;
+
 		// Correct for scale
-		o_regions[face].width = face_detections[face].width * 0.8924;
-		o_regions[face].height = face_detections[face].height * 0.8676;
+		region.width = face_detections[face].width * 0.8924;
+		region.height = face_detections[face].height * 0.8676;
 
 		// Move the face slightly to the right (as the width was made smaller)
-		o_regions[face].x = face_detections[face].x + 0.0578 * face_detections[face].width;
+		region.x = face_detections[face].x + 0.0578 * face_detections[face].width;
 		// Shift face down as OpenCV Haar Cascade detects the forehead as well, and we're not interested
-		o_regions[face].y = face_detections[face].y + face_detections[face].height * 0.2166;
-
+		region.y = face_detections[face].y + face_detections[face].height * 0.2166;
+
+		if (min_width != -1)
+		{
+			if (region.width < min_width || region.x < ((double)intensity.cols) * roi.x || region.y < ((double)intensity.rows) * roi.y ||
+				region.x + region.width > ((double)intensity.cols) * (roi.x + roi.width) || region.y + region.height > ((double)intensity.rows) * (roi.y + roi.height))
+				continue;
+		}
+
+		o_regions.push_back(region);
 	}
 	return o_regions.size() > 0;
 }
 
-bool DetectSingleFace(cv::Rect_<double>& o_region, const cv::Mat_<uchar>& intensity_image, cv::CascadeClassifier& classifier, cv::Point preference)
+bool DetectSingleFace(cv::Rect_<double>& o_region, const cv::Mat_<uchar>& intensity_image, cv::CascadeClassifier& classifier, cv::Point preference, double min_width, cv::Rect_<double> roi)
 {
 	// The tracker can return multiple faces
 	vector<cv::Rect_<double> > face_detections;
 
-	bool detect_success = LandmarkDetector::DetectFaces(face_detections, intensity_image, classifier);
+	bool detect_success = LandmarkDetector::DetectFaces(face_detections, intensity_image, classifier, min_width, roi);
 
 	if(detect_success)
 	{
@@ -1399,15 +1412,15 @@ bool DetectSingleFace(cv::Rect_<double>& o_region, const cv::Mat_<uchar>& intensity_image)
 
 	return detect_success;
 }
 
-bool DetectFacesHOG(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, std::vector<double>& confidences)
+bool DetectFacesHOG(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, std::vector<double>& confidences, double min_width, cv::Rect_<double> roi)
 {
 	dlib::frontal_face_detector detector = dlib::get_frontal_face_detector();
 
-	return DetectFacesHOG(o_regions, intensity, detector, confidences);
+	return DetectFacesHOG(o_regions, intensity, detector, confidences, min_width, roi);
 }
 
-bool DetectFacesHOG(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, dlib::frontal_face_detector& detector, std::vector<double>& o_confidences)
+bool DetectFacesHOG(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, dlib::frontal_face_detector& detector, std::vector<double>& o_confidences, double min_width, cv::Rect_<double> roi)
 {
 	cv::Mat_<uchar> upsampled_intensity;
 
@@ -1422,37 +1435,46 @@ bool DetectFacesHOG(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity)
 
 	detector(cv_grayscale, face_detections, -0.2);
 
 	// Convert from int bounding box to a double one with corrections
-	o_regions.resize(face_detections.size());
-	o_confidences.resize(face_detections.size());
+	//o_regions.resize(face_detections.size());
+	//o_confidences.resize(face_detections.size());
 
-	for( size_t face = 0; face < o_regions.size(); ++face)
+	for( size_t face = 0; face < face_detections.size(); ++face)
 	{
 		// CLNF expects the bounding box to encompass from eyebrow to chin in y, and from cheek outline to cheek outline in x, so we need to compensate
-		// The scalings were learned using the Face Detections on LFPW and Helen using ground truth and detections from the HOG detector
-
+		cv::Rect_<double> region;
+
 		// Move the face slightly to the right (as the width was made smaller)
-		o_regions[face].x = (face_detections[face].rect.get_rect().tl_corner().x() + 0.0389 * face_detections[face].rect.get_rect().width())/scaling;
+		region.x = (face_detections[face].rect.get_rect().tl_corner().x() + 0.0389 * face_detections[face].rect.get_rect().width()) / scaling;
 		// Shift face down as OpenCV Haar Cascade detects the forehead as well, and we're not interested
-		o_regions[face].y = (face_detections[face].rect.get_rect().tl_corner().y() + 0.1278 * face_detections[face].rect.get_rect().height())/scaling;
+		region.y = (face_detections[face].rect.get_rect().tl_corner().y() + 0.1278 * face_detections[face].rect.get_rect().height()) / scaling;
 
 		// Correct for scale
-		o_regions[face].width = (face_detections[face].rect.get_rect().width() * 0.9611)/scaling;
-		o_regions[face].height = (face_detections[face].rect.get_rect().height() * 0.9388)/scaling;
+		region.width = (face_detections[face].rect.get_rect().width() * 0.9611) / scaling;
+		region.height = (face_detections[face].rect.get_rect().height() * 0.9388) / scaling;
 
-		o_confidences[face] = face_detections[face].detection_confidence;
+		// The scalings were learned using the Face Detections on LFPW and Helen using ground truth and detections from the HOG detector
+		if (min_width != -1)
+		{
+			if (region.width < min_width || region.x < ((double)intensity.cols) * roi.x || region.y < ((double)intensity.rows) * roi.y ||
+				region.x + region.width > ((double)intensity.cols) * (roi.x + roi.width) || region.y + region.height > ((double)intensity.rows) * (roi.y + roi.height))
+				continue;
+		}
+
+		o_regions.push_back(region);
+		o_confidences.push_back(face_detections[face].detection_confidence);
 	}
 	return o_regions.size() > 0;
 }
 
-bool DetectSingleFaceHOG(cv::Rect_<double>& o_region, const cv::Mat_<uchar>& intensity_img, dlib::frontal_face_detector& detector, double& confidence, cv::Point preference)
+bool DetectSingleFaceHOG(cv::Rect_<double>& o_region, const cv::Mat_<uchar>& intensity_img, dlib::frontal_face_detector& detector, double& confidence, cv::Point preference, double min_width, cv::Rect_<double> roi)
 {
 	// The tracker can return multiple faces
 	vector<cv::Rect_<double> > face_detections;
 	vector<double> confidences;
 
-	bool detect_success = LandmarkDetector::DetectFacesHOG(face_detections, intensity_img, detector, confidences);
+	bool detect_success = LandmarkDetector::DetectFacesHOG(face_detections, intensity_img, detector, confidences, min_width, roi);
 
 	// In case of multiple faces pick the biggest one
 	bool use_size = true;
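Note: callers outside DetectLandmarksInVideo can use the extended interface directly. A hedged usage sketch follows; the image path and thresholds are assumptions (the 107 px / 700 mm pairing matches the camera assumptions above), and the OpenCV include names may differ between versions:

    #include <opencv2/core/core.hpp>
    #include <opencv2/highgui/highgui.hpp> // imread (imgcodecs on OpenCV 3)
    #include <dlib/image_processing/frontal_face_detector.h>
    #include "LandmarkDetectorUtils.h"

    int main()
    {
        // 8-bit grayscale frame; the path is illustrative
        cv::Mat_<uchar> gray = cv::imread("frame.png", 0);

        dlib::frontal_face_detector detector = dlib::get_frontal_face_detector();

        cv::Rect_<double> face_box;
        double confidence;

        // Only accept faces at least ~107 px wide (closer than ~700 mm under
        // the patch's camera assumptions) and inside the central 90% ROI
        bool found = LandmarkDetector::DetectSingleFaceHOG(
            face_box, gray, detector, confidence,
            cv::Point(-1, -1),                        // no preference point
            107.0,                                    // min_width in pixels
            cv::Rect_<double>(0.05, 0.05, 0.9, 0.9)); // normalised ROI

        return found ? 0 : 1;
    }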