ROI, face distance and demo mode CLNF file.

This commit is contained in:
Tadas Baltrusaitis 2016-10-06 16:17:33 -04:00
parent 99cc06631f
commit 8a30e55f4b
9 changed files with 119 additions and 54 deletions

View file

@ -86,7 +86,7 @@ namespace OpenFaceDemo
String root = AppDomain.CurrentDomain.BaseDirectory;
clnf_params = new FaceModelParameters(root);
clnf_params = new FaceModelParameters(root, true);
clnf_model = new CLNF(clnf_params);
face_analyser = new FaceAnalyserManaged(root, true);
@ -259,7 +259,10 @@ namespace OpenFaceDemo
confidence = 1;
List<double> pose = new List<double>();
clnf_model.GetCorrectedPoseCamera(pose, fx, fy, cx, cy);
List<double> non_rigid_params = clnf_model.GetNonRigidParams();
double scale = clnf_model.GetRigidParams()[0];
@ -277,8 +280,8 @@ namespace OpenFaceDemo
double y_gaze = (Math.Atan2(gaze.Item1.Item2, -gaze.Item1.Item3) + Math.Atan2(gaze.Item2.Item2, -gaze.Item2.Item3)) / 2.0;
// Scaling for clearer vis.
x_gaze *= 2;
y_gaze *= 2;
x_gaze *= 2.5;
y_gaze *= 2.5;
if (detectionSucceeding)
{
@ -339,17 +342,7 @@ namespace OpenFaceDemo
old_gaze_x = gazeDict[0];
old_gaze_y = gazeDict[1];
//Dictionary<int, double> valenceDict = new Dictionary<int, double>();
//valenceDict[0] = (face_analyser.GetValence() - 1.0) / 6.5;
//valenceDict[1] = face_analyser.GetArousal();
//valencePlot.AddDataPoint(new DataPoint() { Time = CurrentTime, values = valenceDict, Confidence = confidence });
//Dictionary<int, double> avDict = new Dictionary<int, double>();
//avDict[0] = (face_analyser.GetArousal() - 0.5) * 2.0;
//avDict[1] = ((face_analyser.GetValence() - 1.0) / 6.5 - 0.5)*2;
//avPlot.AddDataPoint(new DataPoint() { Time = CurrentTime, values = avDict, Confidence = confidence });
if (latest_img == null)
{
latest_img = frame.CreateWriteableBitmap();

View file

@ -193,7 +193,7 @@ namespace OpenFaceOffline
String root = AppDomain.CurrentDomain.BaseDirectory;
clnf_params = new FaceModelParameters(root);
clnf_params = new FaceModelParameters(root, false);
clnf_model = new CLNF(clnf_params);
face_analyser = new FaceAnalyserManaged(root, use_dynamic_models);

View file

@ -97,7 +97,7 @@ namespace CppInterop {
public:
// Initialise the parameters
FaceModelParameters(System::String^ root)
FaceModelParameters(System::String^ root, bool demo)
{
std::string root_std = msclr::interop::marshal_as<std::string>(root);
vector<std::string> args;
@ -105,6 +105,11 @@ namespace CppInterop {
params = new ::LandmarkDetector::FaceModelParameters(args);
if(demo)
{
params->model_location = "model/main_clnf_demos.txt";
}
params->track_gaze = true;
}

View file

@ -155,6 +155,10 @@ public:
// Useful when resetting or initialising the model closer to a specific location (when multiple faces are present)
cv::Point_<double> preference_det;
// Useful to control where face detections should not occur (for live demos etc.)
double detect_Z_max = -1; // Do not detect faces further than this in mm. (-1) refers to detecting all faces
cv::Rect_<double> detect_ROI = cv::Rect(0, 0, 1, 1); // the face detection bounds (0,0,1,1) means full image (0.25,0.25,0.5,0.5) would imply region of the face only
// A default constructor
CLNF();

View file

@ -143,16 +143,16 @@ namespace LandmarkDetector
//============================================================================
// Face detection using Haar cascade classifier
bool DetectFaces(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity);
bool DetectFaces(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, cv::CascadeClassifier& classifier);
bool DetectFaces(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, double min_width = -1, cv::Rect_<double> roi = cv::Rect_<double>(0.0, 0.0, 1.0, 1.0));
bool DetectFaces(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, cv::CascadeClassifier& classifier, double min_width = -1, cv::Rect_<double> roi = cv::Rect_<double>(0.0, 0.0, 1.0, 1.0));
// The preference point allows for disambiguation if multiple faces are present (pick the closest one), if it is not set the biggest face is chosen
bool DetectSingleFace(cv::Rect_<double>& o_region, const cv::Mat_<uchar>& intensity, cv::CascadeClassifier& classifier, const cv::Point preference = cv::Point(-1,-1));
bool DetectSingleFace(cv::Rect_<double>& o_region, const cv::Mat_<uchar>& intensity, cv::CascadeClassifier& classifier, const cv::Point preference = cv::Point(-1,-1), double min_width = -1, cv::Rect_<double> roi = cv::Rect_<double>(0.0, 0.0, 1.0, 1.0));
// Face detection using HOG-SVM classifier
bool DetectFacesHOG(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, std::vector<double>& confidences);
bool DetectFacesHOG(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, dlib::frontal_face_detector& classifier, std::vector<double>& confidences);
bool DetectFacesHOG(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, std::vector<double>& confidences, double min_width = -1, cv::Rect_<double> roi = cv::Rect_<double>(0.0, 0.0, 1.0, 1.0));
bool DetectFacesHOG(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, dlib::frontal_face_detector& classifier, std::vector<double>& confidences, double min_width = -1, cv::Rect_<double> roi = cv::Rect_<double>(0.0, 0.0, 1.0, 1.0));
// The preference point allows for disambiguation if multiple faces are present (pick the closest one), if it is not set the biggest face is chosen
bool DetectSingleFaceHOG(cv::Rect_<double>& o_region, const cv::Mat_<uchar>& intensity, dlib::frontal_face_detector& classifier, double& confidence, const cv::Point preference = cv::Point(-1,-1));
bool DetectSingleFaceHOG(cv::Rect_<double>& o_region, const cv::Mat_<uchar>& intensity, dlib::frontal_face_detector& classifier, double& confidence, const cv::Point preference = cv::Point(-1,-1), double min_width = -1, cv::Rect_<double> roi = cv::Rect_<double>(0.0,0.0,1.0,1.0));
//============================================================================
// Matrix reading functionality

View file

@ -0,0 +1,5 @@
LandmarkDetector clnf_general.txt
LandmarkDetector_part model_eye/main_clnf_synth_left.txt left_eye_28 36 8 37 10 38 12 39 14 40 16 41 18
LandmarkDetector_part model_eye/main_clnf_synth_right.txt right_eye_28 42 8 43 10 44 12 45 14 46 16 47 18
DetectionValidator detection_validation/validator_general_68.txt
DetectorConstaints placeholder 700 0.05 0.05 0.9 0.9

View file

@ -350,14 +350,27 @@ bool LandmarkDetector::DetectLandmarksInVideo(const cv::Mat_<uchar> &grayscale_i
}
bool face_detection_success;
// For pruning face detections
double min_size;
if (clnf_model.detect_Z_max == -1)
{
min_size = -1;
}
else
{
double fx_est = 500.0 * (grayscale_image.cols / 640.0);
min_size = fx_est * 150.0 / clnf_model.detect_Z_max;
}
if(params.curr_face_detector == FaceModelParameters::HOG_SVM_DETECTOR)
{
double confidence;
face_detection_success = LandmarkDetector::DetectSingleFaceHOG(bounding_box, grayscale_image, clnf_model.face_detector_HOG, confidence, preference_det);
face_detection_success = LandmarkDetector::DetectSingleFaceHOG(bounding_box, grayscale_image, clnf_model.face_detector_HOG, confidence, preference_det, min_size, clnf_model.detect_ROI);
}
else if(params.curr_face_detector == FaceModelParameters::HAAR_DETECTOR)
{
face_detection_success = LandmarkDetector::DetectSingleFace(bounding_box, grayscale_image, clnf_model.face_detector_HAAR, preference_det);
face_detection_success = LandmarkDetector::DetectSingleFace(bounding_box, grayscale_image, clnf_model.face_detector_HAAR, preference_det, min_size, clnf_model.detect_ROI);
}
// Attempt to detect landmarks using the detected face (if unseccessful the detection will be ignored)

View file

@ -93,7 +93,7 @@ CLNF::CLNF(string fname)
CLNF::CLNF(const CLNF& other): pdm(other.pdm), params_local(other.params_local.clone()), params_global(other.params_global), detected_landmarks(other.detected_landmarks.clone()),
landmark_likelihoods(other.landmark_likelihoods.clone()), patch_experts(other.patch_experts), landmark_validator(other.landmark_validator), face_detector_location(other.face_detector_location),
hierarchical_mapping(other.hierarchical_mapping), hierarchical_models(other.hierarchical_models), hierarchical_model_names(other.hierarchical_model_names),
hierarchical_params(other.hierarchical_params), eye_model(other.eye_model)
hierarchical_params(other.hierarchical_params), eye_model(other.eye_model), detect_Z_max(other.detect_Z_max), detect_ROI(other.detect_ROI)
{
this->detection_success = other.detection_success;
this->tracking_initialised = other.tracking_initialised;
@ -173,6 +173,9 @@ CLNF & CLNF::operator= (const CLNF& other)
this->hierarchical_models = other.hierarchical_models;
this->hierarchical_model_names = other.hierarchical_model_names;
this->hierarchical_params = other.hierarchical_params;
this->detect_Z_max = other.detect_Z_max;
this->detect_ROI = detect_ROI;
}
face_detector_HOG = dlib::get_frontal_face_detector();
@ -213,6 +216,8 @@ CLNF::CLNF(const CLNF&& other)
this->eye_model = other.eye_model;
this->detect_Z_max = other.detect_Z_max;
this->detect_ROI = detect_ROI;
}
// Assignment operator for rvalues
@ -247,6 +252,9 @@ CLNF & CLNF::operator= (const CLNF&& other)
this->hierarchical_params = other.hierarchical_params;
this->eye_model = other.eye_model;
this->detect_Z_max = other.detect_Z_max;
this->detect_ROI = detect_ROI;
return *this;
}
@ -365,6 +373,10 @@ void CLNF::Read(string main_location)
// The other module locations should be defined as relative paths from the main model
boost::filesystem::path root = boost::filesystem::path(main_location).parent_path();
// Initializing defaults that could be overwriten
detect_Z_max = -1;
detect_ROI = cv::Rect_<double>(0, 0, 1, 1);
// The main file contains the references to other files
while (!locations.eof())
@ -527,6 +539,16 @@ void CLNF::Read(string main_location)
landmark_validator.Read(location);
cout << "Done" << endl;
}
else if (module.compare("DetectorConstaints") == 0)
{
cout << "Reading detectir constraints...";
lineStream >> detect_Z_max;
lineStream >> detect_ROI.x;
lineStream >> detect_ROI.y;
lineStream >> detect_ROI.width;
lineStream >> detect_ROI.height;
cout << "Done" << endl;
}
}
detected_landmarks.create(2 * pdm.NumberOfPoints(), 1);
@ -551,6 +573,7 @@ void CLNF::Read(string main_location)
preference_det.x = -1;
preference_det.y = -1;
}
// Resetting the model (for a new video, or complet reinitialisation

View file

@ -1295,7 +1295,7 @@ cv::Vec3d RotationMatrix2AxisAngle(const cv::Matx33d& rotation_matrix)
//============================================================================
// Face detection helpers
//============================================================================
bool DetectFaces(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity)
bool DetectFaces(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, double min_width, cv::Rect_<double> roi)
{
cv::CascadeClassifier classifier("./classifiers/haarcascade_frontalface_alt.xml");
if(classifier.empty())
@ -1305,47 +1305,60 @@ bool DetectFaces(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& i
}
else
{
return DetectFaces(o_regions, intensity, classifier);
return DetectFaces(o_regions, intensity, classifier, min_width, roi);
}
}
bool DetectFaces(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, cv::CascadeClassifier& classifier)
bool DetectFaces(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, cv::CascadeClassifier& classifier, double min_width, cv::Rect_<double> roi)
{
vector<cv::Rect> face_detections;
classifier.detectMultiScale(intensity, face_detections, 1.2, 2, 0, cv::Size(50, 50));
if(min_width == -1)
{
classifier.detectMultiScale(intensity, face_detections, 1.2, 2, 0, cv::Size(50, 50));
}
else
{
classifier.detectMultiScale(intensity, face_detections, 1.2, 2, 0, cv::Size(min_width, min_width));
}
// Convert from int bounding box do a double one with corrections
o_regions.resize(face_detections.size());
for( size_t face = 0; face < o_regions.size(); ++face)
{
// OpenCV is overgenerous with face size and y location is off
// CLNF detector expects the bounding box to encompass from eyebrow to chin in y, and from cheeck outline to cheeck outline in x, so we need to compensate
// The scalings were learned using the Face Detections on LFPW, Helen, AFW and iBUG datasets, using ground truth and detections from openCV
cv::Rect_<double> region;
// Correct for scale
o_regions[face].width = face_detections[face].width * 0.8924;
o_regions[face].height = face_detections[face].height * 0.8676;
region.width = face_detections[face].width * 0.8924;
region.height = face_detections[face].height * 0.8676;
// Move the face slightly to the right (as the width was made smaller)
o_regions[face].x = face_detections[face].x + 0.0578 * face_detections[face].width;
region.x = face_detections[face].x + 0.0578 * face_detections[face].width;
// Shift face down as OpenCV Haar Cascade detects the forehead as well, and we're not interested
o_regions[face].y = face_detections[face].y + face_detections[face].height * 0.2166;
region.y = face_detections[face].y + face_detections[face].height * 0.2166;
if (min_width != -1)
{
if (region.width < min_width || region.x < ((double)intensity.cols) * roi.x || region.y < ((double)intensity.cols) * roi.y ||
region.x + region.width >((double)intensity.cols) * (roi.x + roi.width) || region.y + region.height >((double)intensity.rows) * (roi.y + roi.height))
continue;
}
o_regions.push_back(region);
}
return o_regions.size() > 0;
}
bool DetectSingleFace(cv::Rect_<double>& o_region, const cv::Mat_<uchar>& intensity_image, cv::CascadeClassifier& classifier, cv::Point preference)
bool DetectSingleFace(cv::Rect_<double>& o_region, const cv::Mat_<uchar>& intensity_image, cv::CascadeClassifier& classifier, cv::Point preference, double min_width, cv::Rect_<double> roi)
{
// The tracker can return multiple faces
vector<cv::Rect_<double> > face_detections;
bool detect_success = LandmarkDetector::DetectFaces(face_detections, intensity_image, classifier);
bool detect_success = LandmarkDetector::DetectFaces(face_detections, intensity_image, classifier, min_width, roi);
if(detect_success)
{
@ -1399,15 +1412,15 @@ bool DetectSingleFace(cv::Rect_<double>& o_region, const cv::Mat_<uchar>& intens
return detect_success;
}
bool DetectFacesHOG(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, std::vector<double>& confidences)
bool DetectFacesHOG(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, std::vector<double>& confidences, double min_width, cv::Rect_<double> roi)
{
dlib::frontal_face_detector detector = dlib::get_frontal_face_detector();
return DetectFacesHOG(o_regions, intensity, detector, confidences);
return DetectFacesHOG(o_regions, intensity, detector, confidences, min_width, roi);
}
bool DetectFacesHOG(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, dlib::frontal_face_detector& detector, std::vector<double>& o_confidences)
bool DetectFacesHOG(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>& intensity, dlib::frontal_face_detector& detector, std::vector<double>& o_confidences, double min_width, cv::Rect_<double> roi)
{
cv::Mat_<uchar> upsampled_intensity;
@ -1422,37 +1435,46 @@ bool DetectFacesHOG(vector<cv::Rect_<double> >& o_regions, const cv::Mat_<uchar>
detector(cv_grayscale, face_detections, -0.2);
// Convert from int bounding box do a double one with corrections
o_regions.resize(face_detections.size());
o_confidences.resize(face_detections.size());
//o_regions.resize(face_detections.size());
//o_confidences.resize(face_detections.size());
for( size_t face = 0; face < o_regions.size(); ++face)
for( size_t face = 0; face < face_detections.size(); ++face)
{
// CLNF expects the bounding box to encompass from eyebrow to chin in y, and from cheeck outline to cheeck outline in x, so we need to compensate
// The scalings were learned using the Face Detections on LFPW and Helen using ground truth and detections from the HOG detector
cv::Rect_<double> region;
// Move the face slightly to the right (as the width was made smaller)
o_regions[face].x = (face_detections[face].rect.get_rect().tl_corner().x() + 0.0389 * face_detections[face].rect.get_rect().width())/scaling;
region.x = (face_detections[face].rect.get_rect().tl_corner().x() + 0.0389 * face_detections[face].rect.get_rect().width()) / scaling;
// Shift face down as OpenCV Haar Cascade detects the forehead as well, and we're not interested
o_regions[face].y = (face_detections[face].rect.get_rect().tl_corner().y() + 0.1278 * face_detections[face].rect.get_rect().height())/scaling;
region.y = (face_detections[face].rect.get_rect().tl_corner().y() + 0.1278 * face_detections[face].rect.get_rect().height()) / scaling;
// Correct for scale
o_regions[face].width = (face_detections[face].rect.get_rect().width() * 0.9611)/scaling;
o_regions[face].height = (face_detections[face].rect.get_rect().height() * 0.9388)/scaling;
region.width = (face_detections[face].rect.get_rect().width() * 0.9611) / scaling;
region.height = (face_detections[face].rect.get_rect().height() * 0.9388) / scaling;
o_confidences[face] = face_detections[face].detection_confidence;
// The scalings were learned using the Face Detections on LFPW and Helen using ground truth and detections from the HOG detector
if (min_width != -1)
{
if (region.width < min_width || region.x < ((double)intensity.cols) * roi.x || region.y < ((double)intensity.cols) * roi.y ||
region.x + region.width > ((double)intensity.cols) * (roi.x+roi.width) || region.y + region.height >((double)intensity.rows) * (roi.y + roi.height))
continue;
}
o_regions.push_back(region);
o_confidences.push_back(face_detections[face].detection_confidence);
}
return o_regions.size() > 0;
}
bool DetectSingleFaceHOG(cv::Rect_<double>& o_region, const cv::Mat_<uchar>& intensity_img, dlib::frontal_face_detector& detector, double& confidence, cv::Point preference)
bool DetectSingleFaceHOG(cv::Rect_<double>& o_region, const cv::Mat_<uchar>& intensity_img, dlib::frontal_face_detector& detector, double& confidence, cv::Point preference, double min_width, cv::Rect_<double> roi)
{
// The tracker can return multiple faces
vector<cv::Rect_<double> > face_detections;
vector<double> confidences;
bool detect_success = LandmarkDetector::DetectFacesHOG(face_detections, intensity_img, detector, confidences);
bool detect_success = LandmarkDetector::DetectFacesHOG(face_detections, intensity_img, detector, confidences, min_width, roi);
// In case of multiple faces pick the biggest one
bool use_size = true;