sustaining_gazes/lib/local/LandmarkDetector/src/LandmarkDetectorModel.cpp

1234 lines
37 KiB
C++
Raw Normal View History

2016-04-28 21:40:36 +02:00
///////////////////////////////////////////////////////////////////////////////
// Copyright (C) 2017, Carnegie Mellon University and University of Cambridge,
2016-04-28 21:40:36 +02:00
// all rights reserved.
//
// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
//
// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.
// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
//
// License can be found in OpenFace-license.txt
2016-04-28 21:40:36 +02:00
//
// * Any publications arising from the use of this software, including but
// not limited to academic journal and conference publications, technical
// reports and manuals, must cite at least one of the following works:
//
// OpenFace: an open source facial behavior analysis toolkit
// Tadas Baltrušaitis, Peter Robinson, and Louis-Philippe Morency
2016-04-28 21:40:36 +02:00
// in IEEE Winter Conference on Applications of Computer Vision, 2016
//
// Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
// Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling
2016-04-28 21:40:36 +02:00
// in IEEE International. Conference on Computer Vision (ICCV), 2015
//
// Cross-dataset learning and person-speci?c normalisation for automatic Action Unit detection
// Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson
2016-04-28 21:40:36 +02:00
// in Facial Expression Recognition and Analysis Challenge,
// IEEE International Conference on Automatic Face and Gesture Recognition, 2015
//
// Constrained Local Neural Fields for robust facial landmark detection in the wild.
// Tadas Baltrušaitis, Peter Robinson, and Louis-Philippe Morency.
2016-04-28 21:40:36 +02:00
// in IEEE Int. Conference on Computer Vision Workshops, 300 Faces in-the-Wild Challenge, 2013.
//
///////////////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include <LandmarkDetectorModel.h>
// Boost includes
#include <filesystem.hpp>
#include <filesystem/fstream.hpp>
// TBB includes
#include <tbb/tbb.h>
// Local includes
#include <LandmarkDetectorUtils.h>
#include <RotationHelpers.h>
2016-04-28 21:40:36 +02:00
using namespace LandmarkDetector;
//=============================================================================
//=============================================================================
// Constructors
// A default constructor
CLNF::CLNF()
{
FaceModelParameters parameters;
this->Read(parameters.model_location);
}
// Constructor from a model file
CLNF::CLNF(string fname)
{
this->Read(fname);
}
// Copy constructor (makes a deep copy of CLNF)
CLNF::CLNF(const CLNF& other) : pdm(other.pdm), params_local(other.params_local.clone()), params_global(other.params_global), detected_landmarks(other.detected_landmarks.clone()),
landmark_likelihoods(other.landmark_likelihoods.clone()), patch_experts(other.patch_experts), landmark_validator(other.landmark_validator), face_detector_location(other.face_detector_location),
hierarchical_mapping(other.hierarchical_mapping), hierarchical_models(other.hierarchical_models), hierarchical_model_names(other.hierarchical_model_names),
hierarchical_params(other.hierarchical_params), eye_model(other.eye_model)
2016-04-28 21:40:36 +02:00
{
this->detection_success = other.detection_success;
this->tracking_initialised = other.tracking_initialised;
this->detection_certainty = other.detection_certainty;
this->model_likelihood = other.model_likelihood;
this->failures_in_a_row = other.failures_in_a_row;
2017-07-30 03:11:16 +02:00
2016-04-28 21:40:36 +02:00
// Load the CascadeClassifier (as it does not have a proper copy constructor)
if (!face_detector_location.empty())
2016-04-28 21:40:36 +02:00
{
this->face_detector_HAAR.load(face_detector_location);
}
// Make sure the matrices are allocated properly
this->triangulations.resize(other.triangulations.size());
for (size_t i = 0; i < other.triangulations.size(); ++i)
2016-04-28 21:40:36 +02:00
{
// Make sure the matrix is copied.
this->triangulations[i] = other.triangulations[i].clone();
}
// Make sure the matrices are allocated properly
for (std::map<int, cv::Mat_<float>>::const_iterator it = other.kde_resp_precalc.begin(); it != other.kde_resp_precalc.end(); it++)
2016-04-28 21:40:36 +02:00
{
// Make sure the matrix is copied.
this->kde_resp_precalc.insert(std::pair<int, cv::Mat_<float>>(it->first, it->second.clone()));
}
this->face_detector_HOG = dlib::get_frontal_face_detector();
}
// Assignment operator for lvalues (makes a deep copy of CLNF)
CLNF & CLNF::operator= (const CLNF& other)
{
if (this != &other) // protect against invalid self-assignment
{
pdm = PDM(other.pdm);
params_local = other.params_local.clone();
params_global = other.params_global;
detected_landmarks = other.detected_landmarks.clone();
landmark_likelihoods = other.landmark_likelihoods.clone();
2016-04-28 21:40:36 +02:00
patch_experts = Patch_experts(other.patch_experts);
landmark_validator = DetectionValidator(other.landmark_validator);
face_detector_location = other.face_detector_location;
this->detection_success = other.detection_success;
this->tracking_initialised = other.tracking_initialised;
this->detection_certainty = other.detection_certainty;
this->model_likelihood = other.model_likelihood;
this->failures_in_a_row = other.failures_in_a_row;
this->eye_model = other.eye_model;
// Load the CascadeClassifier (as it does not have a proper copy constructor)
if (!face_detector_location.empty())
2016-04-28 21:40:36 +02:00
{
this->face_detector_HAAR.load(face_detector_location);
}
// Make sure the matrices are allocated properly
this->triangulations.resize(other.triangulations.size());
for (size_t i = 0; i < other.triangulations.size(); ++i)
2016-04-28 21:40:36 +02:00
{
// Make sure the matrix is copied.
this->triangulations[i] = other.triangulations[i].clone();
}
// Make sure the matrices are allocated properly
for (std::map<int, cv::Mat_<float>>::const_iterator it = other.kde_resp_precalc.begin(); it != other.kde_resp_precalc.end(); it++)
2016-04-28 21:40:36 +02:00
{
// Make sure the matrix is copied.
this->kde_resp_precalc.insert(std::pair<int, cv::Mat_<float>>(it->first, it->second.clone()));
}
// Copy over the hierarchical models
this->hierarchical_mapping = other.hierarchical_mapping;
this->hierarchical_models = other.hierarchical_models;
this->hierarchical_model_names = other.hierarchical_model_names;
this->hierarchical_params = other.hierarchical_params;
}
face_detector_HOG = dlib::get_frontal_face_detector();
return *this;
}
// Move constructor
CLNF::CLNF(const CLNF&& other)
{
this->detection_success = other.detection_success;
this->tracking_initialised = other.tracking_initialised;
this->detection_certainty = other.detection_certainty;
this->model_likelihood = other.model_likelihood;
this->failures_in_a_row = other.failures_in_a_row;
pdm = other.pdm;
params_local = other.params_local;
params_global = other.params_global;
detected_landmarks = other.detected_landmarks;
landmark_likelihoods = other.landmark_likelihoods;
patch_experts = other.patch_experts;
landmark_validator = other.landmark_validator;
face_detector_location = other.face_detector_location;
face_detector_HAAR = other.face_detector_HAAR;
triangulations = other.triangulations;
kde_resp_precalc = other.kde_resp_precalc;
face_detector_HOG = dlib::get_frontal_face_detector();
// Copy over the hierarchical models
this->hierarchical_mapping = other.hierarchical_mapping;
this->hierarchical_models = other.hierarchical_models;
this->hierarchical_model_names = other.hierarchical_model_names;
this->hierarchical_params = other.hierarchical_params;
this->eye_model = other.eye_model;
}
// Assignment operator for rvalues
CLNF & CLNF::operator= (const CLNF&& other)
{
this->detection_success = other.detection_success;
this->tracking_initialised = other.tracking_initialised;
this->detection_certainty = other.detection_certainty;
this->model_likelihood = other.model_likelihood;
this->failures_in_a_row = other.failures_in_a_row;
pdm = other.pdm;
params_local = other.params_local;
params_global = other.params_global;
detected_landmarks = other.detected_landmarks;
landmark_likelihoods = other.landmark_likelihoods;
patch_experts = other.patch_experts;
landmark_validator = other.landmark_validator;
face_detector_location = other.face_detector_location;
face_detector_HAAR = other.face_detector_HAAR;
triangulations = other.triangulations;
kde_resp_precalc = other.kde_resp_precalc;
face_detector_HOG = dlib::get_frontal_face_detector();
// Copy over the hierarchical models
this->hierarchical_mapping = other.hierarchical_mapping;
this->hierarchical_models = other.hierarchical_models;
this->hierarchical_model_names = other.hierarchical_model_names;
this->hierarchical_params = other.hierarchical_params;
this->eye_model = other.eye_model;
return *this;
}
void CLNF::Read_CLNF(string clnf_location)
{
// Location of modules
ifstream locations(clnf_location.c_str(), ios_base::in);
if (!locations.is_open())
2016-04-28 21:40:36 +02:00
{
cout << "Couldn't open the CLNF model file aborting" << endl;
cout.flush();
return;
}
string line;
2016-04-28 21:40:36 +02:00
vector<string> intensity_expert_locations;
vector<string> ccnf_expert_locations;
// The other module locations should be defined as relative paths from the main model
boost::filesystem::path root = boost::filesystem::path(clnf_location).parent_path();
// The main file contains the references to other files
while (!locations.eof())
{
2016-04-28 21:40:36 +02:00
getline(locations, line);
stringstream lineStream(line);
string module;
string location;
// figure out which module is to be read from which file
lineStream >> module;
2016-04-28 21:40:36 +02:00
getline(lineStream, location);
if (location.size() > 0)
2016-04-28 21:40:36 +02:00
location.erase(location.begin()); // remove the first space
// remove carriage return at the end for compatibility with unix systems
if (location.size() > 0 && location.at(location.size() - 1) == '\r')
2016-04-28 21:40:36 +02:00
{
location = location.substr(0, location.size() - 1);
2016-04-28 21:40:36 +02:00
}
// append the lovstion to root location (boost syntax)
location = (root / location).string();
if (module.compare("PDM") == 0)
{
2016-04-28 21:40:36 +02:00
cout << "Reading the PDM module from: " << location << "....";
pdm.Read(location);
cout << "Done" << endl;
}
else if (module.compare("Triangulations") == 0)
{
2016-04-28 21:40:36 +02:00
cout << "Reading the Triangulations module from: " << location << "....";
ifstream triangulationFile(location.c_str(), ios_base::in);
LandmarkDetector::SkipComments(triangulationFile);
int numViews;
triangulationFile >> numViews;
// read in the triangulations
triangulations.resize(numViews);
for (int i = 0; i < numViews; ++i)
2016-04-28 21:40:36 +02:00
{
LandmarkDetector::SkipComments(triangulationFile);
LandmarkDetector::ReadMat(triangulationFile, triangulations[i]);
}
cout << "Done" << endl;
}
else if (module.compare("PatchesIntensity") == 0)
2016-04-28 21:40:36 +02:00
{
intensity_expert_locations.push_back(location);
}
else if (module.compare("PatchesCCNF") == 0)
2016-04-28 21:40:36 +02:00
{
ccnf_expert_locations.push_back(location);
}
}
2016-04-28 21:40:36 +02:00
// Initialise the patch experts
patch_experts.Read(intensity_expert_locations, ccnf_expert_locations);
2016-04-28 21:40:36 +02:00
// Read in a face detector
face_detector_HOG = dlib::get_frontal_face_detector();
}
void CLNF::Read(string main_location)
{
cout << "Reading the CLNF landmark detector/tracker from: " << main_location << endl;
2016-04-28 21:40:36 +02:00
ifstream locations(main_location.c_str(), ios_base::in);
if (!locations.is_open())
2016-04-28 21:40:36 +02:00
{
cout << "Couldn't open the model file, aborting" << endl;
return;
}
string line;
2016-04-28 21:40:36 +02:00
// The other module locations should be defined as relative paths from the main model
boost::filesystem::path root = boost::filesystem::path(main_location).parent_path();
2016-04-28 21:40:36 +02:00
// Assume no eye model, unless read-in
eye_model = false;
2016-04-28 21:40:36 +02:00
// The main file contains the references to other files
while (!locations.eof())
{
2016-04-28 21:40:36 +02:00
getline(locations, line);
stringstream lineStream(line);
string module;
string location;
// figure out which module is to be read from which file
lineStream >> module;
2016-04-28 21:40:36 +02:00
lineStream >> location;
2016-04-28 21:40:36 +02:00
// remove carriage return at the end for compatibility with unix systems
if (location.size() > 0 && location.at(location.size() - 1) == '\r')
2016-04-28 21:40:36 +02:00
{
location = location.substr(0, location.size() - 1);
2016-04-28 21:40:36 +02:00
}
2016-04-28 21:40:36 +02:00
// append to root
location = (root / location).string();
if (module.compare("LandmarkDetector") == 0)
{
2016-04-28 21:40:36 +02:00
cout << "Reading the landmark detector module from: " << location << endl;
// The CLNF module includes the PDM and the patch experts
Read_CLNF(location);
}
else if (module.compare("LandmarkDetector_part") == 0)
2016-04-28 21:40:36 +02:00
{
string part_name;
lineStream >> part_name;
cout << "Reading part based module...." << part_name << endl;
vector<pair<int, int>> mappings;
while (!lineStream.eof())
2016-04-28 21:40:36 +02:00
{
int ind_in_main;
lineStream >> ind_in_main;
2016-04-28 21:40:36 +02:00
int ind_in_part;
lineStream >> ind_in_part;
mappings.push_back(pair<int, int>(ind_in_main, ind_in_part));
}
2016-04-28 21:40:36 +02:00
this->hierarchical_mapping.push_back(mappings);
CLNF part_model(location);
this->hierarchical_models.push_back(part_model);
this->hierarchical_model_names.push_back(part_name);
FaceModelParameters params;
params.validate_detections = false;
params.refine_hierarchical = false;
params.refine_parameters = false;
if (part_name.compare("left_eye") == 0 || part_name.compare("right_eye") == 0)
2016-04-28 21:40:36 +02:00
{
2016-04-28 21:40:36 +02:00
vector<int> windows_large;
windows_large.push_back(5);
windows_large.push_back(3);
vector<int> windows_small;
windows_small.push_back(5);
windows_small.push_back(3);
params.window_sizes_init = windows_large;
params.window_sizes_small = windows_small;
params.window_sizes_current = windows_large;
params.reg_factor = 0.1;
params.sigma = 2;
}
else if (part_name.compare("left_eye_28") == 0 || part_name.compare("right_eye_28") == 0)
2016-04-28 21:40:36 +02:00
{
vector<int> windows_large;
windows_large.push_back(3);
windows_large.push_back(5);
windows_large.push_back(9);
vector<int> windows_small;
windows_small.push_back(3);
windows_small.push_back(5);
windows_small.push_back(9);
params.window_sizes_init = windows_large;
params.window_sizes_small = windows_small;
params.window_sizes_current = windows_large;
params.reg_factor = 0.5;
params.sigma = 1.0;
eye_model = true;
}
else if (part_name.compare("mouth") == 0)
2016-04-28 21:40:36 +02:00
{
vector<int> windows_large;
windows_large.push_back(7);
windows_large.push_back(7);
vector<int> windows_small;
windows_small.push_back(7);
windows_small.push_back(7);
params.window_sizes_init = windows_large;
params.window_sizes_small = windows_small;
params.window_sizes_current = windows_large;
params.reg_factor = 1.0;
params.sigma = 2.0;
}
else if (part_name.compare("brow") == 0)
2016-04-28 21:40:36 +02:00
{
vector<int> windows_large;
windows_large.push_back(11);
windows_large.push_back(9);
vector<int> windows_small;
windows_small.push_back(11);
windows_small.push_back(9);
params.window_sizes_init = windows_large;
params.window_sizes_small = windows_small;
params.window_sizes_current = windows_large;
params.reg_factor = 10.0;
params.sigma = 3.5;
}
else if (part_name.compare("inner") == 0)
2016-04-28 21:40:36 +02:00
{
vector<int> windows_large;
windows_large.push_back(9);
vector<int> windows_small;
windows_small.push_back(9);
params.window_sizes_init = windows_large;
params.window_sizes_small = windows_small;
params.window_sizes_current = windows_large;
params.reg_factor = 2.5;
params.sigma = 1.75;
params.weight_factor = 2.5;
}
this->hierarchical_params.push_back(params);
cout << "Done" << endl;
}
else if (module.compare("DetectionValidator") == 0)
{
2016-04-28 21:40:36 +02:00
cout << "Reading the landmark validation module....";
landmark_validator.Read(location);
cout << "Done" << endl;
}
}
2016-04-28 21:40:36 +02:00
detected_landmarks.create(2 * pdm.NumberOfPoints(), 1);
detected_landmarks.setTo(0);
SetDetectionSuccess(false);
SetInitialized(false);
2016-04-28 21:40:36 +02:00
model_likelihood = -10; // very low
detection_certainty = 0; // very uncertain
2016-04-28 21:40:36 +02:00
// Initialising default values for the rest of the variables
2016-04-28 21:40:36 +02:00
// local parameters (shape)
2016-04-28 21:40:36 +02:00
params_local.create(pdm.NumberOfModes(), 1);
params_local.setTo(0.0);
// global parameters (pose) [scale, euler_x, euler_y, euler_z, tx, ty]
2017-11-25 19:46:53 +01:00
params_global = cv::Vec6d(0, 0, 0, 0, 0, 0);
2016-04-28 21:40:36 +02:00
failures_in_a_row = -1;
}
void CLNF::SetDetectionSuccess(bool success)
{
this->detection_success = success;
for (size_t i = 0; i < hierarchical_models.size(); ++i)
{
hierarchical_models[i].SetDetectionSuccess(success);
}
}
void CLNF::SetInitialized(bool initialized)
{
this->tracking_initialised = initialized;
for (size_t i = 0; i < hierarchical_models.size(); ++i)
{
hierarchical_models[i].SetInitialized(initialized);
}
2016-04-28 21:40:36 +02:00
}
// Resetting the model (for a new video, or complet reinitialisation
void CLNF::Reset()
{
detected_landmarks.setTo(0);
SetDetectionSuccess(false);
SetInitialized(false);
2016-04-28 21:40:36 +02:00
model_likelihood = -10; // very low
detection_certainty = 0; // very uncertain
2016-04-28 21:40:36 +02:00
// local parameters (shape)
2016-04-28 21:40:36 +02:00
params_local.setTo(0.0);
// global parameters (pose) [scale, euler_x, euler_y, euler_z, tx, ty]
params_global = cv::Vec6d(1, 0, 0, 0, 0, 0);
failures_in_a_row = -1;
face_template = cv::Mat_<uchar>();
}
// Resetting the model, choosing the face nearest (x,y)
void CLNF::Reset(double x, double y)
{
// First reset the model overall
this->Reset();
// Now in the following frame when face detection takes place this is the point at which it will be preffered
this->preference_det.x = x;
this->preference_det.y = y;
}
// The main internal landmark detection call (should not be used externally?)
bool CLNF::DetectLandmarks(const cv::Mat_<uchar> &image, FaceModelParameters& params)
2016-04-28 21:40:36 +02:00
{
// Fits from the current estimate of local and global parameters in the model
bool fit_success = Fit(image, params.window_sizes_current, params);
2016-04-28 21:40:36 +02:00
// Store the landmarks converged on in detected_landmarks
pdm.CalcShape2D(detected_landmarks, params_local, params_global);
if (params.refine_hierarchical && hierarchical_models.size() > 0)
2016-04-28 21:40:36 +02:00
{
bool parts_used = false;
2016-04-28 21:40:36 +02:00
// Do the hierarchical models in parallel
tbb::parallel_for(0, (int)hierarchical_models.size(), [&](int part_model) {
2016-04-28 21:40:36 +02:00
{
int n_part_points = hierarchical_models[part_model].pdm.NumberOfPoints();
vector<pair<int, int>> mappings = this->hierarchical_mapping[part_model];
cv::Mat_<double> part_model_locs(n_part_points * 2, 1, 0.0);
// Extract the corresponding landmarks
for (size_t mapping_ind = 0; mapping_ind < mappings.size(); ++mapping_ind)
{
part_model_locs.at<double>(mappings[mapping_ind].second) = detected_landmarks.at<double>(mappings[mapping_ind].first);
part_model_locs.at<double>(mappings[mapping_ind].second + n_part_points) = detected_landmarks.at<double>(mappings[mapping_ind].first + this->pdm.NumberOfPoints());
}
// Fit the part based model PDM
hierarchical_models[part_model].pdm.CalcParams(hierarchical_models[part_model].params_global, hierarchical_models[part_model].params_local, part_model_locs);
// Only do this if we don't need to upsample
if (params_global[0] > 0.9 * hierarchical_models[part_model].patch_experts.patch_scaling[0])
{
parts_used = true;
this->hierarchical_params[part_model].window_sizes_current = this->hierarchical_params[part_model].window_sizes_init;
// Do the actual landmark detection
hierarchical_models[part_model].DetectLandmarks(image, hierarchical_params[part_model]);
2016-04-28 21:40:36 +02:00
}
else
{
hierarchical_models[part_model].pdm.CalcShape2D(hierarchical_models[part_model].detected_landmarks, hierarchical_models[part_model].params_local, hierarchical_models[part_model].params_global);
}
}
});
// Recompute main model based on the fit part models
if (parts_used)
2016-04-28 21:40:36 +02:00
{
2016-08-13 23:56:17 +02:00
for (size_t part_model = 0; part_model < hierarchical_models.size(); ++part_model)
{
vector<pair<int, int>> mappings = this->hierarchical_mapping[part_model];
// Reincorporate the models into main tracker
for (size_t mapping_ind = 0; mapping_ind < mappings.size(); ++mapping_ind)
{
detected_landmarks.at<double>(mappings[mapping_ind].first) = hierarchical_models[part_model].detected_landmarks.at<double>(mappings[mapping_ind].second);
detected_landmarks.at<double>(mappings[mapping_ind].first + pdm.NumberOfPoints()) = hierarchical_models[part_model].detected_landmarks.at<double>(mappings[mapping_ind].second + hierarchical_models[part_model].pdm.NumberOfPoints());
}
}
pdm.CalcParams(params_global, params_local, detected_landmarks);
2016-04-28 21:40:36 +02:00
pdm.CalcShape2D(detected_landmarks, params_local, params_global);
}
2016-04-28 21:40:36 +02:00
}
// Check detection correctness
if (params.validate_detections && fit_success)
2016-04-28 21:40:36 +02:00
{
cv::Vec3d orientation(params_global[1], params_global[2], params_global[3]);
detection_certainty = landmark_validator.Check(orientation, image, detected_landmarks);
detection_success = detection_certainty > params.validation_boundary;
2016-04-28 21:40:36 +02:00
}
else
{
detection_success = fit_success;
if (fit_success)
2016-04-28 21:40:36 +02:00
{
detection_certainty = 1;
2016-04-28 21:40:36 +02:00
}
else
{
detection_certainty = 0;
2016-04-28 21:40:36 +02:00
}
}
return detection_success;
}
//=============================================================================
bool CLNF::Fit(const cv::Mat_<uchar>& im, const std::vector<int>& window_sizes, const FaceModelParameters& parameters)
2016-04-28 21:40:36 +02:00
{
// Making sure it is a single channel image
assert(im.channels() == 1);
2016-04-28 21:40:36 +02:00
// Placeholder for the landmarks
cv::Mat_<double> current_shape(2 * pdm.NumberOfPoints(), 1, 0.0);
int n = pdm.NumberOfPoints();
2016-04-28 21:40:36 +02:00
int num_scales = patch_experts.patch_scaling.size();
// Storing the patch expert response maps
vector<cv::Mat_<float> > patch_expert_responses(n);
// Converting from image space to patch expert space (normalised for rotation and scale)
cv::Matx22f sim_ref_to_img;
cv::Matx22d sim_img_to_ref;
FaceModelParameters tmp_parameters = parameters;
// Optimise the model across a number of areas of interest (usually in descending window size and ascending scale size)
for (int scale = 0; scale < num_scales; scale++)
2016-04-28 21:40:36 +02:00
{
int window_size = window_sizes[scale];
if (window_size == 0 || 0.9 * patch_experts.patch_scaling[scale] > params_global[0])
2016-04-28 21:40:36 +02:00
continue;
// The patch expert response computation
patch_experts.Response(patch_expert_responses, sim_ref_to_img, sim_img_to_ref, im, pdm, params_global, params_local, window_size, scale);
if (parameters.refine_parameters == true)
2016-04-28 21:40:36 +02:00
{
// Adapt the parameters based on scale (wan't to reduce regularisation as scale increases, but increa sigma and tikhonov)
tmp_parameters.reg_factor = parameters.reg_factor - 15 * log(patch_experts.patch_scaling[scale] / 0.25) / log(2);
if (tmp_parameters.reg_factor <= 0)
2016-04-28 21:40:36 +02:00
tmp_parameters.reg_factor = 0.001;
tmp_parameters.sigma = parameters.sigma + 0.25 * log(patch_experts.patch_scaling[scale] / 0.25) / log(2);
tmp_parameters.weight_factor = parameters.weight_factor + 2 * parameters.weight_factor * log(patch_experts.patch_scaling[scale] / 0.25) / log(2);
2016-04-28 21:40:36 +02:00
}
// Get the current landmark locations
pdm.CalcShape2D(current_shape, params_local, params_global);
// Get the view used by patch experts
int view_id = patch_experts.GetViewIdx(params_global, scale);
// the actual optimisation step
this->NU_RLMS(params_global, params_local, patch_expert_responses, cv::Vec6d(params_global), params_local.clone(), current_shape, sim_img_to_ref, sim_ref_to_img, window_size, view_id, true, scale, this->landmark_likelihoods, tmp_parameters);
// non-rigid optimisation
this->model_likelihood = this->NU_RLMS(params_global, params_local, patch_expert_responses, cv::Vec6d(params_global), params_local.clone(), current_shape, sim_img_to_ref, sim_ref_to_img, window_size, view_id, false, scale, this->landmark_likelihoods, tmp_parameters);
2016-04-28 21:40:36 +02:00
// Can't track very small images reliably (less than ~30px across)
if (params_global[0] < 0.25)
2016-04-28 21:40:36 +02:00
{
cout << "Face too small for landmark detection" << endl;
return false;
}
}
return true;
}
void CLNF::NonVectorisedMeanShift_precalc_kde(cv::Mat_<float>& out_mean_shifts, const vector<cv::Mat_<float> >& patch_expert_responses, const cv::Mat_<float> &dxs, const cv::Mat_<float> &dys, int resp_size, float a, int scale, int view_id, map<int, cv::Mat_<float> >& kde_resp_precalc)
{
2016-04-28 21:40:36 +02:00
int n = dxs.rows;
2016-04-28 21:40:36 +02:00
cv::Mat_<float> kde_resp;
float step_size = 0.1;
// if this has not been precomputer, precompute it, otherwise use it
if (kde_resp_precalc.find(resp_size) == kde_resp_precalc.end())
{
kde_resp = cv::Mat_<float>((int)((resp_size / step_size)*(resp_size / step_size)), resp_size * resp_size);
2016-04-28 21:40:36 +02:00
cv::MatIterator_<float> kde_it = kde_resp.begin();
for (int x = 0; x < resp_size / step_size; x++)
2016-04-28 21:40:36 +02:00
{
float dx = x * step_size;
for (int y = 0; y < resp_size / step_size; y++)
2016-04-28 21:40:36 +02:00
{
float dy = y * step_size;
int ii, jj;
float v, vx, vy;
for (ii = 0; ii < resp_size; ii++)
2016-04-28 21:40:36 +02:00
{
vx = (dy - ii)*(dy - ii);
for (jj = 0; jj < resp_size; jj++)
2016-04-28 21:40:36 +02:00
{
vy = (dx - jj)*(dx - jj);
2016-04-28 21:40:36 +02:00
// the KDE evaluation of that point
v = exp(a*(vx + vy));
2016-04-28 21:40:36 +02:00
*kde_it++ = v;
}
}
}
}
kde_resp_precalc[resp_size] = kde_resp.clone();
}
else
{
// use the precomputed version
kde_resp = kde_resp_precalc.find(resp_size)->second;
}
// for every point (patch) calculating mean-shift
for (int i = 0; i < n; i++)
2016-04-28 21:40:36 +02:00
{
if (patch_experts.visibilities[scale][view_id].at<int>(i, 0) == 0)
2016-04-28 21:40:36 +02:00
{
out_mean_shifts.at<float>(i, 0) = 0;
out_mean_shifts.at<float>(i + n, 0) = 0;
2016-04-28 21:40:36 +02:00
continue;
}
// indices of dx, dy
float dx = dxs.at<float>(i);
float dy = dys.at<float>(i);
// Ensure that we are within bounds (important for precalculation)
if (dx < 0)
2016-04-28 21:40:36 +02:00
dx = 0;
if (dy < 0)
2016-04-28 21:40:36 +02:00
dy = 0;
if (dx > resp_size - step_size)
2016-04-28 21:40:36 +02:00
dx = resp_size - step_size;
if (dy > resp_size - step_size)
2016-04-28 21:40:36 +02:00
dy = resp_size - step_size;
2016-04-28 21:40:36 +02:00
// Pick the row from precalculated kde that approximates the current dx, dy best
int closest_col = (int)(dy / step_size + 0.5); // Plus 0.5 is there, as C++ rounds down with int cast
int closest_row = (int)(dx / step_size + 0.5); // Plus 0.5 is there, as C++ rounds down with int cast
int idx = closest_row * ((int)(resp_size / step_size + 0.5)) + closest_col; // Plus 0.5 is there, as C++ rounds down with int cast
2016-04-28 21:40:36 +02:00
cv::MatIterator_<float> kde_it = kde_resp.begin() + kde_resp.cols*idx;
float mx = 0.0;
float my = 0.0;
float sum = 0.0;
2016-04-28 21:40:36 +02:00
// Iterate over the patch responses here
cv::MatConstIterator_<float> p = patch_expert_responses[i].begin();
for (int ii = 0; ii < resp_size; ii++)
2016-04-28 21:40:36 +02:00
{
for (int jj = 0; jj < resp_size; jj++)
2016-04-28 21:40:36 +02:00
{
// the KDE evaluation of that point multiplied by the probability at the current, xi, yi
float v = (*p++) * (*kde_it++);
sum += v;
// mean shift in x and y
mx += v*jj;
my += v*ii;
2016-04-28 21:40:36 +02:00
}
}
float msx = (mx / sum - dx);
float msy = (my / sum - dy);
out_mean_shifts.at<float>(i, 0) = msx;
out_mean_shifts.at<float>(i + n, 0) = msy;
2016-04-28 21:40:36 +02:00
}
}
void CLNF::GetWeightMatrix(cv::Mat_<float>& WeightMatrix, int scale, int view_id, const FaceModelParameters& parameters)
{
int n = pdm.NumberOfPoints();
2016-04-28 21:40:36 +02:00
// Is the weight matrix needed at all
if (parameters.weight_factor > 0)
2016-04-28 21:40:36 +02:00
{
WeightMatrix = cv::Mat_<float>::zeros(n * 2, n * 2);
2016-04-28 21:40:36 +02:00
for (int p = 0; p < n; p++)
2016-04-28 21:40:36 +02:00
{
if (!patch_experts.ccnf_expert_intensity.empty())
2016-04-28 21:40:36 +02:00
{
// for the x dimension
WeightMatrix.at<float>(p, p) = WeightMatrix.at<float>(p, p) + patch_experts.ccnf_expert_intensity[scale][view_id][p].patch_confidence;
2016-04-28 21:40:36 +02:00
// for they y dimension
WeightMatrix.at<float>(p + n, p + n) = WeightMatrix.at<float>(p, p);
2016-04-28 21:40:36 +02:00
}
else
{
// Across the modalities add the confidences
for (size_t pc = 0; pc < patch_experts.svr_expert_intensity[scale][view_id][p].svr_patch_experts.size(); pc++)
2016-04-28 21:40:36 +02:00
{
// for the x dimension
WeightMatrix.at<float>(p, p) = WeightMatrix.at<float>(p, p) + patch_experts.svr_expert_intensity[scale][view_id][p].svr_patch_experts.at(pc).confidence;
}
2016-04-28 21:40:36 +02:00
// for the y dimension
WeightMatrix.at<float>(p + n, p + n) = WeightMatrix.at<float>(p, p);
2016-04-28 21:40:36 +02:00
}
}
WeightMatrix = parameters.weight_factor * WeightMatrix;
}
else
{
WeightMatrix = cv::Mat_<float>::eye(n * 2, n * 2);
2016-04-28 21:40:36 +02:00
}
}
//=============================================================================
double CLNF::NU_RLMS(cv::Vec6d& final_global, cv::Mat_<double>& final_local, const vector<cv::Mat_<float> >& patch_expert_responses, const cv::Vec6d& initial_global, const cv::Mat_<double>& initial_local,
const cv::Mat_<double>& base_shape, const cv::Matx22d& sim_img_to_ref, const cv::Matx22f& sim_ref_to_img, int resp_size, int view_id, bool rigid, int scale, cv::Mat_<double>& landmark_lhoods,
const FaceModelParameters& parameters)
{
int n = pdm.NumberOfPoints();
2016-04-28 21:40:36 +02:00
// Mean, eigenvalues, eigenvectors
cv::Mat_<double> M = this->pdm.mean_shape;
cv::Mat_<double> E = this->pdm.eigen_values;
//Mat_<double> V = this->pdm.princ_comp;
int m = pdm.NumberOfModes();
2016-04-28 21:40:36 +02:00
cv::Vec6d current_global(initial_global);
cv::Mat_<float> current_local;
initial_local.convertTo(current_local, CV_32F);
cv::Mat_<double> current_shape;
cv::Mat_<double> previous_shape;
// Pre-calculate the regularisation term
cv::Mat_<float> regTerm;
if (rigid)
2016-04-28 21:40:36 +02:00
{
regTerm = cv::Mat_<float>::zeros(6, 6);
2016-04-28 21:40:36 +02:00
}
else
{
cv::Mat_<double> regularisations = cv::Mat_<double>::zeros(1, 6 + m);
// Setting the regularisation to the inverse of eigenvalues
cv::Mat(parameters.reg_factor / E).copyTo(regularisations(cv::Rect(6, 0, m, 1)));
cv::Mat_<double> regTerm_d = cv::Mat::diag(regularisations.t());
regTerm_d.convertTo(regTerm, CV_32F);
}
2016-04-28 21:40:36 +02:00
cv::Mat_<float> WeightMatrix;
GetWeightMatrix(WeightMatrix, scale, view_id, parameters);
cv::Mat_<float> dxs, dys;
2016-04-28 21:40:36 +02:00
// The preallocated memory for the mean shifts
cv::Mat_<float> mean_shifts(2 * pdm.NumberOfPoints(), 1, 0.0);
// Number of iterations
for (int iter = 0; iter < parameters.num_optimisation_iteration; iter++)
2016-04-28 21:40:36 +02:00
{
// get the current estimates of x
pdm.CalcShape2D(current_shape, current_local, current_global);
if (iter > 0)
2016-04-28 21:40:36 +02:00
{
// if the shape hasn't changed terminate
if (norm(current_shape, previous_shape) < 0.01)
{
2016-04-28 21:40:36 +02:00
break;
}
}
current_shape.copyTo(previous_shape);
2016-04-28 21:40:36 +02:00
// Jacobian, and transposed weighted jacobian
cv::Mat_<float> J, J_w_t;
// calculate the appropriate Jacobians in 2D, even though the actual behaviour is in 3D, using small angle approximation and oriented shape
if (rigid)
2016-04-28 21:40:36 +02:00
{
pdm.ComputeRigidJacobian(current_local, current_global, J, WeightMatrix, J_w_t);
}
else
{
pdm.ComputeJacobian(current_local, current_global, J, WeightMatrix, J_w_t);
}
2016-04-28 21:40:36 +02:00
// useful for mean shift calculation
float a = -0.5 / (parameters.sigma * parameters.sigma);
2016-04-28 21:40:36 +02:00
cv::Mat_<double> current_shape_2D = current_shape.reshape(1, 2).t();
cv::Mat_<double> base_shape_2D = base_shape.reshape(1, 2).t();
cv::Mat_<float> offsets;
cv::Mat((current_shape_2D - base_shape_2D) * cv::Mat(sim_img_to_ref).t()).convertTo(offsets, CV_32F);
dxs = offsets.col(0) + (resp_size - 1) / 2;
dys = offsets.col(1) + (resp_size - 1) / 2;
2016-04-28 21:40:36 +02:00
NonVectorisedMeanShift_precalc_kde(mean_shifts, patch_expert_responses, dxs, dys, resp_size, a, scale, view_id, kde_resp_precalc);
// Now transform the mean shifts to the the image reference frame, as opposed to one of ref shape (object space)
cv::Mat_<float> mean_shifts_2D = (mean_shifts.reshape(1, 2)).t();
2016-04-28 21:40:36 +02:00
mean_shifts_2D = mean_shifts_2D * cv::Mat(sim_ref_to_img).t();
mean_shifts = cv::Mat(mean_shifts_2D.t()).reshape(1, n * 2);
2016-04-28 21:40:36 +02:00
// remove non-visible observations
for (int i = 0; i < n; ++i)
2016-04-28 21:40:36 +02:00
{
// if patch unavailable for current index
if (patch_experts.visibilities[scale][view_id].at<int>(i, 0) == 0)
{
2016-04-28 21:40:36 +02:00
cv::Mat Jx = J.row(i);
Jx = cvScalar(0);
cv::Mat Jy = J.row(i + n);
2016-04-28 21:40:36 +02:00
Jy = cvScalar(0);
mean_shifts.at<float>(i, 0) = 0.0f;
mean_shifts.at<float>(i + n, 0) = 0.0f;
2016-04-28 21:40:36 +02:00
}
}
// projection of the meanshifts onto the jacobians (using the weighted Jacobian, see Baltrusaitis 2013)
cv::Mat_<float> J_w_t_m = J_w_t * mean_shifts;
// Add the regularisation term
if (!rigid)
2016-04-28 21:40:36 +02:00
{
J_w_t_m(cv::Rect(0, 6, 1, m)) = J_w_t_m(cv::Rect(0, 6, 1, m)) - regTerm(cv::Rect(6, 6, m, m)) * current_local;
2016-04-28 21:40:36 +02:00
}
// Calculating the Hessian approximation
cv::Mat_<float> Hessian = J_w_t * J;
// Add the Tikhonov regularisation
Hessian = Hessian + regTerm;
// Solve for the parameter update (from Baltrusaitis 2013 based on eq (36) Saragih 2011)
cv::Mat_<float> param_update;
cv::solve(Hessian, J_w_t_m, param_update, CV_CHOLESKY);
2016-04-28 21:40:36 +02:00
// update the reference
pdm.UpdateModelParameters(param_update, current_local, current_global);
2016-04-28 21:40:36 +02:00
// clamp to the local parameters for valid expressions
pdm.Clamp(current_local, current_global, parameters);
}
// compute the log likelihood
double loglhood = 0;
2016-04-28 21:40:36 +02:00
landmark_lhoods = cv::Mat_<double>(n, 1, -1e8);
for (int i = 0; i < n; i++)
2016-04-28 21:40:36 +02:00
{
if (patch_experts.visibilities[scale][view_id].at<int>(i, 0) == 0)
2016-04-28 21:40:36 +02:00
{
continue;
}
float dx = dxs.at<float>(i);
float dy = dys.at<float>(i);
int ii, jj;
float v, vx, vy, sum = 0.0;
2016-04-28 21:40:36 +02:00
// Iterate over the patch responses here
cv::MatConstIterator_<float> p = patch_expert_responses[i].begin();
for (ii = 0; ii < resp_size; ii++)
2016-04-28 21:40:36 +02:00
{
vx = (dy - ii)*(dy - ii);
for (jj = 0; jj < resp_size; jj++)
2016-04-28 21:40:36 +02:00
{
vy = (dx - jj)*(dx - jj);
2016-04-28 21:40:36 +02:00
// the probability at the current, xi, yi
v = *p++;
// the KDE evaluation of that point
v *= exp(-0.5*(vx + vy) / (parameters.sigma * parameters.sigma));
2016-04-28 21:40:36 +02:00
sum += v;
}
}
landmark_lhoods.at<double>(i, 0) = (double)sum;
2016-04-28 21:40:36 +02:00
// the offset is there for numerical stability
loglhood += log(sum + 1e-8);
}
loglhood = loglhood / sum(patch_experts.visibilities[scale][view_id])[0];
2016-04-28 21:40:36 +02:00
final_global = current_global;
final_local = current_local;
return loglhood;
}
// Getting a 3D shape model from the current detected landmarks (in camera space)
cv::Mat_<double> CLNF::GetShape(double fx, double fy, double cx, double cy) const
{
int n = this->detected_landmarks.rows / 2;
2016-04-28 21:40:36 +02:00
cv::Mat_<double> outShape(n, 3, 0.0);
2016-04-28 21:40:36 +02:00
// If the tracking started (otherwise no point reporting 3D shape)
if (this->IsInitialized())
{
2016-04-28 21:40:36 +02:00
cv::Mat_<double> shape3d(n * 3, 1);
2016-04-28 21:40:36 +02:00
this->pdm.CalcShape3D(shape3d, this->params_local);
2016-04-28 21:40:36 +02:00
// Need to rotate the shape to get the actual 3D representation
2016-04-28 21:40:36 +02:00
// get the rotation matrix from the euler angles
cv::Matx33d R = Utilities::Euler2RotationMatrix(cv::Vec3d(params_global[1], params_global[2], params_global[3]));
2016-04-28 21:40:36 +02:00
shape3d = shape3d.reshape(1, 3);
2016-04-28 21:40:36 +02:00
shape3d = shape3d.t() * cv::Mat(R).t();
2016-04-28 21:40:36 +02:00
// from the weak perspective model can determine the average depth of the object
double Zavg = fx / params_global[0];
2016-04-28 21:40:36 +02:00
// this is described in the paper in section 3.4 (equation 10) (of the CLM-Z paper)
for (int i = 0; i < n; i++)
{
double Z = Zavg + shape3d.at<double>(i, 2);
2016-04-28 21:40:36 +02:00
double X = Z * ((this->detected_landmarks.at<double>(i) - cx) / fx);
double Y = Z * ((this->detected_landmarks.at<double>(i + n) - cy) / fy);
2016-04-28 21:40:36 +02:00
outShape.at<double>(i, 0) = (double)X;
outShape.at<double>(i, 1) = (double)Y;
outShape.at<double>(i, 2) = (double)Z;
2016-04-28 21:40:36 +02:00
}
2016-04-28 21:40:36 +02:00
}
// The format is 3 rows - n cols
return outShape.t();
2016-04-28 21:40:36 +02:00
}
2018-01-31 09:23:48 +01:00
cv::Mat_<int> CLNF::GetVisibilities() const
{
// Get the view of the largest scale
int scale = patch_experts.visibilities.size() - 1;
int view_id = patch_experts.GetViewIdx(params_global, scale);
cv::Mat_<int> visibilities_to_ret = this->patch_experts.visibilities[scale][view_id].clone();
return visibilities_to_ret;
}
2016-04-28 21:40:36 +02:00
// A utility bounding box function
cv::Rect_<double> CLNF::GetBoundingBox() const
{
cv::Mat_<double> xs = this->detected_landmarks(cv::Rect(0, 0, 1, this->detected_landmarks.rows / 2));
cv::Mat_<double> ys = this->detected_landmarks(cv::Rect(0, this->detected_landmarks.rows / 2, 1, this->detected_landmarks.rows / 2));
2016-04-28 21:40:36 +02:00
double min_x, max_x;
double min_y, max_y;
cv::minMaxLoc(xs, &min_x, &max_x);
cv::minMaxLoc(ys, &min_y, &max_y);
// See if the detections intersect
cv::Rect_<double> model_rect(min_x, min_y, max_x - min_x, max_y - min_y);
return model_rect;
}
// Legacy function not used at the moment
void CLNF::NonVectorisedMeanShift(cv::Mat_<double>& out_mean_shifts, const vector<cv::Mat_<float> >& patch_expert_responses, const cv::Mat_<double> &dxs, const cv::Mat_<double> &dys, int resp_size, double a, int scale, int view_id)
{
2016-04-28 21:40:36 +02:00
int n = dxs.rows;
for (int i = 0; i < n; i++)
2016-04-28 21:40:36 +02:00
{
if (patch_experts.visibilities[scale][view_id].at<int>(i, 0) == 0 || sum(patch_expert_responses[i])[0] == 0)
2016-04-28 21:40:36 +02:00
{
out_mean_shifts.at<double>(i, 0) = 0;
out_mean_shifts.at<double>(i + n, 0) = 0;
2016-04-28 21:40:36 +02:00
continue;
}
// indices of dx, dy
double dx = dxs.at<double>(i);
double dy = dys.at<double>(i);
int ii, jj;
double v, vx, vy, mx = 0.0, my = 0.0, sum = 0.0;
2016-04-28 21:40:36 +02:00
// Iterate over the patch responses here
cv::MatConstIterator_<float> p = patch_expert_responses[i].begin();
for (ii = 0; ii < resp_size; ii++)
2016-04-28 21:40:36 +02:00
{
vx = (dy - ii)*(dy - ii);
for (jj = 0; jj < resp_size; jj++)
2016-04-28 21:40:36 +02:00
{
vy = (dx - jj)*(dx - jj);
2016-04-28 21:40:36 +02:00
// the probability at the current, xi, yi
v = *p++;
// the KDE evaluation of that point
double kd = exp(a*(vx + vy));
2016-04-28 21:40:36 +02:00
v *= kd;
sum += v;
// mean shift in x and y
mx += v*jj;
my += v*ii;
2016-04-28 21:40:36 +02:00
}
}
// setting the actual mean shift update
double msx = (mx / sum - dx);
double msy = (my / sum - dy);
2016-04-28 21:40:36 +02:00
out_mean_shifts.at<double>(i, 0) = msx;
out_mean_shifts.at<double>(i + n, 0) = msy;
2016-04-28 21:40:36 +02:00
}
}