sustaining_gazes/lib/local/LandmarkDetector/src/LandmarkDetectorModel.cpp

1345 lines
42 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

///////////////////////////////////////////////////////////////////////////////
// Copyright (C) 2016, Carnegie Mellon University and University of Cambridge,
// all rights reserved.
//
// THIS SOFTWARE IS PROVIDED “AS IS” FOR ACADEMIC USE ONLY AND ANY EXPRESS
// OR IMPLIED WARRANTIES WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS
// BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY.
// OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Notwithstanding the license granted herein, Licensee acknowledges that certain components
// of the Software may be covered by so-called “open source” software licenses (“Open Source
// Components”), which means any software licenses approved as open source licenses by the
// Open Source Initiative or any substantially similar licenses, including without limitation any
// license that, as a condition of distribution of the software licensed under such license,
// requires that the distributor make the software available in source code format. Licensor shall
// provide a list of Open Source Components for a particular version of the Software upon
// Licensees request. Licensee will comply with the applicable terms of such licenses and to
// the extent required by the licenses covering Open Source Components, the terms of such
// licenses will apply in lieu of the terms of this Agreement. To the extent the terms of the
// licenses applicable to Open Source Components prohibit any of the restrictions in this
// License Agreement with respect to such Open Source Component, such restrictions will not
// apply to such Open Source Component. To the extent the terms of the licenses applicable to
// Open Source Components require Licensor to make an offer to provide source code or
// related information in connection with the Software, such offer is hereby made. Any request
// for source code or related information should be directed to cl-face-tracker-distribution@lists.cam.ac.uk
// Licensee acknowledges receipt of notices for the Open Source Components for the initial
// delivery of the Software.
// * Any publications arising from the use of this software, including but
// not limited to academic journal and conference publications, technical
// reports and manuals, must cite at least one of the following works:
//
// OpenFace: an open source facial behavior analysis toolkit
// Tadas Baltrušaitis, Peter Robinson, and Louis-Philippe Morency
// in IEEE Winter Conference on Applications of Computer Vision, 2016
//
// Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
// Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling
// in IEEE International. Conference on Computer Vision (ICCV), 2015
//
// Cross-dataset learning and person-speci?c normalisation for automatic Action Unit detection
// Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson
// in Facial Expression Recognition and Analysis Challenge,
// IEEE International Conference on Automatic Face and Gesture Recognition, 2015
//
// Constrained Local Neural Fields for robust facial landmark detection in the wild.
// Tadas Baltrušaitis, Peter Robinson, and Louis-Philippe Morency.
// in IEEE Int. Conference on Computer Vision Workshops, 300 Faces in-the-Wild Challenge, 2013.
//
///////////////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include <LandmarkDetectorModel.h>
// Boost includes
#include <filesystem.hpp>
#include <filesystem/fstream.hpp>
// TBB includes
#include <tbb/tbb.h>
// Local includes
#include <LandmarkDetectorUtils.h>
using namespace LandmarkDetector;
//=============================================================================
//=============================================================================
// Constructors
// A default constructor
CLNF::CLNF()
{
FaceModelParameters parameters;
this->Read(parameters.model_location);
}
// Constructor from a model file
CLNF::CLNF(string fname)
{
this->Read(fname);
}
// Copy constructor (makes a deep copy of CLNF)
CLNF::CLNF(const CLNF& other): pdm(other.pdm), params_local(other.params_local.clone()), params_global(other.params_global), detected_landmarks(other.detected_landmarks.clone()),
landmark_likelihoods(other.landmark_likelihoods.clone()), patch_experts(other.patch_experts), landmark_validator(other.landmark_validator), face_detector_location(other.face_detector_location),
hierarchical_mapping(other.hierarchical_mapping), hierarchical_models(other.hierarchical_models), hierarchical_model_names(other.hierarchical_model_names),
hierarchical_params(other.hierarchical_params), eye_model(other.eye_model)
{
this->detection_success = other.detection_success;
this->tracking_initialised = other.tracking_initialised;
this->detection_certainty = other.detection_certainty;
this->model_likelihood = other.model_likelihood;
this->failures_in_a_row = other.failures_in_a_row;
// Load the CascadeClassifier (as it does not have a proper copy constructor)
if(!face_detector_location.empty())
{
this->face_detector_HAAR.load(face_detector_location);
}
// Make sure the matrices are allocated properly
this->triangulations.resize(other.triangulations.size());
for(size_t i = 0; i < other.triangulations.size(); ++i)
{
// Make sure the matrix is copied.
this->triangulations[i] = other.triangulations[i].clone();
}
// Make sure the matrices are allocated properly
for(std::map<int, cv::Mat_<float>>::const_iterator it = other.kde_resp_precalc.begin(); it!= other.kde_resp_precalc.end(); it++)
{
// Make sure the matrix is copied.
this->kde_resp_precalc.insert(std::pair<int, cv::Mat_<float>>(it->first, it->second.clone()));
}
this->face_detector_HOG = dlib::get_frontal_face_detector();
}
// Assignment operator for lvalues (makes a deep copy of CLNF)
CLNF & CLNF::operator= (const CLNF& other)
{
if (this != &other) // protect against invalid self-assignment
{
pdm = PDM(other.pdm);
params_local = other.params_local.clone();
params_global = other.params_global;
detected_landmarks = other.detected_landmarks.clone();
landmark_likelihoods =other.landmark_likelihoods.clone();
patch_experts = Patch_experts(other.patch_experts);
landmark_validator = DetectionValidator(other.landmark_validator);
face_detector_location = other.face_detector_location;
this->detection_success = other.detection_success;
this->tracking_initialised = other.tracking_initialised;
this->detection_certainty = other.detection_certainty;
this->model_likelihood = other.model_likelihood;
this->failures_in_a_row = other.failures_in_a_row;
this->eye_model = other.eye_model;
// Load the CascadeClassifier (as it does not have a proper copy constructor)
if(!face_detector_location.empty())
{
this->face_detector_HAAR.load(face_detector_location);
}
// Make sure the matrices are allocated properly
this->triangulations.resize(other.triangulations.size());
for(size_t i = 0; i < other.triangulations.size(); ++i)
{
// Make sure the matrix is copied.
this->triangulations[i] = other.triangulations[i].clone();
}
// Make sure the matrices are allocated properly
for(std::map<int, cv::Mat_<float>>::const_iterator it = other.kde_resp_precalc.begin(); it!= other.kde_resp_precalc.end(); it++)
{
// Make sure the matrix is copied.
this->kde_resp_precalc.insert(std::pair<int, cv::Mat_<float>>(it->first, it->second.clone()));
}
// Copy over the hierarchical models
this->hierarchical_mapping = other.hierarchical_mapping;
this->hierarchical_models = other.hierarchical_models;
this->hierarchical_model_names = other.hierarchical_model_names;
this->hierarchical_params = other.hierarchical_params;
}
face_detector_HOG = dlib::get_frontal_face_detector();
return *this;
}
// Move constructor
CLNF::CLNF(const CLNF&& other)
{
this->detection_success = other.detection_success;
this->tracking_initialised = other.tracking_initialised;
this->detection_certainty = other.detection_certainty;
this->model_likelihood = other.model_likelihood;
this->failures_in_a_row = other.failures_in_a_row;
pdm = other.pdm;
params_local = other.params_local;
params_global = other.params_global;
detected_landmarks = other.detected_landmarks;
landmark_likelihoods = other.landmark_likelihoods;
patch_experts = other.patch_experts;
landmark_validator = other.landmark_validator;
face_detector_location = other.face_detector_location;
face_detector_HAAR = other.face_detector_HAAR;
triangulations = other.triangulations;
kde_resp_precalc = other.kde_resp_precalc;
face_detector_HOG = dlib::get_frontal_face_detector();
// Copy over the hierarchical models
this->hierarchical_mapping = other.hierarchical_mapping;
this->hierarchical_models = other.hierarchical_models;
this->hierarchical_model_names = other.hierarchical_model_names;
this->hierarchical_params = other.hierarchical_params;
this->eye_model = other.eye_model;
}
// Assignment operator for rvalues
CLNF & CLNF::operator= (const CLNF&& other)
{
this->detection_success = other.detection_success;
this->tracking_initialised = other.tracking_initialised;
this->detection_certainty = other.detection_certainty;
this->model_likelihood = other.model_likelihood;
this->failures_in_a_row = other.failures_in_a_row;
pdm = other.pdm;
params_local = other.params_local;
params_global = other.params_global;
detected_landmarks = other.detected_landmarks;
landmark_likelihoods = other.landmark_likelihoods;
patch_experts = other.patch_experts;
landmark_validator = other.landmark_validator;
face_detector_location = other.face_detector_location;
face_detector_HAAR = other.face_detector_HAAR;
triangulations = other.triangulations;
kde_resp_precalc = other.kde_resp_precalc;
face_detector_HOG = dlib::get_frontal_face_detector();
// Copy over the hierarchical models
this->hierarchical_mapping = other.hierarchical_mapping;
this->hierarchical_models = other.hierarchical_models;
this->hierarchical_model_names = other.hierarchical_model_names;
this->hierarchical_params = other.hierarchical_params;
this->eye_model = other.eye_model;
return *this;
}
void CLNF::Read_CLNF(string clnf_location)
{
// Location of modules
ifstream locations(clnf_location.c_str(), ios_base::in);
if(!locations.is_open())
{
cout << "Couldn't open the CLNF model file aborting" << endl;
cout.flush();
return;
}
string line;
vector<string> intensity_expert_locations;
vector<string> depth_expert_locations;
vector<string> ccnf_expert_locations;
// The other module locations should be defined as relative paths from the main model
boost::filesystem::path root = boost::filesystem::path(clnf_location).parent_path();
// The main file contains the references to other files
while (!locations.eof())
{
getline(locations, line);
stringstream lineStream(line);
string module;
string location;
// figure out which module is to be read from which file
lineStream >> module;
getline(lineStream, location);
if(location.size() > 0)
location.erase(location.begin()); // remove the first space
// remove carriage return at the end for compatibility with unix systems
if(location.size() > 0 && location.at(location.size()-1) == '\r')
{
location = location.substr(0, location.size()-1);
}
// append the lovstion to root location (boost syntax)
location = (root / location).string();
if (module.compare("PDM") == 0)
{
cout << "Reading the PDM module from: " << location << "....";
pdm.Read(location);
cout << "Done" << endl;
}
else if (module.compare("Triangulations") == 0)
{
cout << "Reading the Triangulations module from: " << location << "....";
ifstream triangulationFile(location.c_str(), ios_base::in);
LandmarkDetector::SkipComments(triangulationFile);
int numViews;
triangulationFile >> numViews;
// read in the triangulations
triangulations.resize(numViews);
for(int i = 0; i < numViews; ++i)
{
LandmarkDetector::SkipComments(triangulationFile);
LandmarkDetector::ReadMat(triangulationFile, triangulations[i]);
}
cout << "Done" << endl;
}
else if(module.compare("PatchesIntensity") == 0)
{
intensity_expert_locations.push_back(location);
}
else if(module.compare("PatchesDepth") == 0)
{
depth_expert_locations.push_back(location);
}
else if(module.compare("PatchesCCNF") == 0)
{
ccnf_expert_locations.push_back(location);
}
}
// Initialise the patch experts
patch_experts.Read(intensity_expert_locations, depth_expert_locations, ccnf_expert_locations);
// Read in a face detector
face_detector_HOG = dlib::get_frontal_face_detector();
}
void CLNF::Read(string main_location)
{
cout << "Reading the CLNF landmark detector/tracker from: " << main_location << endl;
ifstream locations(main_location.c_str(), ios_base::in);
if(!locations.is_open())
{
cout << "Couldn't open the model file, aborting" << endl;
return;
}
string line;
// The other module locations should be defined as relative paths from the main model
boost::filesystem::path root = boost::filesystem::path(main_location).parent_path();
// Assume no eye model, unless read-in
eye_model = false;
// The main file contains the references to other files
while (!locations.eof())
{
getline(locations, line);
stringstream lineStream(line);
string module;
string location;
// figure out which module is to be read from which file
lineStream >> module;
lineStream >> location;
// remove carriage return at the end for compatibility with unix systems
if(location.size() > 0 && location.at(location.size()-1) == '\r')
{
location = location.substr(0, location.size()-1);
}
// append to root
location = (root / location).string();
if (module.compare("LandmarkDetector") == 0)
{
cout << "Reading the landmark detector module from: " << location << endl;
// The CLNF module includes the PDM and the patch experts
Read_CLNF(location);
}
else if(module.compare("LandmarkDetector_part") == 0)
{
string part_name;
lineStream >> part_name;
cout << "Reading part based module...." << part_name << endl;
vector<pair<int, int>> mappings;
while(!lineStream.eof())
{
int ind_in_main;
lineStream >> ind_in_main;
int ind_in_part;
lineStream >> ind_in_part;
mappings.push_back(pair<int, int>(ind_in_main, ind_in_part));
}
this->hierarchical_mapping.push_back(mappings);
CLNF part_model(location);
this->hierarchical_models.push_back(part_model);
this->hierarchical_model_names.push_back(part_name);
FaceModelParameters params;
params.validate_detections = false;
params.refine_hierarchical = false;
params.refine_parameters = false;
if(part_name.compare("left_eye") == 0 || part_name.compare("right_eye") == 0)
{
vector<int> windows_large;
windows_large.push_back(5);
windows_large.push_back(3);
vector<int> windows_small;
windows_small.push_back(5);
windows_small.push_back(3);
params.window_sizes_init = windows_large;
params.window_sizes_small = windows_small;
params.window_sizes_current = windows_large;
params.reg_factor = 0.1;
params.sigma = 2;
}
else if(part_name.compare("left_eye_28") == 0 || part_name.compare("right_eye_28") == 0)
{
vector<int> windows_large;
windows_large.push_back(3);
windows_large.push_back(5);
windows_large.push_back(9);
vector<int> windows_small;
windows_small.push_back(3);
windows_small.push_back(5);
windows_small.push_back(9);
params.window_sizes_init = windows_large;
params.window_sizes_small = windows_small;
params.window_sizes_current = windows_large;
params.reg_factor = 0.5;
params.sigma = 1.0;
eye_model = true;
}
else if(part_name.compare("mouth") == 0)
{
vector<int> windows_large;
windows_large.push_back(7);
windows_large.push_back(7);
vector<int> windows_small;
windows_small.push_back(7);
windows_small.push_back(7);
params.window_sizes_init = windows_large;
params.window_sizes_small = windows_small;
params.window_sizes_current = windows_large;
params.reg_factor = 1.0;
params.sigma = 2.0;
}
else if(part_name.compare("brow") == 0)
{
vector<int> windows_large;
windows_large.push_back(11);
windows_large.push_back(9);
vector<int> windows_small;
windows_small.push_back(11);
windows_small.push_back(9);
params.window_sizes_init = windows_large;
params.window_sizes_small = windows_small;
params.window_sizes_current = windows_large;
params.reg_factor = 10.0;
params.sigma = 3.5;
}
else if(part_name.compare("inner") == 0)
{
vector<int> windows_large;
windows_large.push_back(9);
vector<int> windows_small;
windows_small.push_back(9);
params.window_sizes_init = windows_large;
params.window_sizes_small = windows_small;
params.window_sizes_current = windows_large;
params.reg_factor = 2.5;
params.sigma = 1.75;
params.weight_factor = 2.5;
}
this->hierarchical_params.push_back(params);
cout << "Done" << endl;
}
else if (module.compare("DetectionValidator") == 0)
{
cout << "Reading the landmark validation module....";
landmark_validator.Read(location);
cout << "Done" << endl;
}
}
detected_landmarks.create(2 * pdm.NumberOfPoints(), 1);
detected_landmarks.setTo(0);
detection_success = false;
tracking_initialised = false;
model_likelihood = -10; // very low
detection_certainty = 1; // very uncertain
// Initialising default values for the rest of the variables
// local parameters (shape)
params_local.create(pdm.NumberOfModes(), 1);
params_local.setTo(0.0);
// global parameters (pose) [scale, euler_x, euler_y, euler_z, tx, ty]
params_global = cv::Vec6d(1, 0, 0, 0, 0, 0);
failures_in_a_row = -1;
}
// Resetting the model (for a new video, or complet reinitialisation
void CLNF::Reset()
{
detected_landmarks.setTo(0);
detection_success = false;
tracking_initialised = false;
model_likelihood = -10; // very low
detection_certainty = 1; // very uncertain
// local parameters (shape)
params_local.setTo(0.0);
// global parameters (pose) [scale, euler_x, euler_y, euler_z, tx, ty]
params_global = cv::Vec6d(1, 0, 0, 0, 0, 0);
failures_in_a_row = -1;
face_template = cv::Mat_<uchar>();
}
// Resetting the model, choosing the face nearest (x,y)
void CLNF::Reset(double x, double y)
{
// First reset the model overall
this->Reset();
// Now in the following frame when face detection takes place this is the point at which it will be preffered
this->preference_det.x = x;
this->preference_det.y = y;
}
// The main internal landmark detection call (should not be used externally?)
bool CLNF::DetectLandmarks(const cv::Mat_<uchar> &image, const cv::Mat_<float> &depth, FaceModelParameters& params)
{
// Fits from the current estimate of local and global parameters in the model
bool fit_success = Fit(image, depth, params.window_sizes_current, params);
// Store the landmarks converged on in detected_landmarks
pdm.CalcShape2D(detected_landmarks, params_local, params_global);
if(params.refine_hierarchical && hierarchical_models.size() > 0)
{
bool parts_used = false;
// Do the hierarchical models in parallel
tbb::parallel_for(0, (int)hierarchical_models.size(), [&](int part_model){
{
// Only do the synthetic eye models if we're doing gaze
if (!((hierarchical_model_names[part_model].compare("right_eye_28") == 0 ||
hierarchical_model_names[part_model].compare("left_eye_28") == 0)
&& !params.track_gaze))
{
int n_part_points = hierarchical_models[part_model].pdm.NumberOfPoints();
vector<pair<int, int>> mappings = this->hierarchical_mapping[part_model];
cv::Mat_<double> part_model_locs(n_part_points * 2, 1, 0.0);
// Extract the corresponding landmarks
for (size_t mapping_ind = 0; mapping_ind < mappings.size(); ++mapping_ind)
{
part_model_locs.at<double>(mappings[mapping_ind].second) = detected_landmarks.at<double>(mappings[mapping_ind].first);
part_model_locs.at<double>(mappings[mapping_ind].second + n_part_points) = detected_landmarks.at<double>(mappings[mapping_ind].first + this->pdm.NumberOfPoints());
}
// Fit the part based model PDM
hierarchical_models[part_model].pdm.CalcParams(hierarchical_models[part_model].params_global, hierarchical_models[part_model].params_local, part_model_locs);
// Only do this if we don't need to upsample
if (params_global[0] > 0.9 * hierarchical_models[part_model].patch_experts.patch_scaling[0])
{
parts_used = true;
this->hierarchical_params[part_model].window_sizes_current = this->hierarchical_params[part_model].window_sizes_init;
// Do the actual landmark detection
hierarchical_models[part_model].DetectLandmarks(image, depth, hierarchical_params[part_model]);
}
else
{
hierarchical_models[part_model].pdm.CalcShape2D(hierarchical_models[part_model].detected_landmarks, hierarchical_models[part_model].params_local, hierarchical_models[part_model].params_global);
}
}
}
});
// Recompute main model based on the fit part models
if(parts_used)
{
for (size_t part_model = 0; part_model < hierarchical_models.size(); ++part_model)
{
vector<pair<int, int>> mappings = this->hierarchical_mapping[part_model];
if (!((hierarchical_model_names[part_model].compare("right_eye_28") == 0 ||
hierarchical_model_names[part_model].compare("left_eye_28") == 0)
&& !params.track_gaze))
{
// Reincorporate the models into main tracker
for (size_t mapping_ind = 0; mapping_ind < mappings.size(); ++mapping_ind)
{
detected_landmarks.at<double>(mappings[mapping_ind].first) = hierarchical_models[part_model].detected_landmarks.at<double>(mappings[mapping_ind].second);
detected_landmarks.at<double>(mappings[mapping_ind].first + pdm.NumberOfPoints()) = hierarchical_models[part_model].detected_landmarks.at<double>(mappings[mapping_ind].second + hierarchical_models[part_model].pdm.NumberOfPoints());
}
}
}
pdm.CalcParams(params_global, params_local, detected_landmarks);
pdm.CalcShape2D(detected_landmarks, params_local, params_global);
}
}
// Check detection correctness
if(params.validate_detections && fit_success)
{
cv::Vec3d orientation(params_global[1], params_global[2], params_global[3]);
detection_certainty = landmark_validator.Check(orientation, image, detected_landmarks);
detection_success = detection_certainty < params.validation_boundary;
}
else
{
detection_success = fit_success;
if(fit_success)
{
detection_certainty = -1;
}
else
{
detection_certainty = 1;
}
}
return detection_success;
}
//=============================================================================
bool CLNF::Fit(const cv::Mat_<uchar>& im, const cv::Mat_<float>& depthImg, const std::vector<int>& window_sizes, const FaceModelParameters& parameters)
{
// Making sure it is a single channel image
assert(im.channels() == 1);
// Placeholder for the landmarks
cv::Mat_<double> current_shape(2 * pdm.NumberOfPoints() , 1, 0.0);
int n = pdm.NumberOfPoints();
cv::Mat_<float> depth_img_no_background;
// Background elimination from the depth image
if(!depthImg.empty())
{
bool success = RemoveBackground(depth_img_no_background, depthImg);
// The attempted background removal can fail leading to tracking failure
if(!success)
{
return false;
}
}
int num_scales = patch_experts.patch_scaling.size();
// Storing the patch expert response maps
vector<cv::Mat_<float> > patch_expert_responses(n);
// Converting from image space to patch expert space (normalised for rotation and scale)
cv::Matx22f sim_ref_to_img;
cv::Matx22d sim_img_to_ref;
FaceModelParameters tmp_parameters = parameters;
// Optimise the model across a number of areas of interest (usually in descending window size and ascending scale size)
for(int scale = 0; scale < num_scales; scale++)
{
int window_size = window_sizes[scale];
if(window_size == 0 || 0.9 * patch_experts.patch_scaling[scale] > params_global[0])
continue;
// The patch expert response computation
if(scale != window_sizes.size() - 1)
{
patch_experts.Response(patch_expert_responses, sim_ref_to_img, sim_img_to_ref, im, depth_img_no_background, pdm, params_global, params_local, window_size, scale);
}
else
{
// Do not use depth for the final iteration as it is not as accurate
patch_experts.Response(patch_expert_responses, sim_ref_to_img, sim_img_to_ref, im, cv::Mat(), pdm, params_global, params_local, window_size, scale);
}
if(parameters.refine_parameters == true)
{
// Adapt the parameters based on scale (wan't to reduce regularisation as scale increases, but increa sigma and tikhonov)
tmp_parameters.reg_factor = parameters.reg_factor - 15 * log(patch_experts.patch_scaling[scale]/0.25)/log(2);
if(tmp_parameters.reg_factor <= 0)
tmp_parameters.reg_factor = 0.001;
tmp_parameters.sigma = parameters.sigma + 0.25 * log(patch_experts.patch_scaling[scale]/0.25)/log(2);
tmp_parameters.weight_factor = parameters.weight_factor + 2 * parameters.weight_factor * log(patch_experts.patch_scaling[scale]/0.25)/log(2);
}
// Get the current landmark locations
pdm.CalcShape2D(current_shape, params_local, params_global);
// Get the view used by patch experts
int view_id = patch_experts.GetViewIdx(params_global, scale);
// the actual optimisation step
this->NU_RLMS(params_global, params_local, patch_expert_responses, cv::Vec6d(params_global), params_local.clone(), current_shape, sim_img_to_ref, sim_ref_to_img, window_size, view_id, true, scale, this->landmark_likelihoods, tmp_parameters);
// non-rigid optimisation
this->model_likelihood = this->NU_RLMS(params_global, params_local, patch_expert_responses, cv::Vec6d(params_global), params_local.clone(), current_shape, sim_img_to_ref, sim_ref_to_img, window_size, view_id, false, scale, this->landmark_likelihoods, tmp_parameters);
// Can't track very small images reliably (less than ~30px across)
if(params_global[0] < 0.25)
{
cout << "Face too small for landmark detection" << endl;
return false;
}
}
return true;
}
void CLNF::NonVectorisedMeanShift_precalc_kde(cv::Mat_<float>& out_mean_shifts, const vector<cv::Mat_<float> >& patch_expert_responses, const cv::Mat_<float> &dxs, const cv::Mat_<float> &dys, int resp_size, float a, int scale, int view_id, map<int, cv::Mat_<float> >& kde_resp_precalc)
{
int n = dxs.rows;
cv::Mat_<float> kde_resp;
float step_size = 0.1;
// if this has not been precomputer, precompute it, otherwise use it
if(kde_resp_precalc.find(resp_size) == kde_resp_precalc.end())
{
kde_resp = cv::Mat_<float>((int)((resp_size / step_size)*(resp_size/step_size)), resp_size * resp_size);
cv::MatIterator_<float> kde_it = kde_resp.begin();
for(int x = 0; x < resp_size/step_size; x++)
{
float dx = x * step_size;
for(int y = 0; y < resp_size/step_size; y++)
{
float dy = y * step_size;
int ii,jj;
float v,vx,vy;
for(ii = 0; ii < resp_size; ii++)
{
vx = (dy-ii)*(dy-ii);
for(jj = 0; jj < resp_size; jj++)
{
vy = (dx-jj)*(dx-jj);
// the KDE evaluation of that point
v = exp(a*(vx+vy));
*kde_it++ = v;
}
}
}
}
kde_resp_precalc[resp_size] = kde_resp.clone();
}
else
{
// use the precomputed version
kde_resp = kde_resp_precalc.find(resp_size)->second;
}
// for every point (patch) calculating mean-shift
for(int i = 0; i < n; i++)
{
if(patch_experts.visibilities[scale][view_id].at<int>(i,0) == 0)
{
out_mean_shifts.at<float>(i,0) = 0;
out_mean_shifts.at<float>(i+n,0) = 0;
continue;
}
// indices of dx, dy
float dx = dxs.at<float>(i);
float dy = dys.at<float>(i);
// Ensure that we are within bounds (important for precalculation)
if(dx < 0)
dx = 0;
if(dy < 0)
dy = 0;
if(dx > resp_size - step_size)
dx = resp_size - step_size;
if(dy > resp_size - step_size)
dy = resp_size - step_size;
// Pick the row from precalculated kde that approximates the current dx, dy best
int closest_col = (int)(dy /step_size + 0.5); // Plus 0.5 is there, as C++ rounds down with int cast
int closest_row = (int)(dx /step_size + 0.5); // Plus 0.5 is there, as C++ rounds down with int cast
int idx = closest_row * ((int)(resp_size/step_size + 0.5)) + closest_col; // Plus 0.5 is there, as C++ rounds down with int cast
cv::MatIterator_<float> kde_it = kde_resp.begin() + kde_resp.cols*idx;
float mx=0.0;
float my=0.0;
float sum=0.0;
// Iterate over the patch responses here
cv::MatConstIterator_<float> p = patch_expert_responses[i].begin();
for(int ii = 0; ii < resp_size; ii++)
{
for(int jj = 0; jj < resp_size; jj++)
{
// the KDE evaluation of that point multiplied by the probability at the current, xi, yi
float v = (*p++) * (*kde_it++);
sum += v;
// mean shift in x and y
mx += v*jj;
my += v*ii;
}
}
float msx = (mx/sum - dx);
float msy = (my/sum - dy);
out_mean_shifts.at<float>(i,0) = msx;
out_mean_shifts.at<float>(i+n,0) = msy;
}
}
void CLNF::GetWeightMatrix(cv::Mat_<float>& WeightMatrix, int scale, int view_id, const FaceModelParameters& parameters)
{
int n = pdm.NumberOfPoints();
// Is the weight matrix needed at all
if(parameters.weight_factor > 0)
{
WeightMatrix = cv::Mat_<float>::zeros(n*2, n*2);
for (int p=0; p < n; p++)
{
if(!patch_experts.ccnf_expert_intensity.empty())
{
// for the x dimension
WeightMatrix.at<float>(p,p) = WeightMatrix.at<float>(p,p) + patch_experts.ccnf_expert_intensity[scale][view_id][p].patch_confidence;
// for they y dimension
WeightMatrix.at<float>(p+n,p+n) = WeightMatrix.at<float>(p,p);
}
else
{
// Across the modalities add the confidences
for(size_t pc=0; pc < patch_experts.svr_expert_intensity[scale][view_id][p].svr_patch_experts.size(); pc++)
{
// for the x dimension
WeightMatrix.at<float>(p,p) = WeightMatrix.at<float>(p,p) + patch_experts.svr_expert_intensity[scale][view_id][p].svr_patch_experts.at(pc).confidence;
}
// for the y dimension
WeightMatrix.at<float>(p+n,p+n) = WeightMatrix.at<float>(p,p);
}
}
WeightMatrix = parameters.weight_factor * WeightMatrix;
}
else
{
WeightMatrix = cv::Mat_<float>::eye(n*2, n*2);
}
}
//=============================================================================
double CLNF::NU_RLMS(cv::Vec6d& final_global, cv::Mat_<double>& final_local, const vector<cv::Mat_<float> >& patch_expert_responses, const cv::Vec6d& initial_global, const cv::Mat_<double>& initial_local,
const cv::Mat_<double>& base_shape, const cv::Matx22d& sim_img_to_ref, const cv::Matx22f& sim_ref_to_img, int resp_size, int view_id, bool rigid, int scale, cv::Mat_<double>& landmark_lhoods,
const FaceModelParameters& parameters)
{
int n = pdm.NumberOfPoints();
// Mean, eigenvalues, eigenvectors
cv::Mat_<double> M = this->pdm.mean_shape;
cv::Mat_<double> E = this->pdm.eigen_values;
//Mat_<double> V = this->pdm.princ_comp;
int m = pdm.NumberOfModes();
cv::Vec6d current_global(initial_global);
cv::Mat_<float> current_local;
initial_local.convertTo(current_local, CV_32F);
cv::Mat_<double> current_shape;
cv::Mat_<double> previous_shape;
// Pre-calculate the regularisation term
cv::Mat_<float> regTerm;
if(rigid)
{
regTerm = cv::Mat_<float>::zeros(6,6);
}
else
{
cv::Mat_<double> regularisations = cv::Mat_<double>::zeros(1, 6 + m);
// Setting the regularisation to the inverse of eigenvalues
cv::Mat(parameters.reg_factor / E).copyTo(regularisations(cv::Rect(6, 0, m, 1)));
cv::Mat_<double> regTerm_d = cv::Mat::diag(regularisations.t());
regTerm_d.convertTo(regTerm, CV_32F);
}
cv::Mat_<float> WeightMatrix;
GetWeightMatrix(WeightMatrix, scale, view_id, parameters);
cv::Mat_<float> dxs, dys;
// The preallocated memory for the mean shifts
cv::Mat_<float> mean_shifts(2 * pdm.NumberOfPoints(), 1, 0.0);
// Number of iterations
for(int iter = 0; iter < parameters.num_optimisation_iteration; iter++)
{
// get the current estimates of x
pdm.CalcShape2D(current_shape, current_local, current_global);
if(iter > 0)
{
// if the shape hasn't changed terminate
if(norm(current_shape, previous_shape) < 0.01)
{
break;
}
}
current_shape.copyTo(previous_shape);
// Jacobian, and transposed weighted jacobian
cv::Mat_<float> J, J_w_t;
// calculate the appropriate Jacobians in 2D, even though the actual behaviour is in 3D, using small angle approximation and oriented shape
if(rigid)
{
pdm.ComputeRigidJacobian(current_local, current_global, J, WeightMatrix, J_w_t);
}
else
{
pdm.ComputeJacobian(current_local, current_global, J, WeightMatrix, J_w_t);
}
// useful for mean shift calculation
float a = -0.5/(parameters.sigma * parameters.sigma);
cv::Mat_<double> current_shape_2D = current_shape.reshape(1, 2).t();
cv::Mat_<double> base_shape_2D = base_shape.reshape(1, 2).t();
cv::Mat_<float> offsets;
cv::Mat((current_shape_2D - base_shape_2D) * cv::Mat(sim_img_to_ref).t()).convertTo(offsets, CV_32F);
dxs = offsets.col(0) + (resp_size-1)/2;
dys = offsets.col(1) + (resp_size-1)/2;
NonVectorisedMeanShift_precalc_kde(mean_shifts, patch_expert_responses, dxs, dys, resp_size, a, scale, view_id, kde_resp_precalc);
// Now transform the mean shifts to the the image reference frame, as opposed to one of ref shape (object space)
cv::Mat_<float> mean_shifts_2D = (mean_shifts.reshape(1, 2)).t();
mean_shifts_2D = mean_shifts_2D * cv::Mat(sim_ref_to_img).t();
mean_shifts = cv::Mat(mean_shifts_2D.t()).reshape(1, n*2);
// remove non-visible observations
for(int i = 0; i < n; ++i)
{
// if patch unavailable for current index
if(patch_experts.visibilities[scale][view_id].at<int>(i,0) == 0)
{
cv::Mat Jx = J.row(i);
Jx = cvScalar(0);
cv::Mat Jy = J.row(i+n);
Jy = cvScalar(0);
mean_shifts.at<float>(i,0) = 0.0f;
mean_shifts.at<float>(i+n,0) = 0.0f;
}
}
// projection of the meanshifts onto the jacobians (using the weighted Jacobian, see Baltrusaitis 2013)
cv::Mat_<float> J_w_t_m = J_w_t * mean_shifts;
// Add the regularisation term
if(!rigid)
{
J_w_t_m(cv::Rect(0,6,1, m)) = J_w_t_m(cv::Rect(0,6,1, m)) - regTerm(cv::Rect(6,6, m, m)) * current_local;
}
// Calculating the Hessian approximation
cv::Mat_<float> Hessian = J_w_t * J;
// Add the Tikhonov regularisation
Hessian = Hessian + regTerm;
// Solve for the parameter update (from Baltrusaitis 2013 based on eq (36) Saragih 2011)
cv::Mat_<float> param_update;
cv::solve(Hessian, J_w_t_m, param_update, CV_CHOLESKY);
// update the reference
pdm.UpdateModelParameters(param_update, current_local, current_global);
// clamp to the local parameters for valid expressions
pdm.Clamp(current_local, current_global, parameters);
}
// compute the log likelihood
double loglhood = 0;
landmark_lhoods = cv::Mat_<double>(n, 1, -1e8);
for(int i = 0; i < n; i++)
{
if(patch_experts.visibilities[scale][view_id].at<int>(i,0) == 0 )
{
continue;
}
float dx = dxs.at<float>(i);
float dy = dys.at<float>(i);
int ii,jj;
float v,vx,vy,sum=0.0;
// Iterate over the patch responses here
cv::MatConstIterator_<float> p = patch_expert_responses[i].begin();
for(ii = 0; ii < resp_size; ii++)
{
vx = (dy-ii)*(dy-ii);
for(jj = 0; jj < resp_size; jj++)
{
vy = (dx-jj)*(dx-jj);
// the probability at the current, xi, yi
v = *p++;
// the KDE evaluation of that point
v *= exp(-0.5*(vx+vy)/(parameters.sigma * parameters.sigma));
sum += v;
}
}
landmark_lhoods.at<double>(i,0) = (double)sum;
// the offset is there for numerical stability
loglhood += log(sum + 1e-8);
}
loglhood = loglhood/sum(patch_experts.visibilities[scale][view_id])[0];
final_global = current_global;
final_local = current_local;
return loglhood;
}
bool CLNF::RemoveBackground(cv::Mat_<float>& out_depth_image, const cv::Mat_<float>& depth_image)
{
// use the current estimate of the face location to determine what is foreground and background
double tx = this->params_global[4];
double ty = this->params_global[5];
// if we are too close to the edge fail
if(tx - 9 <= 0 || ty - 9 <= 0 || tx + 9 >= depth_image.cols || ty + 9 >= depth_image.rows)
{
cout << "Face estimate is too close to the edge, tracking failed" << endl;
return false;
}
cv::Mat_<double> current_shape;
pdm.CalcShape2D(current_shape, params_local, params_global);
double min_x, max_x, min_y, max_y;
int n = this->pdm.NumberOfPoints();
cv::minMaxLoc(current_shape(cv::Range(0, n), cv::Range(0,1)), &min_x, &max_x);
cv::minMaxLoc(current_shape(cv::Range(n, n*2), cv::Range(0,1)), &min_y, &max_y);
// the area of interest: size of face with some scaling ( these scalings are fairly ad-hoc)
double width = 3 * (max_x - min_x);
double height = 2.5 * (max_y - min_y);
// getting the region of interest from the depth image,
// so we don't get other objects lying at same depth as head in the image but away from it
cv::Rect_<int> roi((int)(tx-width/2), (int)(ty - height/2), (int)width, (int)height);
// clamp it if it does not lie fully in the image
if(roi.x < 0) roi.x = 0;
if(roi.y < 0) roi.y = 0;
if(roi.width + roi.x >= depth_image.cols) roi.x = depth_image.cols - roi.width;
if(roi.height + roi.y >= depth_image.rows) roi.y = depth_image.rows - roi.height;
if(width > depth_image.cols)
{
roi.x = 0; roi.width = depth_image.cols;
}
if(height > depth_image.rows)
{
roi.y = 0; roi.height = depth_image.rows;
}
if(roi.width == 0) roi.width = depth_image.cols;
if(roi.height == 0) roi.height = depth_image.rows;
if(roi.x >= depth_image.cols) roi.x = 0;
if(roi.y >= depth_image.rows) roi.y = 0;
// Initialise the mask
cv::Mat_<uchar> mask(depth_image.rows, depth_image.cols, (uchar)0);
cv::Mat_<uchar> valid_pixels = depth_image > 0;
// check if there is any depth near the estimate
if(cv::sum(valid_pixels(cv::Rect((int)tx - 8, (int)ty - 8, 16, 16))/255)[0] > 0)
{
double Z = cv::mean(depth_image(cv::Rect((int)tx - 8, (int)ty - 8, 16, 16)), valid_pixels(cv::Rect((int)tx - 8, (int)ty - 8, 16, 16)))[0]; // Z offset from the surface of the face
// Only operate within region of interest of the depth image
cv::Mat dRoi = depth_image(roi);
cv::Mat mRoi = mask(roi);
// Filter all pixels further than 20cm away from the current pose depth estimate
cv::inRange(dRoi, Z - 200, Z + 200, mRoi);
// Convert to be either 0 or 1
mask = mask / 255;
cv::Mat_<float> maskF;
mask.convertTo(maskF, CV_32F);
//Filter the depth image
out_depth_image = depth_image.mul(maskF);
}
else
{
cout << "No depth signal found in foreground, tracking failed" << endl;
return false;
}
return true;
}
// Getting a 3D shape model from the current detected landmarks (in camera space)
cv::Mat_<double> CLNF::GetShape(double fx, double fy, double cx, double cy) const
{
int n = this->detected_landmarks.rows/2;
cv::Mat_<double> shape3d(n*3, 1);
this->pdm.CalcShape3D(shape3d, this->params_local);
// Need to rotate the shape to get the actual 3D representation
// get the rotation matrix from the euler angles
cv::Matx33d R = LandmarkDetector::Euler2RotationMatrix(cv::Vec3d(params_global[1], params_global[2], params_global[3]));
shape3d = shape3d.reshape(1, 3);
shape3d = shape3d.t() * cv::Mat(R).t();
// from the weak perspective model can determine the average depth of the object
double Zavg = fx / params_global[0];
cv::Mat_<double> outShape(n,3,0.0);
// this is described in the paper in section 3.4 (equation 10) (of the CLM-Z paper)
for(int i = 0; i < n; i++)
{
double Z = Zavg + shape3d.at<double>(i,2);
double X = Z * ((this->detected_landmarks.at<double>(i) - cx)/fx);
double Y = Z * ((this->detected_landmarks.at<double>(i + n) - cy)/fy);
outShape.at<double>(i,0) = (double)X;
outShape.at<double>(i,1) = (double)Y;
outShape.at<double>(i,2) = (double)Z;
}
// The format is 3 rows - n cols
return outShape.t();
}
// A utility bounding box function
cv::Rect_<double> CLNF::GetBoundingBox() const
{
cv::Mat_<double> xs = this->detected_landmarks(cv::Rect(0,0,1,this->detected_landmarks.rows/2));
cv::Mat_<double> ys = this->detected_landmarks(cv::Rect(0,this->detected_landmarks.rows/2, 1, this->detected_landmarks.rows/2));
double min_x, max_x;
double min_y, max_y;
cv::minMaxLoc(xs, &min_x, &max_x);
cv::minMaxLoc(ys, &min_y, &max_y);
// See if the detections intersect
cv::Rect_<double> model_rect(min_x, min_y, max_x - min_x, max_y - min_y);
return model_rect;
}
// Legacy function not used at the moment
void CLNF::NonVectorisedMeanShift(cv::Mat_<double>& out_mean_shifts, const vector<cv::Mat_<float> >& patch_expert_responses, const cv::Mat_<double> &dxs, const cv::Mat_<double> &dys, int resp_size, double a, int scale, int view_id)
{
int n = dxs.rows;
for(int i = 0; i < n; i++)
{
if(patch_experts.visibilities[scale][view_id].at<int>(i,0) == 0 || sum(patch_expert_responses[i])[0] == 0)
{
out_mean_shifts.at<double>(i,0) = 0;
out_mean_shifts.at<double>(i+n,0) = 0;
continue;
}
// indices of dx, dy
double dx = dxs.at<double>(i);
double dy = dys.at<double>(i);
int ii,jj;
double v,vx,vy,mx=0.0,my=0.0,sum=0.0;
// Iterate over the patch responses here
cv::MatConstIterator_<float> p = patch_expert_responses[i].begin();
for(ii = 0; ii < resp_size; ii++)
{
vx = (dy-ii)*(dy-ii);
for(jj = 0; jj < resp_size; jj++)
{
vy = (dx-jj)*(dx-jj);
// the probability at the current, xi, yi
v = *p++;
// the KDE evaluation of that point
double kd = exp(a*(vx+vy));
v *= kd;
sum += v;
// mean shift in x and y
mx += v*jj;
my += v*ii;
}
}
// setting the actual mean shift update
double msx = (mx/sum - dx);
double msy = (my/sum - dy);
out_mean_shifts.at<double>(i, 0) = msx;
out_mean_shifts.at<double>(i + n, 0) = msy;
}
}