sustaining_gazes/lib/local/LandmarkDetector/src/LandmarkDetectorModel.cpp

1321 lines
40 KiB
C++

///////////////////////////////////////////////////////////////////////////////
// Copyright (C) 2017, Carnegie Mellon University and University of Cambridge,
// all rights reserved.
//
// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
//
// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.
// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
//
// License can be found in OpenFace-license.txt
//
// * Any publications arising from the use of this software, including but
// not limited to academic journal and conference publications, technical
// reports and manuals, must cite at least one of the following works:
//
// OpenFace: an open source facial behavior analysis toolkit
// Tadas Baltrušaitis, Peter Robinson, and Louis-Philippe Morency
// in IEEE Winter Conference on Applications of Computer Vision, 2016
//
// Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
// Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling
// in IEEE International. Conference on Computer Vision (ICCV), 2015
//
// Cross-dataset learning and person-speci?c normalisation for automatic Action Unit detection
// Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson
// in Facial Expression Recognition and Analysis Challenge,
// IEEE International Conference on Automatic Face and Gesture Recognition, 2015
//
// Constrained Local Neural Fields for robust facial landmark detection in the wild.
// Tadas Baltrušaitis, Peter Robinson, and Louis-Philippe Morency.
// in IEEE Int. Conference on Computer Vision Workshops, 300 Faces in-the-Wild Challenge, 2013.
//
///////////////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include <LandmarkDetectorModel.h>
// Boost includes
#include <filesystem.hpp>
#include <filesystem/fstream.hpp>
// TBB includes
#include <tbb/tbb.h>
// Local includes
#include <LandmarkDetectorUtils.h>
using namespace LandmarkDetector;
//=============================================================================
//=============================================================================
// Constructors
// A default constructor
CLNF::CLNF()
{
FaceModelParameters parameters;
this->Read(parameters.model_location);
}
// Constructor from a model file
CLNF::CLNF(string fname)
{
this->Read(fname);
}
// Copy constructor (makes a deep copy of CLNF)
CLNF::CLNF(const CLNF& other): pdm(other.pdm), params_local(other.params_local.clone()), params_global(other.params_global), detected_landmarks(other.detected_landmarks.clone()),
landmark_likelihoods(other.landmark_likelihoods.clone()), patch_experts(other.patch_experts), landmark_validator(other.landmark_validator), face_detector_location(other.face_detector_location),
hierarchical_mapping(other.hierarchical_mapping), hierarchical_models(other.hierarchical_models), hierarchical_model_names(other.hierarchical_model_names),
hierarchical_params(other.hierarchical_params), eye_model(other.eye_model)
{
this->detection_success = other.detection_success;
this->tracking_initialised = other.tracking_initialised;
this->detection_certainty = other.detection_certainty;
this->model_likelihood = other.model_likelihood;
this->failures_in_a_row = other.failures_in_a_row;
// Load the CascadeClassifier (as it does not have a proper copy constructor)
if(!face_detector_location.empty())
{
this->face_detector_HAAR.load(face_detector_location);
}
// Make sure the matrices are allocated properly
this->triangulations.resize(other.triangulations.size());
for(size_t i = 0; i < other.triangulations.size(); ++i)
{
// Make sure the matrix is copied.
this->triangulations[i] = other.triangulations[i].clone();
}
// Make sure the matrices are allocated properly
for(std::map<int, cv::Mat_<float>>::const_iterator it = other.kde_resp_precalc.begin(); it!= other.kde_resp_precalc.end(); it++)
{
// Make sure the matrix is copied.
this->kde_resp_precalc.insert(std::pair<int, cv::Mat_<float>>(it->first, it->second.clone()));
}
this->face_detector_HOG = dlib::get_frontal_face_detector();
}
// Assignment operator for lvalues (makes a deep copy of CLNF)
CLNF & CLNF::operator= (const CLNF& other)
{
if (this != &other) // protect against invalid self-assignment
{
pdm = PDM(other.pdm);
params_local = other.params_local.clone();
params_global = other.params_global;
detected_landmarks = other.detected_landmarks.clone();
landmark_likelihoods =other.landmark_likelihoods.clone();
patch_experts = Patch_experts(other.patch_experts);
landmark_validator = DetectionValidator(other.landmark_validator);
face_detector_location = other.face_detector_location;
this->detection_success = other.detection_success;
this->tracking_initialised = other.tracking_initialised;
this->detection_certainty = other.detection_certainty;
this->model_likelihood = other.model_likelihood;
this->failures_in_a_row = other.failures_in_a_row;
this->eye_model = other.eye_model;
// Load the CascadeClassifier (as it does not have a proper copy constructor)
if(!face_detector_location.empty())
{
this->face_detector_HAAR.load(face_detector_location);
}
// Make sure the matrices are allocated properly
this->triangulations.resize(other.triangulations.size());
for(size_t i = 0; i < other.triangulations.size(); ++i)
{
// Make sure the matrix is copied.
this->triangulations[i] = other.triangulations[i].clone();
}
// Make sure the matrices are allocated properly
for(std::map<int, cv::Mat_<float>>::const_iterator it = other.kde_resp_precalc.begin(); it!= other.kde_resp_precalc.end(); it++)
{
// Make sure the matrix is copied.
this->kde_resp_precalc.insert(std::pair<int, cv::Mat_<float>>(it->first, it->second.clone()));
}
// Copy over the hierarchical models
this->hierarchical_mapping = other.hierarchical_mapping;
this->hierarchical_models = other.hierarchical_models;
this->hierarchical_model_names = other.hierarchical_model_names;
this->hierarchical_params = other.hierarchical_params;
}
face_detector_HOG = dlib::get_frontal_face_detector();
return *this;
}
// Move constructor
CLNF::CLNF(const CLNF&& other)
{
this->detection_success = other.detection_success;
this->tracking_initialised = other.tracking_initialised;
this->detection_certainty = other.detection_certainty;
this->model_likelihood = other.model_likelihood;
this->failures_in_a_row = other.failures_in_a_row;
pdm = other.pdm;
params_local = other.params_local;
params_global = other.params_global;
detected_landmarks = other.detected_landmarks;
landmark_likelihoods = other.landmark_likelihoods;
patch_experts = other.patch_experts;
landmark_validator = other.landmark_validator;
face_detector_location = other.face_detector_location;
face_detector_HAAR = other.face_detector_HAAR;
triangulations = other.triangulations;
kde_resp_precalc = other.kde_resp_precalc;
face_detector_HOG = dlib::get_frontal_face_detector();
// Copy over the hierarchical models
this->hierarchical_mapping = other.hierarchical_mapping;
this->hierarchical_models = other.hierarchical_models;
this->hierarchical_model_names = other.hierarchical_model_names;
this->hierarchical_params = other.hierarchical_params;
this->eye_model = other.eye_model;
}
// Assignment operator for rvalues
CLNF & CLNF::operator= (const CLNF&& other)
{
this->detection_success = other.detection_success;
this->tracking_initialised = other.tracking_initialised;
this->detection_certainty = other.detection_certainty;
this->model_likelihood = other.model_likelihood;
this->failures_in_a_row = other.failures_in_a_row;
pdm = other.pdm;
params_local = other.params_local;
params_global = other.params_global;
detected_landmarks = other.detected_landmarks;
landmark_likelihoods = other.landmark_likelihoods;
patch_experts = other.patch_experts;
landmark_validator = other.landmark_validator;
face_detector_location = other.face_detector_location;
face_detector_HAAR = other.face_detector_HAAR;
triangulations = other.triangulations;
kde_resp_precalc = other.kde_resp_precalc;
face_detector_HOG = dlib::get_frontal_face_detector();
// Copy over the hierarchical models
this->hierarchical_mapping = other.hierarchical_mapping;
this->hierarchical_models = other.hierarchical_models;
this->hierarchical_model_names = other.hierarchical_model_names;
this->hierarchical_params = other.hierarchical_params;
this->eye_model = other.eye_model;
return *this;
}
void CLNF::Read_CLNF(string clnf_location)
{
// Location of modules
ifstream locations(clnf_location.c_str(), ios_base::in);
if(!locations.is_open())
{
cout << "Couldn't open the CLNF model file aborting" << endl;
cout.flush();
return;
}
string line;
vector<string> intensity_expert_locations;
vector<string> depth_expert_locations;
vector<string> ccnf_expert_locations;
// The other module locations should be defined as relative paths from the main model
boost::filesystem::path root = boost::filesystem::path(clnf_location).parent_path();
// The main file contains the references to other files
while (!locations.eof())
{
getline(locations, line);
stringstream lineStream(line);
string module;
string location;
// figure out which module is to be read from which file
lineStream >> module;
getline(lineStream, location);
if(location.size() > 0)
location.erase(location.begin()); // remove the first space
// remove carriage return at the end for compatibility with unix systems
if(location.size() > 0 && location.at(location.size()-1) == '\r')
{
location = location.substr(0, location.size()-1);
}
// append the lovstion to root location (boost syntax)
location = (root / location).string();
if (module.compare("PDM") == 0)
{
cout << "Reading the PDM module from: " << location << "....";
pdm.Read(location);
cout << "Done" << endl;
}
else if (module.compare("Triangulations") == 0)
{
cout << "Reading the Triangulations module from: " << location << "....";
ifstream triangulationFile(location.c_str(), ios_base::in);
LandmarkDetector::SkipComments(triangulationFile);
int numViews;
triangulationFile >> numViews;
// read in the triangulations
triangulations.resize(numViews);
for(int i = 0; i < numViews; ++i)
{
LandmarkDetector::SkipComments(triangulationFile);
LandmarkDetector::ReadMat(triangulationFile, triangulations[i]);
}
cout << "Done" << endl;
}
else if(module.compare("PatchesIntensity") == 0)
{
intensity_expert_locations.push_back(location);
}
else if(module.compare("PatchesDepth") == 0)
{
depth_expert_locations.push_back(location);
}
else if(module.compare("PatchesCCNF") == 0)
{
ccnf_expert_locations.push_back(location);
}
}
// Initialise the patch experts
patch_experts.Read(intensity_expert_locations, depth_expert_locations, ccnf_expert_locations);
// Read in a face detector
face_detector_HOG = dlib::get_frontal_face_detector();
}
void CLNF::Read(string main_location)
{
cout << "Reading the CLNF landmark detector/tracker from: " << main_location << endl;
ifstream locations(main_location.c_str(), ios_base::in);
if(!locations.is_open())
{
cout << "Couldn't open the model file, aborting" << endl;
return;
}
string line;
// The other module locations should be defined as relative paths from the main model
boost::filesystem::path root = boost::filesystem::path(main_location).parent_path();
// Assume no eye model, unless read-in
eye_model = false;
// The main file contains the references to other files
while (!locations.eof())
{
getline(locations, line);
stringstream lineStream(line);
string module;
string location;
// figure out which module is to be read from which file
lineStream >> module;
lineStream >> location;
// remove carriage return at the end for compatibility with unix systems
if(location.size() > 0 && location.at(location.size()-1) == '\r')
{
location = location.substr(0, location.size()-1);
}
// append to root
location = (root / location).string();
if (module.compare("LandmarkDetector") == 0)
{
cout << "Reading the landmark detector module from: " << location << endl;
// The CLNF module includes the PDM and the patch experts
Read_CLNF(location);
}
else if(module.compare("LandmarkDetector_part") == 0)
{
string part_name;
lineStream >> part_name;
cout << "Reading part based module...." << part_name << endl;
vector<pair<int, int>> mappings;
while(!lineStream.eof())
{
int ind_in_main;
lineStream >> ind_in_main;
int ind_in_part;
lineStream >> ind_in_part;
mappings.push_back(pair<int, int>(ind_in_main, ind_in_part));
}
this->hierarchical_mapping.push_back(mappings);
CLNF part_model(location);
this->hierarchical_models.push_back(part_model);
this->hierarchical_model_names.push_back(part_name);
FaceModelParameters params;
params.validate_detections = false;
params.refine_hierarchical = false;
params.refine_parameters = false;
if(part_name.compare("left_eye") == 0 || part_name.compare("right_eye") == 0)
{
vector<int> windows_large;
windows_large.push_back(5);
windows_large.push_back(3);
vector<int> windows_small;
windows_small.push_back(5);
windows_small.push_back(3);
params.window_sizes_init = windows_large;
params.window_sizes_small = windows_small;
params.window_sizes_current = windows_large;
params.reg_factor = 0.1;
params.sigma = 2;
}
else if(part_name.compare("left_eye_28") == 0 || part_name.compare("right_eye_28") == 0)
{
vector<int> windows_large;
windows_large.push_back(3);
windows_large.push_back(5);
windows_large.push_back(9);
vector<int> windows_small;
windows_small.push_back(3);
windows_small.push_back(5);
windows_small.push_back(9);
params.window_sizes_init = windows_large;
params.window_sizes_small = windows_small;
params.window_sizes_current = windows_large;
params.reg_factor = 0.5;
params.sigma = 1.0;
eye_model = true;
}
else if(part_name.compare("mouth") == 0)
{
vector<int> windows_large;
windows_large.push_back(7);
windows_large.push_back(7);
vector<int> windows_small;
windows_small.push_back(7);
windows_small.push_back(7);
params.window_sizes_init = windows_large;
params.window_sizes_small = windows_small;
params.window_sizes_current = windows_large;
params.reg_factor = 1.0;
params.sigma = 2.0;
}
else if(part_name.compare("brow") == 0)
{
vector<int> windows_large;
windows_large.push_back(11);
windows_large.push_back(9);
vector<int> windows_small;
windows_small.push_back(11);
windows_small.push_back(9);
params.window_sizes_init = windows_large;
params.window_sizes_small = windows_small;
params.window_sizes_current = windows_large;
params.reg_factor = 10.0;
params.sigma = 3.5;
}
else if(part_name.compare("inner") == 0)
{
vector<int> windows_large;
windows_large.push_back(9);
vector<int> windows_small;
windows_small.push_back(9);
params.window_sizes_init = windows_large;
params.window_sizes_small = windows_small;
params.window_sizes_current = windows_large;
params.reg_factor = 2.5;
params.sigma = 1.75;
params.weight_factor = 2.5;
}
this->hierarchical_params.push_back(params);
cout << "Done" << endl;
}
else if (module.compare("DetectionValidator") == 0)
{
cout << "Reading the landmark validation module....";
landmark_validator.Read(location);
cout << "Done" << endl;
}
}
detected_landmarks.create(2 * pdm.NumberOfPoints(), 1);
detected_landmarks.setTo(0);
detection_success = false;
tracking_initialised = false;
model_likelihood = -10; // very low
detection_certainty = 1; // very uncertain
// Initialising default values for the rest of the variables
// local parameters (shape)
params_local.create(pdm.NumberOfModes(), 1);
params_local.setTo(0.0);
// global parameters (pose) [scale, euler_x, euler_y, euler_z, tx, ty]
params_global = cv::Vec6d(1, 0, 0, 0, 0, 0);
failures_in_a_row = -1;
}
// Resetting the model (for a new video, or complet reinitialisation
void CLNF::Reset()
{
detected_landmarks.setTo(0);
detection_success = false;
tracking_initialised = false;
model_likelihood = -10; // very low
detection_certainty = 1; // very uncertain
// local parameters (shape)
params_local.setTo(0.0);
// global parameters (pose) [scale, euler_x, euler_y, euler_z, tx, ty]
params_global = cv::Vec6d(1, 0, 0, 0, 0, 0);
failures_in_a_row = -1;
face_template = cv::Mat_<uchar>();
}
// Resetting the model, choosing the face nearest (x,y)
void CLNF::Reset(double x, double y)
{
// First reset the model overall
this->Reset();
// Now in the following frame when face detection takes place this is the point at which it will be preffered
this->preference_det.x = x;
this->preference_det.y = y;
}
// The main internal landmark detection call (should not be used externally?)
bool CLNF::DetectLandmarks(const cv::Mat_<uchar> &image, const cv::Mat_<float> &depth, FaceModelParameters& params)
{
// Fits from the current estimate of local and global parameters in the model
bool fit_success = Fit(image, depth, params.window_sizes_current, params);
// Store the landmarks converged on in detected_landmarks
pdm.CalcShape2D(detected_landmarks, params_local, params_global);
if(params.refine_hierarchical && hierarchical_models.size() > 0)
{
bool parts_used = false;
// Do the hierarchical models in parallel
tbb::parallel_for(0, (int)hierarchical_models.size(), [&](int part_model){
{
// Only do the synthetic eye models if we're doing gaze
if (!((hierarchical_model_names[part_model].compare("right_eye_28") == 0 ||
hierarchical_model_names[part_model].compare("left_eye_28") == 0)
&& !params.track_gaze))
{
int n_part_points = hierarchical_models[part_model].pdm.NumberOfPoints();
vector<pair<int, int>> mappings = this->hierarchical_mapping[part_model];
cv::Mat_<double> part_model_locs(n_part_points * 2, 1, 0.0);
// Extract the corresponding landmarks
for (size_t mapping_ind = 0; mapping_ind < mappings.size(); ++mapping_ind)
{
part_model_locs.at<double>(mappings[mapping_ind].second) = detected_landmarks.at<double>(mappings[mapping_ind].first);
part_model_locs.at<double>(mappings[mapping_ind].second + n_part_points) = detected_landmarks.at<double>(mappings[mapping_ind].first + this->pdm.NumberOfPoints());
}
// Fit the part based model PDM
hierarchical_models[part_model].pdm.CalcParams(hierarchical_models[part_model].params_global, hierarchical_models[part_model].params_local, part_model_locs);
// Only do this if we don't need to upsample
if (params_global[0] > 0.9 * hierarchical_models[part_model].patch_experts.patch_scaling[0])
{
parts_used = true;
this->hierarchical_params[part_model].window_sizes_current = this->hierarchical_params[part_model].window_sizes_init;
// Do the actual landmark detection
hierarchical_models[part_model].DetectLandmarks(image, depth, hierarchical_params[part_model]);
}
else
{
hierarchical_models[part_model].pdm.CalcShape2D(hierarchical_models[part_model].detected_landmarks, hierarchical_models[part_model].params_local, hierarchical_models[part_model].params_global);
}
}
}
});
// Recompute main model based on the fit part models
if(parts_used)
{
for (size_t part_model = 0; part_model < hierarchical_models.size(); ++part_model)
{
vector<pair<int, int>> mappings = this->hierarchical_mapping[part_model];
if (!((hierarchical_model_names[part_model].compare("right_eye_28") == 0 ||
hierarchical_model_names[part_model].compare("left_eye_28") == 0)
&& !params.track_gaze))
{
// Reincorporate the models into main tracker
for (size_t mapping_ind = 0; mapping_ind < mappings.size(); ++mapping_ind)
{
detected_landmarks.at<double>(mappings[mapping_ind].first) = hierarchical_models[part_model].detected_landmarks.at<double>(mappings[mapping_ind].second);
detected_landmarks.at<double>(mappings[mapping_ind].first + pdm.NumberOfPoints()) = hierarchical_models[part_model].detected_landmarks.at<double>(mappings[mapping_ind].second + hierarchical_models[part_model].pdm.NumberOfPoints());
}
}
}
pdm.CalcParams(params_global, params_local, detected_landmarks);
pdm.CalcShape2D(detected_landmarks, params_local, params_global);
}
}
// Check detection correctness
if(params.validate_detections && fit_success)
{
cv::Vec3d orientation(params_global[1], params_global[2], params_global[3]);
detection_certainty = landmark_validator.Check(orientation, image, detected_landmarks);
detection_success = detection_certainty < params.validation_boundary;
}
else
{
detection_success = fit_success;
if(fit_success)
{
detection_certainty = -1;
}
else
{
detection_certainty = 1;
}
}
return detection_success;
}
//=============================================================================
bool CLNF::Fit(const cv::Mat_<uchar>& im, const cv::Mat_<float>& depthImg, const std::vector<int>& window_sizes, const FaceModelParameters& parameters)
{
// Making sure it is a single channel image
assert(im.channels() == 1);
// Placeholder for the landmarks
cv::Mat_<double> current_shape(2 * pdm.NumberOfPoints() , 1, 0.0);
int n = pdm.NumberOfPoints();
cv::Mat_<float> depth_img_no_background;
// Background elimination from the depth image
if(!depthImg.empty())
{
bool success = RemoveBackground(depth_img_no_background, depthImg);
// The attempted background removal can fail leading to tracking failure
if(!success)
{
return false;
}
}
int num_scales = patch_experts.patch_scaling.size();
// Storing the patch expert response maps
vector<cv::Mat_<float> > patch_expert_responses(n);
// Converting from image space to patch expert space (normalised for rotation and scale)
cv::Matx22f sim_ref_to_img;
cv::Matx22d sim_img_to_ref;
FaceModelParameters tmp_parameters = parameters;
// Optimise the model across a number of areas of interest (usually in descending window size and ascending scale size)
for(int scale = 0; scale < num_scales; scale++)
{
int window_size = window_sizes[scale];
if(window_size == 0 || 0.9 * patch_experts.patch_scaling[scale] > params_global[0])
continue;
// The patch expert response computation
if(scale != window_sizes.size() - 1)
{
patch_experts.Response(patch_expert_responses, sim_ref_to_img, sim_img_to_ref, im, depth_img_no_background, pdm, params_global, params_local, window_size, scale);
}
else
{
// Do not use depth for the final iteration as it is not as accurate
patch_experts.Response(patch_expert_responses, sim_ref_to_img, sim_img_to_ref, im, cv::Mat(), pdm, params_global, params_local, window_size, scale);
}
if(parameters.refine_parameters == true)
{
// Adapt the parameters based on scale (wan't to reduce regularisation as scale increases, but increa sigma and tikhonov)
tmp_parameters.reg_factor = parameters.reg_factor - 15 * log(patch_experts.patch_scaling[scale]/0.25)/log(2);
if(tmp_parameters.reg_factor <= 0)
tmp_parameters.reg_factor = 0.001;
tmp_parameters.sigma = parameters.sigma + 0.25 * log(patch_experts.patch_scaling[scale]/0.25)/log(2);
tmp_parameters.weight_factor = parameters.weight_factor + 2 * parameters.weight_factor * log(patch_experts.patch_scaling[scale]/0.25)/log(2);
}
// Get the current landmark locations
pdm.CalcShape2D(current_shape, params_local, params_global);
// Get the view used by patch experts
int view_id = patch_experts.GetViewIdx(params_global, scale);
// the actual optimisation step
this->NU_RLMS(params_global, params_local, patch_expert_responses, cv::Vec6d(params_global), params_local.clone(), current_shape, sim_img_to_ref, sim_ref_to_img, window_size, view_id, true, scale, this->landmark_likelihoods, tmp_parameters);
// non-rigid optimisation
this->model_likelihood = this->NU_RLMS(params_global, params_local, patch_expert_responses, cv::Vec6d(params_global), params_local.clone(), current_shape, sim_img_to_ref, sim_ref_to_img, window_size, view_id, false, scale, this->landmark_likelihoods, tmp_parameters);
// Can't track very small images reliably (less than ~30px across)
if(params_global[0] < 0.25)
{
cout << "Face too small for landmark detection" << endl;
return false;
}
}
return true;
}
void CLNF::NonVectorisedMeanShift_precalc_kde(cv::Mat_<float>& out_mean_shifts, const vector<cv::Mat_<float> >& patch_expert_responses, const cv::Mat_<float> &dxs, const cv::Mat_<float> &dys, int resp_size, float a, int scale, int view_id, map<int, cv::Mat_<float> >& kde_resp_precalc)
{
int n = dxs.rows;
cv::Mat_<float> kde_resp;
float step_size = 0.1;
// if this has not been precomputer, precompute it, otherwise use it
if(kde_resp_precalc.find(resp_size) == kde_resp_precalc.end())
{
kde_resp = cv::Mat_<float>((int)((resp_size / step_size)*(resp_size/step_size)), resp_size * resp_size);
cv::MatIterator_<float> kde_it = kde_resp.begin();
for(int x = 0; x < resp_size/step_size; x++)
{
float dx = x * step_size;
for(int y = 0; y < resp_size/step_size; y++)
{
float dy = y * step_size;
int ii,jj;
float v,vx,vy;
for(ii = 0; ii < resp_size; ii++)
{
vx = (dy-ii)*(dy-ii);
for(jj = 0; jj < resp_size; jj++)
{
vy = (dx-jj)*(dx-jj);
// the KDE evaluation of that point
v = exp(a*(vx+vy));
*kde_it++ = v;
}
}
}
}
kde_resp_precalc[resp_size] = kde_resp.clone();
}
else
{
// use the precomputed version
kde_resp = kde_resp_precalc.find(resp_size)->second;
}
// for every point (patch) calculating mean-shift
for(int i = 0; i < n; i++)
{
if(patch_experts.visibilities[scale][view_id].at<int>(i,0) == 0)
{
out_mean_shifts.at<float>(i,0) = 0;
out_mean_shifts.at<float>(i+n,0) = 0;
continue;
}
// indices of dx, dy
float dx = dxs.at<float>(i);
float dy = dys.at<float>(i);
// Ensure that we are within bounds (important for precalculation)
if(dx < 0)
dx = 0;
if(dy < 0)
dy = 0;
if(dx > resp_size - step_size)
dx = resp_size - step_size;
if(dy > resp_size - step_size)
dy = resp_size - step_size;
// Pick the row from precalculated kde that approximates the current dx, dy best
int closest_col = (int)(dy /step_size + 0.5); // Plus 0.5 is there, as C++ rounds down with int cast
int closest_row = (int)(dx /step_size + 0.5); // Plus 0.5 is there, as C++ rounds down with int cast
int idx = closest_row * ((int)(resp_size/step_size + 0.5)) + closest_col; // Plus 0.5 is there, as C++ rounds down with int cast
cv::MatIterator_<float> kde_it = kde_resp.begin() + kde_resp.cols*idx;
float mx=0.0;
float my=0.0;
float sum=0.0;
// Iterate over the patch responses here
cv::MatConstIterator_<float> p = patch_expert_responses[i].begin();
for(int ii = 0; ii < resp_size; ii++)
{
for(int jj = 0; jj < resp_size; jj++)
{
// the KDE evaluation of that point multiplied by the probability at the current, xi, yi
float v = (*p++) * (*kde_it++);
sum += v;
// mean shift in x and y
mx += v*jj;
my += v*ii;
}
}
float msx = (mx/sum - dx);
float msy = (my/sum - dy);
out_mean_shifts.at<float>(i,0) = msx;
out_mean_shifts.at<float>(i+n,0) = msy;
}
}
void CLNF::GetWeightMatrix(cv::Mat_<float>& WeightMatrix, int scale, int view_id, const FaceModelParameters& parameters)
{
int n = pdm.NumberOfPoints();
// Is the weight matrix needed at all
if(parameters.weight_factor > 0)
{
WeightMatrix = cv::Mat_<float>::zeros(n*2, n*2);
for (int p=0; p < n; p++)
{
if(!patch_experts.ccnf_expert_intensity.empty())
{
// for the x dimension
WeightMatrix.at<float>(p,p) = WeightMatrix.at<float>(p,p) + patch_experts.ccnf_expert_intensity[scale][view_id][p].patch_confidence;
// for they y dimension
WeightMatrix.at<float>(p+n,p+n) = WeightMatrix.at<float>(p,p);
}
else
{
// Across the modalities add the confidences
for(size_t pc=0; pc < patch_experts.svr_expert_intensity[scale][view_id][p].svr_patch_experts.size(); pc++)
{
// for the x dimension
WeightMatrix.at<float>(p,p) = WeightMatrix.at<float>(p,p) + patch_experts.svr_expert_intensity[scale][view_id][p].svr_patch_experts.at(pc).confidence;
}
// for the y dimension
WeightMatrix.at<float>(p+n,p+n) = WeightMatrix.at<float>(p,p);
}
}
WeightMatrix = parameters.weight_factor * WeightMatrix;
}
else
{
WeightMatrix = cv::Mat_<float>::eye(n*2, n*2);
}
}
//=============================================================================
double CLNF::NU_RLMS(cv::Vec6d& final_global, cv::Mat_<double>& final_local, const vector<cv::Mat_<float> >& patch_expert_responses, const cv::Vec6d& initial_global, const cv::Mat_<double>& initial_local,
const cv::Mat_<double>& base_shape, const cv::Matx22d& sim_img_to_ref, const cv::Matx22f& sim_ref_to_img, int resp_size, int view_id, bool rigid, int scale, cv::Mat_<double>& landmark_lhoods,
const FaceModelParameters& parameters)
{
int n = pdm.NumberOfPoints();
// Mean, eigenvalues, eigenvectors
cv::Mat_<double> M = this->pdm.mean_shape;
cv::Mat_<double> E = this->pdm.eigen_values;
//Mat_<double> V = this->pdm.princ_comp;
int m = pdm.NumberOfModes();
cv::Vec6d current_global(initial_global);
cv::Mat_<float> current_local;
initial_local.convertTo(current_local, CV_32F);
cv::Mat_<double> current_shape;
cv::Mat_<double> previous_shape;
// Pre-calculate the regularisation term
cv::Mat_<float> regTerm;
if(rigid)
{
regTerm = cv::Mat_<float>::zeros(6,6);
}
else
{
cv::Mat_<double> regularisations = cv::Mat_<double>::zeros(1, 6 + m);
// Setting the regularisation to the inverse of eigenvalues
cv::Mat(parameters.reg_factor / E).copyTo(regularisations(cv::Rect(6, 0, m, 1)));
cv::Mat_<double> regTerm_d = cv::Mat::diag(regularisations.t());
regTerm_d.convertTo(regTerm, CV_32F);
}
cv::Mat_<float> WeightMatrix;
GetWeightMatrix(WeightMatrix, scale, view_id, parameters);
cv::Mat_<float> dxs, dys;
// The preallocated memory for the mean shifts
cv::Mat_<float> mean_shifts(2 * pdm.NumberOfPoints(), 1, 0.0);
// Number of iterations
for(int iter = 0; iter < parameters.num_optimisation_iteration; iter++)
{
// get the current estimates of x
pdm.CalcShape2D(current_shape, current_local, current_global);
if(iter > 0)
{
// if the shape hasn't changed terminate
if(norm(current_shape, previous_shape) < 0.01)
{
break;
}
}
current_shape.copyTo(previous_shape);
// Jacobian, and transposed weighted jacobian
cv::Mat_<float> J, J_w_t;
// calculate the appropriate Jacobians in 2D, even though the actual behaviour is in 3D, using small angle approximation and oriented shape
if(rigid)
{
pdm.ComputeRigidJacobian(current_local, current_global, J, WeightMatrix, J_w_t);
}
else
{
pdm.ComputeJacobian(current_local, current_global, J, WeightMatrix, J_w_t);
}
// useful for mean shift calculation
float a = -0.5/(parameters.sigma * parameters.sigma);
cv::Mat_<double> current_shape_2D = current_shape.reshape(1, 2).t();
cv::Mat_<double> base_shape_2D = base_shape.reshape(1, 2).t();
cv::Mat_<float> offsets;
cv::Mat((current_shape_2D - base_shape_2D) * cv::Mat(sim_img_to_ref).t()).convertTo(offsets, CV_32F);
dxs = offsets.col(0) + (resp_size-1)/2;
dys = offsets.col(1) + (resp_size-1)/2;
NonVectorisedMeanShift_precalc_kde(mean_shifts, patch_expert_responses, dxs, dys, resp_size, a, scale, view_id, kde_resp_precalc);
// Now transform the mean shifts to the the image reference frame, as opposed to one of ref shape (object space)
cv::Mat_<float> mean_shifts_2D = (mean_shifts.reshape(1, 2)).t();
mean_shifts_2D = mean_shifts_2D * cv::Mat(sim_ref_to_img).t();
mean_shifts = cv::Mat(mean_shifts_2D.t()).reshape(1, n*2);
// remove non-visible observations
for(int i = 0; i < n; ++i)
{
// if patch unavailable for current index
if(patch_experts.visibilities[scale][view_id].at<int>(i,0) == 0)
{
cv::Mat Jx = J.row(i);
Jx = cvScalar(0);
cv::Mat Jy = J.row(i+n);
Jy = cvScalar(0);
mean_shifts.at<float>(i,0) = 0.0f;
mean_shifts.at<float>(i+n,0) = 0.0f;
}
}
// projection of the meanshifts onto the jacobians (using the weighted Jacobian, see Baltrusaitis 2013)
cv::Mat_<float> J_w_t_m = J_w_t * mean_shifts;
// Add the regularisation term
if(!rigid)
{
J_w_t_m(cv::Rect(0,6,1, m)) = J_w_t_m(cv::Rect(0,6,1, m)) - regTerm(cv::Rect(6,6, m, m)) * current_local;
}
// Calculating the Hessian approximation
cv::Mat_<float> Hessian = J_w_t * J;
// Add the Tikhonov regularisation
Hessian = Hessian + regTerm;
// Solve for the parameter update (from Baltrusaitis 2013 based on eq (36) Saragih 2011)
cv::Mat_<float> param_update;
cv::solve(Hessian, J_w_t_m, param_update, CV_CHOLESKY);
// update the reference
pdm.UpdateModelParameters(param_update, current_local, current_global);
// clamp to the local parameters for valid expressions
pdm.Clamp(current_local, current_global, parameters);
}
// compute the log likelihood
double loglhood = 0;
landmark_lhoods = cv::Mat_<double>(n, 1, -1e8);
for(int i = 0; i < n; i++)
{
if(patch_experts.visibilities[scale][view_id].at<int>(i,0) == 0 )
{
continue;
}
float dx = dxs.at<float>(i);
float dy = dys.at<float>(i);
int ii,jj;
float v,vx,vy,sum=0.0;
// Iterate over the patch responses here
cv::MatConstIterator_<float> p = patch_expert_responses[i].begin();
for(ii = 0; ii < resp_size; ii++)
{
vx = (dy-ii)*(dy-ii);
for(jj = 0; jj < resp_size; jj++)
{
vy = (dx-jj)*(dx-jj);
// the probability at the current, xi, yi
v = *p++;
// the KDE evaluation of that point
v *= exp(-0.5*(vx+vy)/(parameters.sigma * parameters.sigma));
sum += v;
}
}
landmark_lhoods.at<double>(i,0) = (double)sum;
// the offset is there for numerical stability
loglhood += log(sum + 1e-8);
}
loglhood = loglhood/sum(patch_experts.visibilities[scale][view_id])[0];
final_global = current_global;
final_local = current_local;
return loglhood;
}
bool CLNF::RemoveBackground(cv::Mat_<float>& out_depth_image, const cv::Mat_<float>& depth_image)
{
// use the current estimate of the face location to determine what is foreground and background
double tx = this->params_global[4];
double ty = this->params_global[5];
// if we are too close to the edge fail
if(tx - 9 <= 0 || ty - 9 <= 0 || tx + 9 >= depth_image.cols || ty + 9 >= depth_image.rows)
{
cout << "Face estimate is too close to the edge, tracking failed" << endl;
return false;
}
cv::Mat_<double> current_shape;
pdm.CalcShape2D(current_shape, params_local, params_global);
double min_x, max_x, min_y, max_y;
int n = this->pdm.NumberOfPoints();
cv::minMaxLoc(current_shape(cv::Range(0, n), cv::Range(0,1)), &min_x, &max_x);
cv::minMaxLoc(current_shape(cv::Range(n, n*2), cv::Range(0,1)), &min_y, &max_y);
// the area of interest: size of face with some scaling ( these scalings are fairly ad-hoc)
double width = 3 * (max_x - min_x);
double height = 2.5 * (max_y - min_y);
// getting the region of interest from the depth image,
// so we don't get other objects lying at same depth as head in the image but away from it
cv::Rect_<int> roi((int)(tx-width/2), (int)(ty - height/2), (int)width, (int)height);
// clamp it if it does not lie fully in the image
if(roi.x < 0) roi.x = 0;
if(roi.y < 0) roi.y = 0;
if(roi.width + roi.x >= depth_image.cols) roi.x = depth_image.cols - roi.width;
if(roi.height + roi.y >= depth_image.rows) roi.y = depth_image.rows - roi.height;
if(width > depth_image.cols)
{
roi.x = 0; roi.width = depth_image.cols;
}
if(height > depth_image.rows)
{
roi.y = 0; roi.height = depth_image.rows;
}
if(roi.width == 0) roi.width = depth_image.cols;
if(roi.height == 0) roi.height = depth_image.rows;
if(roi.x >= depth_image.cols) roi.x = 0;
if(roi.y >= depth_image.rows) roi.y = 0;
// Initialise the mask
cv::Mat_<uchar> mask(depth_image.rows, depth_image.cols, (uchar)0);
cv::Mat_<uchar> valid_pixels = depth_image > 0;
// check if there is any depth near the estimate
if(cv::sum(valid_pixels(cv::Rect((int)tx - 8, (int)ty - 8, 16, 16))/255)[0] > 0)
{
double Z = cv::mean(depth_image(cv::Rect((int)tx - 8, (int)ty - 8, 16, 16)), valid_pixels(cv::Rect((int)tx - 8, (int)ty - 8, 16, 16)))[0]; // Z offset from the surface of the face
// Only operate within region of interest of the depth image
cv::Mat dRoi = depth_image(roi);
cv::Mat mRoi = mask(roi);
// Filter all pixels further than 20cm away from the current pose depth estimate
cv::inRange(dRoi, Z - 200, Z + 200, mRoi);
// Convert to be either 0 or 1
mask = mask / 255;
cv::Mat_<float> maskF;
mask.convertTo(maskF, CV_32F);
//Filter the depth image
out_depth_image = depth_image.mul(maskF);
}
else
{
cout << "No depth signal found in foreground, tracking failed" << endl;
return false;
}
return true;
}
// Getting a 3D shape model from the current detected landmarks (in camera space)
cv::Mat_<double> CLNF::GetShape(double fx, double fy, double cx, double cy) const
{
int n = this->detected_landmarks.rows/2;
cv::Mat_<double> shape3d(n*3, 1);
this->pdm.CalcShape3D(shape3d, this->params_local);
// Need to rotate the shape to get the actual 3D representation
// get the rotation matrix from the euler angles
cv::Matx33d R = LandmarkDetector::Euler2RotationMatrix(cv::Vec3d(params_global[1], params_global[2], params_global[3]));
shape3d = shape3d.reshape(1, 3);
shape3d = shape3d.t() * cv::Mat(R).t();
// from the weak perspective model can determine the average depth of the object
double Zavg = fx / params_global[0];
cv::Mat_<double> outShape(n,3,0.0);
// this is described in the paper in section 3.4 (equation 10) (of the CLM-Z paper)
for(int i = 0; i < n; i++)
{
double Z = Zavg + shape3d.at<double>(i,2);
double X = Z * ((this->detected_landmarks.at<double>(i) - cx)/fx);
double Y = Z * ((this->detected_landmarks.at<double>(i + n) - cy)/fy);
outShape.at<double>(i,0) = (double)X;
outShape.at<double>(i,1) = (double)Y;
outShape.at<double>(i,2) = (double)Z;
}
// The format is 3 rows - n cols
return outShape.t();
}
// A utility bounding box function
cv::Rect_<double> CLNF::GetBoundingBox() const
{
cv::Mat_<double> xs = this->detected_landmarks(cv::Rect(0,0,1,this->detected_landmarks.rows/2));
cv::Mat_<double> ys = this->detected_landmarks(cv::Rect(0,this->detected_landmarks.rows/2, 1, this->detected_landmarks.rows/2));
double min_x, max_x;
double min_y, max_y;
cv::minMaxLoc(xs, &min_x, &max_x);
cv::minMaxLoc(ys, &min_y, &max_y);
// See if the detections intersect
cv::Rect_<double> model_rect(min_x, min_y, max_x - min_x, max_y - min_y);
return model_rect;
}
// Legacy function not used at the moment
void CLNF::NonVectorisedMeanShift(cv::Mat_<double>& out_mean_shifts, const vector<cv::Mat_<float> >& patch_expert_responses, const cv::Mat_<double> &dxs, const cv::Mat_<double> &dys, int resp_size, double a, int scale, int view_id)
{
int n = dxs.rows;
for(int i = 0; i < n; i++)
{
if(patch_experts.visibilities[scale][view_id].at<int>(i,0) == 0 || sum(patch_expert_responses[i])[0] == 0)
{
out_mean_shifts.at<double>(i,0) = 0;
out_mean_shifts.at<double>(i+n,0) = 0;
continue;
}
// indices of dx, dy
double dx = dxs.at<double>(i);
double dy = dys.at<double>(i);
int ii,jj;
double v,vx,vy,mx=0.0,my=0.0,sum=0.0;
// Iterate over the patch responses here
cv::MatConstIterator_<float> p = patch_expert_responses[i].begin();
for(ii = 0; ii < resp_size; ii++)
{
vx = (dy-ii)*(dy-ii);
for(jj = 0; jj < resp_size; jj++)
{
vy = (dx-jj)*(dx-jj);
// the probability at the current, xi, yi
v = *p++;
// the KDE evaluation of that point
double kd = exp(a*(vx+vy));
v *= kd;
sum += v;
// mean shift in x and y
mx += v*jj;
my += v*ii;
}
}
// setting the actual mean shift update
double msx = (mx/sum - dx);
double msy = (my/sum - dy);
out_mean_shifts.at<double>(i, 0) = msx;
out_mean_shifts.at<double>(i + n, 0) = msy;
}
}