sustaining_gazes/lib/local/LandmarkDetector/src/Patch_experts.cpp

///////////////////////////////////////////////////////////////////////////////
// Copyright (C) 2017, Carnegie Mellon University and University of Cambridge,
// all rights reserved.
//
// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
//
// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.
// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
//
// License can be found in OpenFace-license.txt
//
//     * Any publications arising from the use of this software, including but
//       not limited to academic journal and conference publications, technical
//       reports and manuals, must cite at least one of the following works:
//
//       OpenFace: an open source facial behavior analysis toolkit
//       Tadas Baltrušaitis, Peter Robinson, and Louis-Philippe Morency
//       in IEEE Winter Conference on Applications of Computer Vision, 2016
//
//       Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
//       Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling
//       in IEEE International Conference on Computer Vision (ICCV), 2015
//
//       Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
//       Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson
//       in Facial Expression Recognition and Analysis Challenge,
//       IEEE International Conference on Automatic Face and Gesture Recognition, 2015
//
//       Constrained Local Neural Fields for robust facial landmark detection in the wild.
//       Tadas Baltrušaitis, Peter Robinson, and Louis-Philippe Morency.
//       in IEEE Int. Conference on Computer Vision Workshops, 300 Faces in-the-Wild Challenge, 2013.
//
///////////////////////////////////////////////////////////////////////////////

#include "stdafx.h"

#include "Patch_experts.h"

// OpenCV includes
#include <opencv2/core/core_c.h>
#include <opencv2/imgproc/imgproc_c.h>

// TBB includes
#include <tbb/tbb.h>

// Math includes
#define _USE_MATH_DEFINES
#include <cmath>

#ifndef M_PI
	#define M_PI 3.14159265358979323846
#endif

#include "LandmarkDetectorUtils.h"

using namespace LandmarkDetector;

// A copy constructor
// Builds a Patch_experts from another instance. The patch scalings, view centers and
// the three expert collections are copied in the initialiser list; every matrix stored
// in sigma_components and visibilities is duplicated with clone() so that the new
// object ends up with its own matrices rather than plain assigned cv::Mat headers.
Patch_experts::Patch_experts(const Patch_experts& other) : patch_scaling(other.patch_scaling), centers(other.centers), svr_expert_intensity(other.svr_expert_intensity), svr_expert_depth(other.svr_expert_depth), ccnf_expert_intensity(other.ccnf_expert_intensity)
{
	// Sigma components: size the outer container first, then clone row by row
	const size_t sigma_rows = other.sigma_components.size();
	sigma_components.resize(sigma_rows);

	for (size_t row = 0; row < sigma_rows; ++row)
	{
		const auto& source_row = other.sigma_components[row];
		auto& target_row = sigma_components[row];

		const size_t row_length = source_row.size();
		target_row.resize(row_length);

		for (size_t col = 0; col < row_length; ++col)
		{
			// clone() rather than assignment, so the matrix itself is copied
			target_row[col] = source_row[col].clone();
		}
	}

	// Visibilities: same scheme as for the sigma components
	const size_t visibility_rows = other.visibilities.size();
	visibilities.resize(visibility_rows);

	for (size_t row = 0; row < visibility_rows; ++row)
	{
		const auto& source_row = other.visibilities[row];
		auto& target_row = visibilities[row];

		const size_t row_length = source_row.size();
		target_row.resize(row_length);

		for (size_t col = 0; col < row_length; ++col)
		{
			// clone() rather than assignment, so the matrix itself is copied
			target_row[col] = source_row[col].clone();
		}
	}
}

// Returns the patch expert responses given a grayscale and an optional depth image.
// Additionally returns the transform from the image coordinates to the response coordinates (and vice versa).
// The computation also requires the current landmark locations to compute response around, the PDM corresponding to the desired model, and the parameters describing its instance
// Also need to provide the size of the area of interest and the desired scale of analysis
// Parameters:
//   patch_expert_responses - output; entry i is replaced by a window_size x window_size
//                            response map for every visible landmark i. The vector is
//                            indexed but never resized here, so it must already hold
//                            one entry per landmark.
//   sim_ref_to_img         - output; 2x2 transform from the reference shape to the image shape
//   sim_img_to_ref         - output; 2x2 transform from the image shape to the reference shape
//   grayscale_image        - image the intensity patch experts are evaluated on
//   depth_image            - optional depth image; may be empty, in which case the depth
//                            patch experts are not evaluated
//   pdm                    - point distribution model used to compute the 2D shapes
//   params_global          - global parameters of the current model instance
//   params_local           - local (non-rigid) parameters of the current model instance
//   window_size            - side length of each response map
//   scale                  - index of the patch expert scale to use
// Landmarks that are not visible in the selected view are left untouched. Nothing is
// computed when the visibility matrix of the selected view does not have one row per landmark.
void Patch_experts::Response(vector<cv::Mat_<float> >& patch_expert_responses, cv::Matx22f& sim_ref_to_img, cv::Matx22d& sim_img_to_ref, const cv::Mat_<uchar>& grayscale_image, const cv::Mat_<float>& depth_image,
							 const PDM& pdm, const cv::Vec6d& params_global, const cv::Mat_<double>& params_local, int window_size, int scale)
{

	// Pick the view whose center is closest to the current global parameters
	int view_id = GetViewIdx(params_global, scale);

	int n = pdm.NumberOfPoints();

	// Compute the current landmark locations (around which responses will be computed)
	cv::Mat_<double> landmark_locations;

	pdm.CalcShape2D(landmark_locations, params_local, params_global);

	cv::Mat_<double> reference_shape;

	// Initialise the reference shape on which we'll be warping
	cv::Vec6d global_ref(patch_scaling[scale], 0, 0, 0, 0, 0);

	// Compute the reference shape
	pdm.CalcShape2D(reference_shape, params_local, global_ref);

	// similarity and inverse similarity transform to and from image and reference shape
	cv::Mat_<double> reference_shape_2D = (reference_shape.reshape(1, 2).t());
	cv::Mat_<double> image_shape_2D = landmark_locations.reshape(1, 2).t();

	sim_img_to_ref = AlignShapesWithScale(image_shape_2D, reference_shape_2D);
	cv::Matx22d sim_ref_to_img_d = sim_img_to_ref.inv(cv::DECOMP_LU);

	// Entries of the reference-to-image transform reused for the per-landmark warp below
	double a1 = sim_ref_to_img_d(0,0);
	double b1 = -sim_ref_to_img_d(0,1);

	// Hand the transform back in single precision
	sim_ref_to_img(0,0) = (float)sim_ref_to_img_d(0,0);
	sim_ref_to_img(0,1) = (float)sim_ref_to_img_d(0,1);
	sim_ref_to_img(1,0) = (float)sim_ref_to_img_d(1,0);
	sim_ref_to_img(1,1) = (float)sim_ref_to_img_d(1,1);

	// Indicates the legal pixels in a depth image, if available (used for CLM-Z area of interest (window) interpolation)
	cv::Mat_<uchar> mask;
	if(!depth_image.empty())
	{
		// The comparison yields 255 for valid pixels, the division turns that into 1
		mask = depth_image > 0;
		mask = mask / 255;
	}

	bool use_ccnf = !this->ccnf_expert_intensity.empty();

	// The visibility matrix of the chosen view must have one row per landmark, otherwise
	// indexing it (and the per-landmark experts) with a landmark index would be out of range.
	// Evaluated once here and shared by the sigma pre-computation and the response loop.
	const bool visibilities_match = n > 0 && visibilities[scale][view_id].rows == n;

	// If using CCNF patch experts might need to precalculate Sigmas
	if(use_ccnf && visibilities_match)
	{
		vector<cv::Mat_<float> > sigma_components;

		// Retrieve the sigma components whose row count matches the requested window size
		for( size_t w_size = 0; w_size < this->sigma_components.size(); ++w_size)
		{
			if(!this->sigma_components[w_size].empty())
			{
				if(window_size*window_size == this->sigma_components[w_size][0].rows)
				{
					sigma_components = this->sigma_components[w_size];
				}
			}
		}

		// Go through all of the landmarks and compute the Sigma for each
		for( int lmark = 0; lmark < n; lmark++)
		{
			// Only for visible landmarks
			if(visibilities[scale][view_id].at<int>(lmark,0))
			{
				// Precompute sigmas if they are not computed yet
				ccnf_expert_intensity[scale][view_id][lmark].ComputeSigmas(sigma_components, window_size);
			}
		}

	}

	// Calculate the patch responses for every landmark, actual work happens here.
	// The landmarks are processed in parallel through TBB. No OpenMP pragma is used:
	// "#pragma omp parallel for" has to be followed directly by a for loop, so placing
	// it in front of this call would not compile when OpenMP is enabled.
	tbb::parallel_for(0, n, [&](int i){

		// Skip landmarks that are not visible in this view
		if(!visibilities_match || visibilities[scale][view_id].at<int>(i,0) == 0)
		{
			return;
		}

		// Work out how big the area of interest has to be to get a response of window size
		int area_of_interest_width;
		int area_of_interest_height;

		if(use_ccnf)
		{
			area_of_interest_width = window_size + ccnf_expert_intensity[scale][view_id][i].width - 1;
			area_of_interest_height = window_size + ccnf_expert_intensity[scale][view_id][i].height - 1;
		}
		else
		{
			area_of_interest_width = window_size + svr_expert_intensity[scale][view_id][i].width - 1;
			area_of_interest_height = window_size + svr_expert_intensity[scale][view_id][i].height - 1;
		}

		// 2x3 warp: the reference-to-image transform, with the current landmark location as translation
		cv::Mat sim = (cv::Mat_<float>(2,3) << a1, -b1, landmark_locations.at<double>(i,0), b1, a1, landmark_locations.at<double>(i+n,0));

		// Extract the region of interest around the current landmark location
		cv::Mat_<float> area_of_interest(area_of_interest_height, area_of_interest_width);

		// Using C style openCV as it does what we need
		CvMat area_of_interest_o = area_of_interest;
		CvMat sim_o = sim;
		IplImage im_o = grayscale_image;
		cvGetQuadrangleSubPix(&im_o, &area_of_interest_o, &sim_o);

		// get the correct size response window
		patch_expert_responses[i] = cv::Mat_<float>(window_size, window_size);

		// Get intensity response either from the SVR or CCNF patch experts (prefer CCNF)
		if(use_ccnf)
		{
			ccnf_expert_intensity[scale][view_id][i].Response(area_of_interest, patch_expert_responses[i]);
		}
		else
		{
			svr_expert_intensity[scale][view_id][i].Response(area_of_interest, patch_expert_responses[i]);
		}

		// If depth patch experts and a depth image are available, add the depth response
		// (the landmark is already known to be visible at this point)
		if(!svr_expert_depth.empty() && !depth_image.empty())
		{

			cv::Mat_<float> dProb = patch_expert_responses[i].clone();
			cv::Mat_<float> depthWindow(area_of_interest_height, area_of_interest_width);

			CvMat dimg_o = depthWindow;
			cv::Mat maskWindow(area_of_interest_height, area_of_interest_width, CV_32F);
			CvMat mimg_o = maskWindow;

			IplImage d_o = depth_image;
			IplImage m_o = mask;

			// Sample the depth image and its validity mask with the same warp as the intensity patch
			cvGetQuadrangleSubPix(&d_o,&dimg_o,&sim_o);

			cvGetQuadrangleSubPix(&m_o,&mimg_o,&sim_o);

			// Zero out every depth sample whose warped mask value is below 1
			depthWindow.setTo(0, maskWindow < 1);

			svr_expert_depth[scale][view_id][i].ResponseDepth(depthWindow, dProb);

			// Normalise the intensity response so that it sums to one
			double sum = cv::sum(patch_expert_responses[i])[0];

			// To avoid division by 0 issues
			if(sum == 0)
			{
				sum = 1;
			}

			patch_expert_responses[i] /= sum;

			// Normalise the depth response so that it sums to one
			sum = cv::sum(dProb)[0];
			// To avoid division by 0 issues
			if(sum == 0)
			{
				sum = 1;
			}

			dProb /= sum;

			// The final response is the sum of the two normalised responses
			patch_expert_responses[i] = patch_expert_responses[i] + dProb;

		}
	});

}

//=============================================================================
// Getting the closest view center based on orientation
// Returns the index of the view, at the given scale, whose center has the smallest
// squared Euclidean distance to elements 1..3 of params_global. The first view wins
// ties, and 0 is returned when the scale has no views at all.
int Patch_experts::GetViewIdx(const cv::Vec6d& params_global, int scale) const
{
	int closest_view = 0;

	// Overwritten by the first view before it is ever compared against
	double smallest_distance = 0.0;

	const int view_count = this->nViews(scale);

	for(int view = 0; view < view_count; ++view)
	{
		const cv::Vec3d& view_center = centers[scale][view];

		const double diff_0 = params_global[1] - view_center[0];
		const double diff_1 = params_global[2] - view_center[1];
		const double diff_2 = params_global[3] - view_center[2];

		const double distance = diff_0*diff_0 + diff_1*diff_1 + diff_2*diff_2;

		// The first view seeds the search, later views must be strictly closer
		const bool is_better = (view == 0) || (distance < smallest_distance);
		if(is_better)
		{
			smallest_distance = distance;
			closest_view = view;
		}
	}

	return closest_view;
}


//===========================================================================
// Reads all patch experts, one file per scale.
//   intensity_svr_expert_locations  - files with the SVR intensity patch experts
//   depth_svr_expert_locations      - files with the SVR depth patch experts (may be empty)
//   intensity_ccnf_expert_locations - files with the CCNF intensity patch experts (may be empty)
// When CCNF locations are given, the centers, visibilities and patch scalings read from
// them replace the ones read from the SVR intensity files. The depth experts are only kept
// if they agree with the intensity experts on the number of scales, the patch scaling, the
// number of views, the view centers and the visibilities. On any mismatch a message is
// printed and the function returns without depth experts from this call.
void Patch_experts::Read(vector<string> intensity_svr_expert_locations, vector<string> depth_svr_expert_locations, vector<string> intensity_ccnf_expert_locations)
{

	// initialise the SVR intensity patch expert parameters
	int num_intensity_svr = static_cast<int>(intensity_svr_expert_locations.size());
	centers.resize(num_intensity_svr);
	visibilities.resize(num_intensity_svr);
	patch_scaling.resize(num_intensity_svr);

	svr_expert_intensity.resize(num_intensity_svr);

	// Reading in SVR intensity patch experts for each scales it is defined in
	for(int scale = 0; scale < num_intensity_svr; ++scale)
	{
		string location = intensity_svr_expert_locations[scale];
		cout << "Reading the intensity SVR patch experts from: " << location << "....";
		Read_SVR_patch_experts(location,  centers[scale], visibilities[scale], svr_expert_intensity[scale], patch_scaling[scale]);
	}

	// Initialise and read CCNF patch experts (currently only intensity based),
	int num_intensity_ccnf = static_cast<int>(intensity_ccnf_expert_locations.size());

	// CCNF experts override the SVR ones
	if(num_intensity_ccnf > 0)
	{
		centers.resize(num_intensity_ccnf);
		visibilities.resize(num_intensity_ccnf);
		patch_scaling.resize(num_intensity_ccnf);
		ccnf_expert_intensity.resize(num_intensity_ccnf);
	}

	for(int scale = 0; scale < num_intensity_ccnf; ++scale)
	{
		string location = intensity_ccnf_expert_locations[scale];
		cout << "Reading the intensity CCNF patch experts from: " << location << "....";
		Read_CCNF_patch_experts(location,  centers[scale], visibilities[scale], ccnf_expert_intensity[scale], patch_scaling[scale]);
	}


	// initialise the SVR depth patch expert parameters
	int num_depth_scales = static_cast<int>(depth_svr_expert_locations.size());
	int num_intensity_scales = static_cast<int>(centers.size());

	if(num_depth_scales > 0 && num_intensity_scales != num_depth_scales)
	{
		cout << "Intensity and depth patch experts have a different number of scales, can't read depth" << endl;
		return;
	}

	// Have these to confirm that depth patch experts have the same number of views and scales and have the same visibilities
	vector<vector<cv::Vec3d> > centers_depth(num_depth_scales);
	vector<vector<cv::Mat_<int> > > visibilities_depth(num_depth_scales);
	vector<double> patch_scaling_depth(num_depth_scales);

	svr_expert_depth.resize(num_depth_scales);

	// Reading in SVR depth patch experts for each scale they are defined in
	for(int scale = 0; scale < num_depth_scales; ++scale)
	{
		string location = depth_svr_expert_locations[scale];
		cout << "Reading the depth SVR patch experts from: " << location << "....";
		Read_SVR_patch_experts(location,  centers_depth[scale], visibilities_depth[scale], svr_expert_depth[scale], patch_scaling_depth[scale]);

		// Check if the scales are identical
		if(patch_scaling_depth[scale] != patch_scaling[scale])
		{
			cout << "Intensity and depth patch experts have a different scales, can't read depth" << endl;
			svr_expert_depth.clear();
			return;
		}

		int num_views_intensity = static_cast<int>(centers[scale].size());
		int num_views_depth = static_cast<int>(centers_depth[scale].size());

		// Check if the number of views is identical
		if(num_views_intensity != num_views_depth)
		{
			cout << "Intensity and depth patch experts have a different number of views, can't read depth" << endl;
			svr_expert_depth.clear();
			return;
		}

		// Every view has to agree on both its center and its visibilities
		for(int view = 0; view < num_views_depth; ++view)
		{
			if(cv::countNonZero(centers_depth[scale][view] != centers[scale][view]) || cv::countNonZero(visibilities[scale][view] != visibilities_depth[scale][view]))
			{
				cout << "Intensity and depth patch experts have different visibilities or centers" << endl;
				svr_expert_depth.clear();
				return;
			}
		}
	}

}
//======================= Reading the SVR patch experts =========================================//
// Reads the SVR patch experts of a single scale from a text file.
//   expert_location - path of the file to read
//   centers         - output; one center per view, converted from degrees to radians
//   visibility      - output; one visibility matrix per view
//   patches         - output; patches[view][landmark], with the landmark count taken from
//                     the number of rows of the first visibility matrix
//   scale           - output; the patch scaling stored in the file
// If the file cannot be opened a message is printed and the outputs are left as they were.
// If the number of views cannot be read or is not positive, a message is printed and
// centers, visibility and patches are left empty.
void Patch_experts::Read_SVR_patch_experts(string expert_location, std::vector<cv::Vec3d>& centers, std::vector<cv::Mat_<int> >& visibility, std::vector<std::vector<Multi_SVR_patch_expert> >& patches, double& scale)
{

	ifstream patchesFile(expert_location.c_str(), ios_base::in);

	if(!patchesFile.is_open())
	{
		cout << "Can't find/open the patches file" << endl;
		return;
	}

	LandmarkDetector::SkipComments(patchesFile);

	patchesFile >> scale;

	LandmarkDetector::SkipComments(patchesFile);

	int numberViews = 0;

	patchesFile >> numberViews;

	// A failed read or a non-positive count would otherwise end in a bogus resize or
	// in reading visibility[0] of an empty vector further down
	if(!patchesFile || numberViews <= 0)
	{
		cout << "Malformed patches file, can't read the number of views" << endl;
		centers.clear();
		visibility.clear();
		patches.clear();
		return;
	}

	// one center, one visibility matrix and one set of patches per view
	centers.resize(numberViews);
	visibility.resize(numberViews);

	patches.resize(numberViews);

	LandmarkDetector::SkipComments(patchesFile);

	// centers of each view (which view corresponds to which orientation)
	for(size_t i = 0; i < centers.size(); i++)
	{
		cv::Mat center;
		LandmarkDetector::ReadMat(patchesFile, center);
		center.copyTo(centers[i]);
		// stored in degrees, kept in radians
		centers[i] = centers[i] * M_PI / 180.0;
	}

	LandmarkDetector::SkipComments(patchesFile);

	// the visibility of points for each of the views (which verts are visible at a specific view
	for(size_t i = 0; i < visibility.size(); i++)
	{
		LandmarkDetector::ReadMat(patchesFile, visibility[i]);
	}

	// The first view determines the number of landmarks used for every view
	int numberOfPoints = visibility[0].rows;

	LandmarkDetector::SkipComments(patchesFile);

	// read the patches themselves
	for(size_t i = 0; i < patches.size(); i++)
	{
		// number of patches for each view
		patches[i].resize(numberOfPoints);
		// read in each patch
		for(int j = 0; j < numberOfPoints; j++)
		{
			patches[i][j].Read(patchesFile);
		}
	}

	cout << "Done" << endl;
}

//======================= Reading the CCNF patch experts =========================================//
// Reads the CCNF patch experts of a single scale from a binary file.
//   patchesFileLocation - path of the file to read
//   centers             - output; one center per view, converted from degrees to radians
//   visibility          - output; one visibility matrix per view
//   patches             - output; patches[view][landmark], with the landmark count taken
//                         from the number of rows of the first visibility matrix
//   patchScaling        - output; the patch scaling stored in the file
// The sigma components found in the file are also stored in this->sigma_components,
// replacing whatever was stored there before.
// If the file cannot be opened a message is printed and the outputs are left as they were.
// If one of the counts in the file cannot be read or is out of range, a message is printed
// and centers, visibility and patches are left empty.
// The file stores the scaling as 8 bytes and every count as 4 bytes, which are read
// straight into a double and into ints respectively.
void Patch_experts::Read_CCNF_patch_experts(string patchesFileLocation, std::vector<cv::Vec3d>& centers, std::vector<cv::Mat_<int> >& visibility, std::vector<std::vector<CCNF_patch_expert> >& patches, double& patchScaling)
{

	ifstream patchesFile(patchesFileLocation.c_str(), ios::in | ios::binary);

	if(!patchesFile.is_open())
	{
		cout << "Can't find/open the patches file" << endl;
		return;
	}

	patchesFile.read ((char*)&patchScaling, 8);

	int numberViews = 0;
	patchesFile.read ((char*)&numberViews, 4);

	// A failed read or a non-positive count would otherwise end in a bogus resize or
	// in reading visibility[0] of an empty vector further down
	if(!patchesFile || numberViews <= 0)
	{
		cout << "Malformed patches file, can't read the number of views" << endl;
		centers.clear();
		visibility.clear();
		patches.clear();
		return;
	}

	// one center, one visibility matrix and one set of patches per view
	centers.resize(numberViews);
	visibility.resize(numberViews);

	patches.resize(numberViews);

	// centers of each view (which view corresponds to which orientation)
	for(size_t i = 0; i < centers.size(); i++)
	{
		cv::Mat center;
		LandmarkDetector::ReadMatBin(patchesFile, center);
		center.copyTo(centers[i]);
		// stored in degrees, kept in radians
		centers[i] = centers[i] * M_PI / 180.0;
	}

	// the visibility of points for each of the views (which verts are visible at a specific view
	for(size_t i = 0; i < visibility.size(); i++)
	{
		LandmarkDetector::ReadMatBin(patchesFile, visibility[i]);
	}

	// The first view determines the number of landmarks used for every view
	int numberOfPoints = visibility[0].rows;

	// Read the possible SigmaInvs (without beta), this will be followed by patch reading (this assumes all of them have the same type, and number of betas)
	int num_win_sizes = 0;
	patchesFile.read ((char*)&num_win_sizes, 4);

	if(!patchesFile || num_win_sizes < 0)
	{
		cout << "Malformed patches file, can't read the number of window sizes" << endl;
		centers.clear();
		visibility.clear();
		patches.clear();
		return;
	}

	vector<int> windows;
	windows.resize(num_win_sizes);

	// Sigma components of this file, indexed by window size and then by component
	vector<vector<cv::Mat_<float> > > window_sigma_components;
	window_sigma_components.resize(num_win_sizes);

	for (int w=0; w < num_win_sizes; ++w)
	{
		patchesFile.read ((char*)&windows[w], 4);

		int num_sigma_comp = 0;
		patchesFile.read ((char*)&num_sigma_comp, 4);

		if(!patchesFile || num_sigma_comp < 0)
		{
			cout << "Malformed patches file, can't read the number of sigma components" << endl;
			centers.clear();
			visibility.clear();
			patches.clear();
			return;
		}

		window_sigma_components[w].resize(num_sigma_comp);

		for(int s=0; s < num_sigma_comp; ++s)
		{
			LandmarkDetector::ReadMatBin(patchesFile, window_sigma_components[w][s]);
		}
	}

	// Keep them on the object as well
	this->sigma_components = window_sigma_components;

	// read the patches themselves
	for(size_t i = 0; i < patches.size(); i++)
	{
		// number of patches for each view
		patches[i].resize(numberOfPoints);
		// read in each patch
		for(int j = 0; j < numberOfPoints; j++)
		{
			patches[i][j].Read(patchesFile, windows, window_sigma_components);
		}
	}

	cout << "Done" << endl;
}