/////////////////////////////////////////////////////////////////////////////// // Copyright (C) 2017, Carnegie Mellon University and University of Cambridge, // all rights reserved. // // ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY // // BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT. // IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE. // // License can be found in OpenFace-license.txt // // * Any publications arising from the use of this software, including but // not limited to academic journal and conference publications, technical // reports and manuals, must cite at least one of the following works: // // OpenFace: an open source facial behavior analysis toolkit // Tadas Baltrušaitis, Peter Robinson, and Louis-Philippe Morency // in IEEE Winter Conference on Applications of Computer Vision, 2016 // // Rendering of Eyes for Eye-Shape Registration and Gaze Estimation // Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling // in IEEE International. Conference on Computer Vision (ICCV), 2015 // // Cross-dataset learning and person-speci?c normalisation for automatic Action Unit detection // Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson // in Facial Expression Recognition and Analysis Challenge, // IEEE International Conference on Automatic Face and Gesture Recognition, 2015 // // Constrained Local Neural Fields for robust facial landmark detection in the wild. // Tadas Baltrušaitis, Peter Robinson, and Louis-Philippe Morency. // in IEEE Int. Conference on Computer Vision Workshops, 300 Faces in-the-Wild Challenge, 2013. // /////////////////////////////////////////////////////////////////////////////// #include // OpenCV includes #include #include // For FHOG visualisation #include using namespace std; namespace FaceAnalysis { // Pick only the more stable/rigid points under changes of expression void extract_rigid_points(cv::Mat_& source_points, cv::Mat_& destination_points) { if(source_points.rows == 68) { cv::Mat_ tmp_source = source_points.clone(); source_points = cv::Mat_(); // Push back the rigid points (some face outline, eyes, and nose) source_points.push_back(tmp_source.row(1)); source_points.push_back(tmp_source.row(2)); source_points.push_back(tmp_source.row(3)); source_points.push_back(tmp_source.row(4)); source_points.push_back(tmp_source.row(12)); source_points.push_back(tmp_source.row(13)); source_points.push_back(tmp_source.row(14)); source_points.push_back(tmp_source.row(15)); source_points.push_back(tmp_source.row(27)); source_points.push_back(tmp_source.row(28)); source_points.push_back(tmp_source.row(29)); source_points.push_back(tmp_source.row(31)); source_points.push_back(tmp_source.row(32)); source_points.push_back(tmp_source.row(33)); source_points.push_back(tmp_source.row(34)); source_points.push_back(tmp_source.row(35)); source_points.push_back(tmp_source.row(36)); source_points.push_back(tmp_source.row(39)); source_points.push_back(tmp_source.row(40)); source_points.push_back(tmp_source.row(41)); source_points.push_back(tmp_source.row(42)); source_points.push_back(tmp_source.row(45)); source_points.push_back(tmp_source.row(46)); source_points.push_back(tmp_source.row(47)); cv::Mat_ tmp_dest = destination_points.clone(); destination_points = cv::Mat_(); // Push back the rigid points destination_points.push_back(tmp_dest.row(1)); destination_points.push_back(tmp_dest.row(2)); destination_points.push_back(tmp_dest.row(3)); destination_points.push_back(tmp_dest.row(4)); destination_points.push_back(tmp_dest.row(12)); destination_points.push_back(tmp_dest.row(13)); destination_points.push_back(tmp_dest.row(14)); destination_points.push_back(tmp_dest.row(15)); destination_points.push_back(tmp_dest.row(27)); destination_points.push_back(tmp_dest.row(28)); destination_points.push_back(tmp_dest.row(29)); destination_points.push_back(tmp_dest.row(31)); destination_points.push_back(tmp_dest.row(32)); destination_points.push_back(tmp_dest.row(33)); destination_points.push_back(tmp_dest.row(34)); destination_points.push_back(tmp_dest.row(35)); destination_points.push_back(tmp_dest.row(36)); destination_points.push_back(tmp_dest.row(39)); destination_points.push_back(tmp_dest.row(40)); destination_points.push_back(tmp_dest.row(41)); destination_points.push_back(tmp_dest.row(42)); destination_points.push_back(tmp_dest.row(45)); destination_points.push_back(tmp_dest.row(46)); destination_points.push_back(tmp_dest.row(47)); } } // Aligning a face to a common reference frame void AlignFace(cv::Mat& aligned_face, const cv::Mat& frame, const LandmarkDetector::CLNF& clnf_model, bool rigid, double sim_scale, int out_width, int out_height) { // Will warp to scaled mean shape cv::Mat_ similarity_normalised_shape = clnf_model.pdm.mean_shape * sim_scale; // Discard the z component similarity_normalised_shape = similarity_normalised_shape(cv::Rect(0, 0, 1, 2*similarity_normalised_shape.rows/3)).clone(); cv::Mat_ source_landmarks = clnf_model.detected_landmarks.reshape(1, 2).t(); cv::Mat_ destination_landmarks = similarity_normalised_shape.reshape(1, 2).t(); // Aligning only the more rigid points if(rigid) { extract_rigid_points(source_landmarks, destination_landmarks); } cv::Matx22d scale_rot_matrix = LandmarkDetector::AlignShapesWithScale(source_landmarks, destination_landmarks); cv::Matx23d warp_matrix; warp_matrix(0,0) = scale_rot_matrix(0,0); warp_matrix(0,1) = scale_rot_matrix(0,1); warp_matrix(1,0) = scale_rot_matrix(1,0); warp_matrix(1,1) = scale_rot_matrix(1,1); double tx = clnf_model.params_global[4]; double ty = clnf_model.params_global[5]; cv::Vec2d T(tx, ty); T = scale_rot_matrix * T; // Make sure centering is correct warp_matrix(0,2) = -T(0) + out_width/2; warp_matrix(1,2) = -T(1) + out_height/2; cv::warpAffine(frame, aligned_face, warp_matrix, cv::Size(out_width, out_height), cv::INTER_LINEAR); } // Aligning a face to a common reference frame void AlignFaceMask(cv::Mat& aligned_face, const cv::Mat& frame, const LandmarkDetector::CLNF& clnf_model, const cv::Mat_& triangulation, bool rigid, double sim_scale, int out_width, int out_height) { // Will warp to scaled mean shape cv::Mat_ similarity_normalised_shape = clnf_model.pdm.mean_shape * sim_scale; // Discard the z component similarity_normalised_shape = similarity_normalised_shape(cv::Rect(0, 0, 1, 2*similarity_normalised_shape.rows/3)).clone(); cv::Mat_ source_landmarks = clnf_model.detected_landmarks.reshape(1, 2).t(); cv::Mat_ destination_landmarks = similarity_normalised_shape.reshape(1, 2).t(); // Aligning only the more rigid points if(rigid) { extract_rigid_points(source_landmarks, destination_landmarks); } cv::Matx22d scale_rot_matrix = LandmarkDetector::AlignShapesWithScale(source_landmarks, destination_landmarks); cv::Matx23d warp_matrix; warp_matrix(0,0) = scale_rot_matrix(0,0); warp_matrix(0,1) = scale_rot_matrix(0,1); warp_matrix(1,0) = scale_rot_matrix(1,0); warp_matrix(1,1) = scale_rot_matrix(1,1); double tx = clnf_model.params_global[4]; double ty = clnf_model.params_global[5]; cv::Vec2d T(tx, ty); T = scale_rot_matrix * T; // Make sure centering is correct warp_matrix(0,2) = -T(0) + out_width/2; warp_matrix(1,2) = -T(1) + out_height/2; cv::warpAffine(frame, aligned_face, warp_matrix, cv::Size(out_width, out_height), cv::INTER_LINEAR); // Move the destination landmarks there as well cv::Matx22d warp_matrix_2d(warp_matrix(0,0), warp_matrix(0,1), warp_matrix(1,0), warp_matrix(1,1)); destination_landmarks = cv::Mat(clnf_model.detected_landmarks.reshape(1, 2).t()) * cv::Mat(warp_matrix_2d).t(); destination_landmarks.col(0) = destination_landmarks.col(0) + warp_matrix(0,2); destination_landmarks.col(1) = destination_landmarks.col(1) + warp_matrix(1,2); // Move the eyebrows up to include more of upper face destination_landmarks.at(0,1) -= (30/0.7)*sim_scale; destination_landmarks.at(16,1) -= (30 / 0.7)*sim_scale; destination_landmarks.at(17,1) -= (30 / 0.7)*sim_scale; destination_landmarks.at(18,1) -= (30 / 0.7)*sim_scale; destination_landmarks.at(19,1) -= (30 / 0.7)*sim_scale; destination_landmarks.at(20,1) -= (30 / 0.7)*sim_scale; destination_landmarks.at(21,1) -= (30 / 0.7)*sim_scale; destination_landmarks.at(22,1) -= (30 / 0.7)*sim_scale; destination_landmarks.at(23,1) -= (30 / 0.7)*sim_scale; destination_landmarks.at(24,1) -= (30 / 0.7)*sim_scale; destination_landmarks.at(25,1) -= (30 / 0.7)*sim_scale; destination_landmarks.at(26,1) -= (30 / 0.7)*sim_scale; destination_landmarks = cv::Mat(destination_landmarks.t()).reshape(1, 1).t(); LandmarkDetector::PAW paw(destination_landmarks, triangulation, 0, 0, aligned_face.cols-1, aligned_face.rows-1); // Mask each of the channels (a bit of a roundabout way, but OpenCV 3.1 in debug mode doesn't seem to be able to handle a more direct way using split and merge) vector aligned_face_channels(aligned_face.channels()); for (int c = 0; c < aligned_face.channels(); ++c) { cv::extractChannel(aligned_face, aligned_face_channels[c], c); } for(size_t i = 0; i < aligned_face_channels.size(); ++i) { cv::multiply(aligned_face_channels[i], paw.pixel_mask, aligned_face_channels[i], 1.0, CV_8U); } if(aligned_face.channels() == 3) { cv::Mat planes[] = { aligned_face_channels[0], aligned_face_channels[1], aligned_face_channels[2] }; cv::merge(planes, 3, aligned_face); } else { aligned_face = aligned_face_channels[0]; } } void Visualise_FHOG(const cv::Mat_& descriptor, int num_rows, int num_cols, cv::Mat& visualisation) { // First convert to dlib format dlib::array2d > hog(num_rows, num_cols); cv::MatConstIterator_ descriptor_it = descriptor.begin(); for(int y = 0; y < num_cols; ++y) { for(int x = 0; x < num_rows; ++x) { for(unsigned int o = 0; o < 31; ++o) { hog[y][x](o) = *descriptor_it++; } } } // Draw the FHOG to OpenCV format auto fhog_vis = dlib::draw_fhog(hog); visualisation = dlib::toMat(fhog_vis).clone(); } // Create a row vector Felzenszwalb HOG descriptor from a given image void Extract_FHOG_descriptor(cv::Mat_& descriptor, const cv::Mat& image, int& num_rows, int& num_cols, int cell_size) { dlib::array2d > hog; if(image.channels() == 1) { dlib::cv_image dlib_warped_img(image); dlib::extract_fhog_features(dlib_warped_img, hog, cell_size); } else { dlib::cv_image dlib_warped_img(image); dlib::extract_fhog_features(dlib_warped_img, hog, cell_size); } // Convert to a usable format num_cols = hog.nc(); num_rows = hog.nr(); descriptor = cv::Mat_(1, num_cols * num_rows * 31); cv::MatIterator_ descriptor_it = descriptor.begin(); for(int y = 0; y < num_cols; ++y) { for(int x = 0; x < num_rows; ++x) { for(unsigned int o = 0; o < 31; ++o) { *descriptor_it++ = (double)hog[y][x](o); } } } } // Extract summary statistics (mean, stdev, min, max) from each dimension of a descriptor, each row is a descriptor void ExtractSummaryStatistics(const cv::Mat_& descriptors, cv::Mat_& sum_stats, bool use_mean, bool use_stdev, bool use_max_min) { // Using four summary statistics at the moment // Means, stds, mins, maxs int num_stats = 0; if(use_mean) num_stats++; if(use_stdev) num_stats++; if(use_max_min) num_stats++; sum_stats = cv::Mat_(1, descriptors.cols * num_stats, 0.0); for(int i = 0; i < descriptors.cols; ++i) { cv::Scalar mean, stdev; cv::meanStdDev(descriptors.col(i), mean, stdev); int add = 0; if(use_mean) { sum_stats.at(0, i*num_stats + add) = mean[0]; add++; } if(use_stdev) { sum_stats.at(0, i*num_stats + add) = stdev[0]; add++; } if(use_max_min) { double min, max; cv::minMaxIdx(descriptors.col(i), &min, &max); sum_stats.at(0, i*num_stats + add) = max - min; add++; } } } void AddDescriptor(cv::Mat_& descriptors, cv::Mat_ new_descriptor, int curr_frame, int num_frames_to_keep) { if(descriptors.empty()) { descriptors = cv::Mat_(num_frames_to_keep, new_descriptor.cols, 0.0); } int row_to_change = curr_frame % num_frames_to_keep; new_descriptor.copyTo(descriptors.row(row_to_change)); } }