From 65e91da3a6fd06f668d9c84a4c05c3aa60eefba1 Mon Sep 17 00:00:00 2001 From: Tadas Baltrusaitis Date: Fri, 3 Jun 2016 16:53:27 -0400 Subject: [PATCH] Full face model. --- exe/FeatureExtraction/FeatureExtraction.cpp | 51 +++++++++++++-------- lib/local/FaceAnalyser/src/FaceAnalyser.cpp | 44 ++++++++++++++---- lib/local/FaceAnalyser/src/Face_utils.cpp | 24 +++++----- 3 files changed, 78 insertions(+), 41 deletions(-) diff --git a/exe/FeatureExtraction/FeatureExtraction.cpp b/exe/FeatureExtraction/FeatureExtraction.cpp index e1b7f7c..3c3d46d 100644 --- a/exe/FeatureExtraction/FeatureExtraction.cpp +++ b/exe/FeatureExtraction/FeatureExtraction.cpp @@ -153,7 +153,7 @@ void create_directory(string output_path) } void get_output_feature_params(vector &output_similarity_aligned, vector &output_hog_aligned_files, double &similarity_scale, - int &similarity_size, bool &grayscale, bool &rigid, bool& verbose, + int &similarity_size, bool &grayscale, bool &rigid, bool& verbose, bool& dynamic, bool &output_2D_landmarks, bool &output_3D_landmarks, bool &output_model_params, bool &output_pose, bool &output_AUs, bool &output_gaze, vector &arguments); @@ -235,7 +235,7 @@ void outputAllFeatures(std::ofstream* output_file, bool output_2D_landmarks, boo cv::Point3f gazeDirection0, cv::Point3f gazeDirection1, const cv::Vec6d& pose_estimate, double fx, double fy, double cx, double cy, const FaceAnalysis::FaceAnalyser& face_analyser); -void post_process_output_file(FaceAnalysis::FaceAnalyser& face_analyser, string output_file); +void post_process_output_file(FaceAnalysis::FaceAnalyser& face_analyser, string output_file, bool dynamic); int main (int argc, char **argv) @@ -306,6 +306,7 @@ int main (int argc, char **argv) bool grayscale = false; bool video_output = false; bool rigid = false; + bool dynamic = true; // Indicates if a dynamic AU model should be used (dynamic is useful if the video is long enough to include neutral expressions) int num_hog_rows; int num_hog_cols; @@ -318,7 +319,7 @@ int main (int argc, char **argv) bool output_AUs = true; bool output_gaze = true; - get_output_feature_params(output_similarity_align, output_hog_align_files, sim_scale, sim_size, grayscale, rigid, verbose, + get_output_feature_params(output_similarity_align, output_hog_align_files, sim_scale, sim_size, grayscale, rigid, verbose, dynamic, output_2D_landmarks, output_3D_landmarks, output_model_params, output_pose, output_AUs, output_gaze, arguments); // Used for image masking @@ -359,13 +360,24 @@ int main (int argc, char **argv) int curr_img = -1; string au_loc; - if(boost::filesystem::exists(path("AU_predictors/AU_all_best.txt"))) + + string au_loc_local; + if (dynamic) { - au_loc = "AU_predictors/AU_all_best.txt"; + au_loc_local = "AU_predictors/AU_all_best.txt"; } else { - path loc = path(arguments[0]).parent_path() / "AU_predictors/AU_all_best.txt"; + au_loc_local = "AU_predictors/AU_all_static.txt"; + } + + if(boost::filesystem::exists(path(au_loc_local))) + { + au_loc = au_loc_local; + } + else + { + path loc = path(arguments[0]).parent_path() / au_loc_local; if(exists(loc)) { @@ -684,16 +696,10 @@ int main (int argc, char **argv) output_file.close(); - if(output_files.size() > 0) + if(output_files.size() > 0 && output_AUs) { - - // If the video is long enough post-process it for AUs - if (output_AUs && frame_count > 1000) - { - cout << "Postprocessing the Action Unit predictions" << endl; - - post_process_output_file(face_analyser, output_files[f_n]); - } + cout << "Postprocessing the Action Unit predictions" << endl; + post_process_output_file(face_analyser, output_files[f_n], dynamic); } // Reset the models for the next video face_analyser.Reset(); @@ -718,7 +724,7 @@ int main (int argc, char **argv) } // Allows for post processing of the AU signal -void post_process_output_file(FaceAnalysis::FaceAnalyser& face_analyser, string output_file) +void post_process_output_file(FaceAnalysis::FaceAnalyser& face_analyser, string output_file, bool dynamic) { vector certainties; @@ -728,8 +734,8 @@ void post_process_output_file(FaceAnalysis::FaceAnalyser& face_analyser, string vector>> predictions_class; // Construct the new values to overwrite the output file with - face_analyser.ExtractAllPredictionsOfflineReg(predictions_reg, certainties, successes, timestamps); - face_analyser.ExtractAllPredictionsOfflineClass(predictions_class, certainties, successes, timestamps); + face_analyser.ExtractAllPredictionsOfflineReg(predictions_reg, certainties, successes, timestamps, dynamic); + face_analyser.ExtractAllPredictionsOfflineClass(predictions_class, certainties, successes, timestamps, dynamic); int num_class = predictions_class.size(); int num_reg = predictions_reg.size(); @@ -1024,7 +1030,7 @@ void outputAllFeatures(std::ofstream* output_file, bool output_2D_landmarks, boo void get_output_feature_params(vector &output_similarity_aligned, vector &output_hog_aligned_files, double &similarity_scale, - int &similarity_size, bool &grayscale, bool &rigid, bool& verbose, + int &similarity_size, bool &grayscale, bool &rigid, bool& verbose, bool& dynamic, bool &output_2D_landmarks, bool &output_3D_landmarks, bool &output_model_params, bool &output_pose, bool &output_AUs, bool &output_gaze, vector &arguments) { @@ -1041,6 +1047,9 @@ void get_output_feature_params(vector &output_similarity_aligned, vector string input_root = ""; string output_root = ""; + // By default the model is dynamic + dynamic = true; + // First check if there is a root argument (so that videos and outputs could be defined more easilly) for (size_t i = 0; i < arguments.size(); ++i) { @@ -1088,6 +1097,10 @@ void get_output_feature_params(vector &output_similarity_aligned, vector { rigid = true; } + else if (arguments[i].compare("-au_static") == 0) + { + dynamic = false; + } else if (arguments[i].compare("-g") == 0) { grayscale = true; diff --git a/lib/local/FaceAnalyser/src/FaceAnalyser.cpp b/lib/local/FaceAnalyser/src/FaceAnalyser.cpp index 5308823..ba6e346 100644 --- a/lib/local/FaceAnalyser/src/FaceAnalyser.cpp +++ b/lib/local/FaceAnalyser/src/FaceAnalyser.cpp @@ -592,10 +592,12 @@ void FaceAnalyser::PostprocessPredictions() } } -void FaceAnalyser::ExtractAllPredictionsOfflineReg(vector>>& au_predictions, vector& confidences, vector& successes, vector& timestamps) +void FaceAnalyser::ExtractAllPredictionsOfflineReg(vector>>& au_predictions, vector& confidences, vector& successes, vector& timestamps, bool dynamic) { - - PostprocessPredictions(); + if(dynamic) + { + PostprocessPredictions(); + } timestamps = this->timestamps; au_predictions.clear(); @@ -624,14 +626,14 @@ void FaceAnalyser::ExtractAllPredictionsOfflineReg(vector 5) @@ -666,9 +668,12 @@ void FaceAnalyser::ExtractAllPredictionsOfflineReg(vector>>& au_predictions, vector& confidences, vector& successes, vector& timestamps) +void FaceAnalyser::ExtractAllPredictionsOfflineClass(vector>>& au_predictions, vector& confidences, vector& successes, vector& timestamps, bool dynamic) { - PostprocessPredictions(); + if (dynamic) + { + PostprocessPredictions(); + } timestamps = this->timestamps; au_predictions.clear(); @@ -678,6 +683,25 @@ void FaceAnalyser::ExtractAllPredictionsOfflineClass(vectorfirst; vector au_vals = au_iter->second; + // Perform a moving average of 7 frames on classifications + int window_size = 7; + vector au_vals_tmp = au_vals; + for (size_t i = (window_size - 1)/2; i < au_vals.size() - (window_size - 1) / 2; ++i) + { + double sum = 0; + for (int w = -(window_size - 1) / 2; w < (window_size - 1) / 2; ++w) + { + sum += au_vals_tmp[i + w]; + } + sum = sum / window_size; + if (sum < 0.5) + sum = 0; + else + sum = 1; + + au_vals[i] = sum; + } + au_predictions.push_back(std::pair>(au_name, au_vals)); } diff --git a/lib/local/FaceAnalyser/src/Face_utils.cpp b/lib/local/FaceAnalyser/src/Face_utils.cpp index 8b2ed4f..d95117e 100644 --- a/lib/local/FaceAnalyser/src/Face_utils.cpp +++ b/lib/local/FaceAnalyser/src/Face_utils.cpp @@ -221,19 +221,19 @@ namespace FaceAnalysis destination_landmarks.col(1) = destination_landmarks.col(1) + warp_matrix(1,2); // Move the eyebrows up to include more of upper face - destination_landmarks.at(0,1) -= 15; - destination_landmarks.at(16,1) -= 15; + destination_landmarks.at(0,1) -= 30; + destination_landmarks.at(16,1) -= 30; - destination_landmarks.at(17,1) -= 7; - destination_landmarks.at(18,1) -= 7; - destination_landmarks.at(19,1) -= 7; - destination_landmarks.at(20,1) -= 7; - destination_landmarks.at(21,1) -= 7; - destination_landmarks.at(22,1) -= 7; - destination_landmarks.at(23,1) -= 7; - destination_landmarks.at(24,1) -= 7; - destination_landmarks.at(25,1) -= 7; - destination_landmarks.at(26,1) -= 7; + destination_landmarks.at(17,1) -= 30; + destination_landmarks.at(18,1) -= 30; + destination_landmarks.at(19,1) -= 30; + destination_landmarks.at(20,1) -= 30; + destination_landmarks.at(21,1) -= 30; + destination_landmarks.at(22,1) -= 30; + destination_landmarks.at(23,1) -= 30; + destination_landmarks.at(24,1) -= 30; + destination_landmarks.at(25,1) -= 30; + destination_landmarks.at(26,1) -= 30; destination_landmarks = cv::Mat(destination_landmarks.t()).reshape(1, 1).t();