From 52c50b4ff3389e06846aec77479c7cf11dcddd7c Mon Sep 17 00:00:00 2001 From: Tadas Baltrusaitis Date: Wed, 8 Mar 2017 11:46:50 -0500 Subject: [PATCH] - Fixing the issue with gaze not tracking properly in video and landmark modes. - Fixing the simscale/simalign bug --- .gitignore | 3 + .travis.yml | 1 + appveyor.yml | 1 + exe/FeatureExtraction/FeatureExtraction.cpp | 32 +- lib/local/FaceAnalyser/include/FaceAnalyser.h | 273 +++++++++--------- lib/local/FaceAnalyser/src/FaceAnalyser.cpp | 141 ++++----- lib/local/FaceAnalyser/src/Face_utils.cpp | 24 +- .../src/LandmarkDetectorModel.cpp | 5 +- .../Demos/feature_extraction_demo_img_seq.m | 2 +- .../Gaze Experiments/mpii_1500_errs.mat | Bin 11721 -> 11721 bytes 10 files changed, 226 insertions(+), 256 deletions(-) diff --git a/.gitignore b/.gitignore index 04fad1c..94adde9 100644 --- a/.gitignore +++ b/.gitignore @@ -44,3 +44,6 @@ exe/Recording/Debug/ lib/3rdParty/dlib/Debug/ lib/local/FaceAnalyser/Debug/ lib/local/LandmarkDetector/Debug/ +matlab_runners/Head Pose Experiments/experiments/biwi_out/ +matlab_runners/Head Pose Experiments/experiments/bu_out/ +matlab_runners/Head Pose Experiments/experiments/ict_out/ diff --git a/.travis.yml b/.travis.yml index 76b6e2b..fde65bb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -75,4 +75,5 @@ script: - ../build/bin/FaceLandmarkImg -inroot ../videos -f Obama.jpg -outroot data -of obama.txt -op obama.3d -oi obama.bmp -multi_view 1 -wild -q - ../build/bin/FaceLandmarkVidMulti -inroot ../videos -f multi_face.avi -outroot output -ov multi_face.avi -q - ../build/bin/FeatureExtraction -f "../videos/1815_01_008_tony_blair.avi" -outroot output_features -ov blair.avi -of "1815_01_008_tony_blair.txt" -simalign aligned -ov feat_test.avi -hogalign hog_test.dat -q + - ../build/bin/FeatureExtraction -f "../videos/1815_01_008_tony_blair.avi" -outroot output_features -simsize 200 -simscale 0.5 -ov blair.avi -of "1815_01_008_tony_blair.txt" -simalign aligned -ov feat_test.avi -hogalign hog_test.dat -q - ../build/bin/FaceLandmarkVid -inroot ../videos -f 1815_01_008_tony_blair.avi -f 0188_03_021_al_pacino.avi -f 0217_03_006_alanis_morissette.avi -outroot output_data -ov 1.avi -ov 2.avi -ov 3.avi -q diff --git a/appveyor.yml b/appveyor.yml index dcba59b..5c90d1f 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -25,4 +25,5 @@ test_script: - cmd: if exist "../videos" (FaceLandmarkImg.exe -inroot ../videos -f obama.jpg -outroot out_data -of obama.pts -op obama.3d -oi obama.bmp -q) else (FaceLandmarkImg.exe -inroot ../../videos -f obama.jpg -outroot out_data -of obama.pts -op obama.3d -oi obama.bmp -q) - cmd: if exist "../videos" (FaceLandmarkVidMulti.exe -inroot ../videos -f multi_face.avi -ov multi_face.avi -q) else (FaceLandmarkVidMulti.exe -inroot ../../videos -f multi_face.avi -ov multi_face.avi -q) - cmd: if exist "../videos" (FeatureExtraction.exe -f "../videos/1815_01_008_tony_blair.avi" -outroot output_features -of "1815_01_008_tony_blair.txt" -simalign aligned -ov feat_track.avi -hogalign hog_test.dat -q) else (FeatureExtraction.exe -f "../../videos/1815_01_008_tony_blair.avi" -outroot output_features -of "1815_01_008_tony_blair.txt" -simalign aligned -ov feat_track.avi -hogalign hog_test.dat -q) + - cmd: if exist "../videos" (FeatureExtraction.exe -f "../videos/1815_01_008_tony_blair.avi" -outroot output_features -of "1815_01_008_tony_blair.txt" -simalign aligned -simsize 200 -simscale 0.5 -ov feat_track.avi -hogalign hog_test.dat -q) else (FeatureExtraction.exe -f "../../videos/1815_01_008_tony_blair.avi" -outroot output_features -of "1815_01_008_tony_blair.txt" -simalign aligned -simsize 200 -simscale 0.5 -ov feat_track.avi -hogalign hog_test.dat -q) - cmd: if exist "../videos" (FaceLandmarkVid.exe -f "../videos/1815_01_008_tony_blair.avi" -ov track.avi -q) else (FaceLandmarkVid.exe -f "../../videos/1815_01_008_tony_blair.avi" -ov track.avi -q) diff --git a/exe/FeatureExtraction/FeatureExtraction.cpp b/exe/FeatureExtraction/FeatureExtraction.cpp index 2c67b6c..ea8e6be 100644 --- a/exe/FeatureExtraction/FeatureExtraction.cpp +++ b/exe/FeatureExtraction/FeatureExtraction.cpp @@ -308,9 +308,9 @@ int main (int argc, char **argv) vector output_similarity_align; vector output_hog_align_files; - double sim_scale = 0.7; + double sim_scale = -1; int sim_size = 112; - bool grayscale = false; + bool grayscale = false; bool video_output = false; bool dynamic = true; // Indicates if a dynamic AU model should be used (dynamic is useful if the video is long enough to include neutral expressions) int num_hog_rows; @@ -321,13 +321,13 @@ int main (int argc, char **argv) bool output_2D_landmarks = true; bool output_3D_landmarks = true; bool output_model_params = true; - bool output_pose = true; + bool output_pose = true; bool output_AUs = true; bool output_gaze = true; get_output_feature_params(output_similarity_align, output_hog_align_files, sim_scale, sim_size, grayscale, verbose, dynamic, output_2D_landmarks, output_3D_landmarks, output_model_params, output_pose, output_AUs, output_gaze, arguments); - + // Used for image masking string tri_loc; boost::filesystem::path tri_loc_path = boost::filesystem::path("model/tris_68_full.txt"); @@ -391,7 +391,10 @@ int main (int argc, char **argv) } // Creating a face analyser that will be used for AU extraction - FaceAnalysis::FaceAnalyser face_analyser(vector(), 0.7, 112, 112, au_loc, tri_loc); + // Make sure sim_scale is proportional to sim_size if not set + if (sim_scale == -1) sim_scale = sim_size * (0.7 / 112.0); + + FaceAnalysis::FaceAnalyser face_analyser(vector(), sim_scale, sim_size, sim_size, au_loc, tri_loc); while(!done) // this is not a for loop as we might also be reading from a webcam { @@ -593,7 +596,7 @@ int main (int argc, char **argv) } if(hog_output_file.is_open()) { - FaceAnalysis::Extract_FHOG_descriptor(hog_descriptor, sim_warped_img, num_hog_rows, num_hog_cols); + face_analyser.GetLatestHOG(hog_descriptor, num_hog_rows, num_hog_cols); if(visualise_hog && !det_parameters.quiet_mode) { @@ -615,13 +618,13 @@ int main (int argc, char **argv) pose_estimate = LandmarkDetector::GetCorrectedPoseCamera(face_model, fx, fy, cx, cy); } - if(hog_output_file.is_open()) + if (hog_output_file.is_open()) { output_HOG_frame(&hog_output_file, detection_success, hog_descriptor, num_hog_rows, num_hog_cols); } // Write the similarity normalised output - if(!output_similarity_align.empty()) + if (!output_similarity_align.empty()) { if (sim_warped_img.channels() == 3 && grayscale) @@ -630,18 +633,18 @@ int main (int argc, char **argv) } char name[100]; - - // output the frame number - std::sprintf(name, "frame_det_%06d.bmp", frame_count); + + // Filename is based on frame number + std::sprintf(name, "frame_det_%06d.bmp", frame_count + 1); // Construct the output filename boost::filesystem::path slash("/"); - + std::string preferredSlash = slash.make_preferred().string(); - + string out_file = output_similarity_align[f_n] + preferredSlash + string(name); bool write_success = imwrite(out_file, sim_warped_img); - + if (!write_success) { cout << "Could not output similarity aligned image image" << endl; @@ -1206,6 +1209,7 @@ void get_output_feature_params(vector &output_similarity_aligned, vector } + // Can process images via directories creating a separate output file per directory void get_image_input_output_params_feats(vector > &input_image_files, bool& as_video, vector &arguments) { diff --git a/lib/local/FaceAnalyser/include/FaceAnalyser.h b/lib/local/FaceAnalyser/include/FaceAnalyser.h index e5bd9f8..f55d455 100644 --- a/lib/local/FaceAnalyser/include/FaceAnalyser.h +++ b/lib/local/FaceAnalyser/include/FaceAnalyser.h @@ -74,184 +74,183 @@ namespace FaceAnalysis { -class FaceAnalyser{ + class FaceAnalyser { -public: + public: - enum RegressorType{ SVR_appearance_static_linear = 0, SVR_appearance_dynamic_linear = 1, SVR_dynamic_geom_linear = 2, SVR_combined_linear = 3, SVM_linear_stat = 4, SVM_linear_dyn = 5, SVR_linear_static_seg = 6, SVR_linear_dynamic_seg =7}; + enum RegressorType { SVR_appearance_static_linear = 0, SVR_appearance_dynamic_linear = 1, SVR_dynamic_geom_linear = 2, SVR_combined_linear = 3, SVM_linear_stat = 4, SVM_linear_dyn = 5, SVR_linear_static_seg = 6, SVR_linear_dynamic_seg = 7 }; - // Constructor from a model file (or a default one if not provided - // TODO scale width and height should be read in as part of the model as opposed to being here? - FaceAnalyser(vector orientation_bins = vector(), double scale = 0.7, int width = 112, int height = 112, std::string au_location = "AU_predictors/AU_all_best.txt", std::string tri_location = "model/tris_68_full.txt"); + // Constructor from a model file (or a default one if not provided + // TODO scale width and height should be read in as part of the model as opposed to being here? + FaceAnalyser(vector orientation_bins = vector(), double scale = 0.7, int width = 112, int height = 112, std::string au_location = "AU_predictors/AU_all_best.txt", std::string tri_location = "model/tris_68_full.txt"); - void AddNextFrame(const cv::Mat& frame, const LandmarkDetector::CLNF& clnf, double timestamp_seconds, bool online = false, bool visualise = true); + void AddNextFrame(const cv::Mat& frame, const LandmarkDetector::CLNF& clnf, double timestamp_seconds, bool online = false, bool visualise = true); - // If the features are extracted manually (shouldn't really be used) - void PredictAUs(const cv::Mat_& hog_features, const cv::Mat_& geom_features, const LandmarkDetector::CLNF& clnf_model, bool online); + // If the features are extracted manually (shouldn't really be used) + void PredictAUs(const cv::Mat_& hog_features, const cv::Mat_& geom_features, const LandmarkDetector::CLNF& clnf_model, bool online); - cv::Mat GetLatestHOGDescriptorVisualisation(); + cv::Mat GetLatestHOGDescriptorVisualisation(); - double GetCurrentTimeSeconds(); - - // Grab the current predictions about AUs from the face analyser - std::vector> GetCurrentAUsClass() const; // AU presence - std::vector> GetCurrentAUsReg() const; // AU intensity - std::vector> GetCurrentAUsCombined() const; // Both presense and intensity + double GetCurrentTimeSeconds(); - // A standalone call for predicting AUs from a static image, the first element in the pair represents occurence the second intensity - // This call is useful for detecting action units in images - std::pair>, std::vector>> PredictStaticAUs(const cv::Mat& frame, const LandmarkDetector::CLNF& clnf, bool visualise = true); + // Grab the current predictions about AUs from the face analyser + std::vector> GetCurrentAUsClass() const; // AU presence + std::vector> GetCurrentAUsReg() const; // AU intensity + std::vector> GetCurrentAUsCombined() const; // Both presense and intensity - void Reset(); + // A standalone call for predicting AUs from a static image, the first element in the pair represents occurence the second intensity + // This call is useful for detecting action units in images + std::pair>, std::vector>> PredictStaticAUs(const cv::Mat& frame, const LandmarkDetector::CLNF& clnf, bool visualise = true); - void GetLatestHOG(cv::Mat_& hog_descriptor, int& num_rows, int& num_cols); - void GetLatestAlignedFace(cv::Mat& image); - - void GetLatestNeutralHOG(cv::Mat_& hog_descriptor, int& num_rows, int& num_cols); - - cv::Mat_ GetTriangulation(); + void Reset(); - cv::Mat_ GetLatestAlignedFaceGrayscale(); - - void GetGeomDescriptor(cv::Mat_& geom_desc); + void GetLatestHOG(cv::Mat_& hog_descriptor, int& num_rows, int& num_cols); + void GetLatestAlignedFace(cv::Mat& image); - void ExtractCurrentMedians(vector& hog_medians, vector& face_image_medians, vector& orientations); + void GetLatestNeutralHOG(cv::Mat_& hog_descriptor, int& num_rows, int& num_cols); - // Grab the names of AUs being predicted - std::vector GetAUClassNames() const; // Presence - std::vector GetAURegNames() const; // Intensity + cv::Mat_ GetTriangulation(); - // Identify if models are static or dynamic (useful for correction and shifting) - std::vector GetDynamicAUClass() const; // Presence - std::vector> GetDynamicAUReg() const; // Intensity + void GetGeomDescriptor(cv::Mat_& geom_desc); + + // Grab the names of AUs being predicted + std::vector GetAUClassNames() const; // Presence + std::vector GetAURegNames() const; // Intensity + + // Identify if models are static or dynamic (useful for correction and shifting) + std::vector GetDynamicAUClass() const; // Presence + std::vector> GetDynamicAUReg() const; // Intensity - void ExtractAllPredictionsOfflineReg(vector>>& au_predictions, vector& confidences, vector& successes, vector& timestamps, bool dynamic); - void ExtractAllPredictionsOfflineClass(vector>>& au_predictions, vector& confidences, vector& successes, vector& timestamps, bool dynamic); + void ExtractAllPredictionsOfflineReg(vector>>& au_predictions, vector& confidences, vector& successes, vector& timestamps, bool dynamic); + void ExtractAllPredictionsOfflineClass(vector>>& au_predictions, vector& confidences, vector& successes, vector& timestamps, bool dynamic); -private: + // Helper function for post-processing AU output files + void FaceAnalyser::PostprocessOutputFile(string output_file, bool dynamic); - // Where the predictions are kept - std::vector> AU_predictions_reg; - std::vector> AU_predictions_class; + private: - std::vector> AU_predictions_combined; + // Where the predictions are kept + std::vector> AU_predictions_reg; + std::vector> AU_predictions_class; - // Keeping track of AU predictions over time (useful for post-processing) - vector timestamps; - std::map> AU_predictions_reg_all_hist; - std::map> AU_predictions_class_all_hist; - std::vector confidences; - std::vector valid_preds; + std::vector> AU_predictions_combined; - int frames_tracking; + // Keeping track of AU predictions over time (useful for post-processing) + vector timestamps; + std::map> AU_predictions_reg_all_hist; + std::map> AU_predictions_class_all_hist; + std::vector confidences; + std::vector valid_preds; - // Cache of intermediate images - cv::Mat_ aligned_face_grayscale; - cv::Mat aligned_face; - cv::Mat hog_descriptor_visualisation; + int frames_tracking; - // Private members to be used for predictions - // The HOG descriptor of the last frame - cv::Mat_ hog_desc_frame; - int num_hog_rows; - int num_hog_cols; + // Cache of intermediate images + cv::Mat aligned_face_for_au; + cv::Mat aligned_face_for_output; + cv::Mat hog_descriptor_visualisation; - // Keep a running median of the hog descriptors and a aligned images - cv::Mat_ hog_desc_median; - cv::Mat_ face_image_median; + // Private members to be used for predictions + // The HOG descriptor of the last frame + cv::Mat_ hog_desc_frame; + int num_hog_rows; + int num_hog_cols; - // Use histograms for quick (but approximate) median computation - // Use the same for - vector > hog_desc_hist; + // Keep a running median of the hog descriptors and a aligned images + cv::Mat_ hog_desc_median; + cv::Mat_ face_image_median; - // This is not being used at the moment as it is a bit slow - vector > face_image_hist; - vector face_image_hist_sum; + // Use histograms for quick (but approximate) median computation + // Use the same for + vector > hog_desc_hist; - vector head_orientations; + // This is not being used at the moment as it is a bit slow + vector > face_image_hist; + vector face_image_hist_sum; - int num_bins_hog; - double min_val_hog; - double max_val_hog; - vector hog_hist_sum; - int view_used; + vector head_orientations; - // The geometry descriptor (rigid followed by non-rigid shape parameters from CLNF) - cv::Mat_ geom_descriptor_frame; - cv::Mat_ geom_descriptor_median; - - int geom_hist_sum; - cv::Mat_ geom_desc_hist; - int num_bins_geom; - double min_val_geom; - double max_val_geom; - - // Using the bounding box of previous analysed frame to determine if a reset is needed - cv::Rect_ face_bounding_box; - - // The AU predictions internally - std::vector> PredictCurrentAUs(int view); - std::vector> PredictCurrentAUsClass(int view); + int num_bins_hog; + double min_val_hog; + double max_val_hog; + vector hog_hist_sum; + int view_used; - // special step for online (rather than offline AU prediction) - std::vector> CorrectOnlineAUs(std::vector> predictions_orig, int view, bool dyn_shift = false, bool dyn_scale = false, bool update_track = true, bool clip_values = false); + // The geometry descriptor (rigid followed by non-rigid shape parameters from CLNF) + cv::Mat_ geom_descriptor_frame; + cv::Mat_ geom_descriptor_median; - void ReadAU(std::string au_location); + int geom_hist_sum; + cv::Mat_ geom_desc_hist; + int num_bins_geom; + double min_val_geom; + double max_val_geom; - void ReadRegressor(std::string fname, const vector& au_names); + // Using the bounding box of previous analysed frame to determine if a reset is needed + cv::Rect_ face_bounding_box; - // A utility function for keeping track of approximate running medians used for AU and emotion inference using a set of histograms (the histograms are evenly spaced from min_val to max_val) - // Descriptor has to be a row vector - // TODO this duplicates some other code - void UpdateRunningMedian(cv::Mat_& histogram, int& hist_sum, cv::Mat_& median, const cv::Mat_& descriptor, bool update, int num_bins, double min_val, double max_val); - void ExtractMedian(cv::Mat_& histogram, int hist_count, cv::Mat_& median, int num_bins, double min_val, double max_val); - - // The linear SVR regressors - SVR_static_lin_regressors AU_SVR_static_appearance_lin_regressors; - SVR_dynamic_lin_regressors AU_SVR_dynamic_appearance_lin_regressors; - - // The linear SVM classifiers - SVM_static_lin AU_SVM_static_appearance_lin; - SVM_dynamic_lin AU_SVM_dynamic_appearance_lin; + // The AU predictions internally + std::vector> PredictCurrentAUs(int view); + std::vector> PredictCurrentAUsClass(int view); - // The AUs predicted by the model are not always 0 calibrated to a person. That is they don't always predict 0 for a neutral expression - // Keeping track of the predictions we can correct for this, by assuming that at least "ratio" of frames are neutral and subtract that value of prediction, only perform the correction after min_frames - void UpdatePredictionTrack(cv::Mat_& prediction_corr_histogram, int& prediction_correction_count, vector& correction, const vector>& predictions, double ratio=0.25, int num_bins = 200, double min_val = -3, double max_val = 5, int min_frames = 10); - void GetSampleHist(cv::Mat_& prediction_corr_histogram, int prediction_correction_count, vector& sample, double ratio, int num_bins = 200, double min_val = 0, double max_val = 5); + // special step for online (rather than offline AU prediction) + std::vector> CorrectOnlineAUs(std::vector> predictions_orig, int view, bool dyn_shift = false, bool dyn_scale = false, bool update_track = true, bool clip_values = false); - void PostprocessPredictions(); + void ReadAU(std::string au_location); - vector> au_prediction_correction_histogram; - vector au_prediction_correction_count; + void ReadRegressor(std::string fname, const vector& au_names); - // Some dynamic scaling (the logic is that before the extreme versions of expression or emotion are shown, - // it is hard to tell the boundaries, this allows us to scale the model to the most extreme seen) - // They have to be view specific - vector> dyn_scaling; - - // Keeping track of predictions for summary stats - cv::Mat_ AU_prediction_track; - cv::Mat_ geom_desc_track; + // A utility function for keeping track of approximate running medians used for AU and emotion inference using a set of histograms (the histograms are evenly spaced from min_val to max_val) + // Descriptor has to be a row vector + // TODO this duplicates some other code + void UpdateRunningMedian(cv::Mat_& histogram, int& hist_sum, cv::Mat_& median, const cv::Mat_& descriptor, bool update, int num_bins, double min_val, double max_val); + void ExtractMedian(cv::Mat_& histogram, int hist_count, cv::Mat_& median, int num_bins, double min_val, double max_val); - double current_time_seconds; + // The linear SVR regressors + SVR_static_lin_regressors AU_SVR_static_appearance_lin_regressors; + SVR_dynamic_lin_regressors AU_SVR_dynamic_appearance_lin_regressors; - // Used for face alignment - cv::Mat_ triangulation; - double align_scale; - int align_width; - int align_height; + // The linear SVM classifiers + SVM_static_lin AU_SVM_static_appearance_lin; + SVM_dynamic_lin AU_SVM_dynamic_appearance_lin; - // Useful placeholder for renormalizing the initial frames of shorter videos - int max_init_frames = 3000; - vector> hog_desc_frames_init; - vector> geom_descriptor_frames_init; - vector views; - bool postprocessed = false; - int frames_tracking_succ = 0; + // The AUs predicted by the model are not always 0 calibrated to a person. That is they don't always predict 0 for a neutral expression + // Keeping track of the predictions we can correct for this, by assuming that at least "ratio" of frames are neutral and subtract that value of prediction, only perform the correction after min_frames + void UpdatePredictionTrack(cv::Mat_& prediction_corr_histogram, int& prediction_correction_count, vector& correction, const vector>& predictions, double ratio = 0.25, int num_bins = 200, double min_val = -3, double max_val = 5, int min_frames = 10); + void GetSampleHist(cv::Mat_& prediction_corr_histogram, int prediction_correction_count, vector& sample, double ratio, int num_bins = 200, double min_val = 0, double max_val = 5); -}; - //=========================================================================== + void PostprocessPredictions(); + + vector> au_prediction_correction_histogram; + vector au_prediction_correction_count; + + // Some dynamic scaling (the logic is that before the extreme versions of expression or emotion are shown, + // it is hard to tell the boundaries, this allows us to scale the model to the most extreme seen) + // They have to be view specific + vector> dyn_scaling; + + // Keeping track of predictions for summary stats + cv::Mat_ AU_prediction_track; + cv::Mat_ geom_desc_track; + + double current_time_seconds; + + // Used for face alignment + cv::Mat_ triangulation; + double align_scale; + int align_width; + int align_height; + + // Useful placeholder for renormalizing the initial frames of shorter videos + int max_init_frames = 3000; + vector> hog_desc_frames_init; + vector> geom_descriptor_frames_init; + vector views; + bool postprocessed = false; + int frames_tracking_succ = 0; + + }; + //=========================================================================== } #endif diff --git a/lib/local/FaceAnalyser/src/FaceAnalyser.cpp b/lib/local/FaceAnalyser/src/FaceAnalyser.cpp index d32c823..757d3f2 100644 --- a/lib/local/FaceAnalyser/src/FaceAnalyser.cpp +++ b/lib/local/FaceAnalyser/src/FaceAnalyser.cpp @@ -226,7 +226,7 @@ void FaceAnalyser::GetLatestHOG(cv::Mat_& hog_descriptor, int& num_rows, void FaceAnalyser::GetLatestAlignedFace(cv::Mat& image) { - image = this->aligned_face.clone(); + image = this->aligned_face_for_output.clone(); } void FaceAnalyser::GetLatestNeutralHOG(cv::Mat_& hog_descriptor, int& num_rows, int& num_cols) @@ -267,57 +267,22 @@ int GetViewId(const vector orientations_all, const cv::Vec3d& orienta } -void FaceAnalyser::ExtractCurrentMedians(vector& hog_medians, vector& face_image_medians, vector& orientations) -{ - - orientations = this->head_orientations; - - for(size_t i = 0; i < orientations.size(); ++i) - { - cv::Mat_ median_face(this->face_image_median.rows, this->face_image_median.cols, 0.0); - cv::Mat_ median_hog(this->hog_desc_median.rows, this->hog_desc_median.cols, 0.0); - - ExtractMedian(this->face_image_hist[i], this->face_image_hist_sum[i], median_face, 256, 0, 255); - ExtractMedian(this->hog_desc_hist[i], this->hog_hist_sum[i], median_hog, this->num_bins_hog, 0, 1); - - // Add the HOG sample - hog_medians.push_back(median_hog.clone()); - - // For the face image need to convert it to suitable format - cv::Mat_ aligned_face_cols_uchar; - median_face.convertTo(aligned_face_cols_uchar, CV_8U); - - cv::Mat aligned_face_uchar; - if(aligned_face.channels() == 1) - { - aligned_face_uchar = cv::Mat(aligned_face.rows, aligned_face.cols, CV_8U, aligned_face_cols_uchar.data); - } - else - { - aligned_face_uchar = cv::Mat(aligned_face.rows, aligned_face.cols, CV_8UC3, aligned_face_cols_uchar.data); - } - - face_image_medians.push_back(aligned_face_uchar.clone()); - - } -} - std::pair>, std::vector>> FaceAnalyser::PredictStaticAUs(const cv::Mat& frame, const LandmarkDetector::CLNF& clnf, bool visualise) { - + // First align the face - AlignFaceMask(aligned_face, frame, clnf, triangulation, true, align_scale, align_width, align_height); - + AlignFaceMask(aligned_face_for_au, frame, clnf, triangulation, true, 0.7, 112, 112); + // Extract HOG descriptor from the frame and convert it to a useable format cv::Mat_ hog_descriptor; - Extract_FHOG_descriptor(hog_descriptor, aligned_face, this->num_hog_rows, this->num_hog_cols); + Extract_FHOG_descriptor(hog_descriptor, aligned_face_for_au, this->num_hog_rows, this->num_hog_cols); // Store the descriptor hog_desc_frame = hog_descriptor; cv::Vec3d curr_orient(clnf.params_global[1], clnf.params_global[2], clnf.params_global[3]); int orientation_to_use = GetViewId(this->head_orientations, curr_orient); - + // Geom descriptor and its median geom_descriptor_frame = clnf.params_local.t(); @@ -325,11 +290,11 @@ std::pair>, std::vector locs = clnf.pdm.princ_comp * geom_descriptor_frame.t(); cv::hconcat(locs.t(), geom_descriptor_frame.clone(), geom_descriptor_frame); - - // First convert the face image to double representation as a row vector - cv::Mat_ aligned_face_cols(1, aligned_face.cols * aligned_face.rows * aligned_face.channels(), aligned_face.data, 1); - cv::Mat_ aligned_face_cols_double; - aligned_face_cols.convertTo(aligned_face_cols_double, CV_64F); + + // First convert the face image to double representation as a row vector, TODO rem + //cv::Mat_ aligned_face_cols(1, aligned_face_for_au.cols * aligned_face_for_au.rows * aligned_face_for_au.channels(), aligned_face_for_au.data, 1); + //cv::Mat_ aligned_face_cols_double; + //aligned_face_cols.convertTo(aligned_face_cols_double, CV_64F); // Visualising the median HOG if (visualise) @@ -361,29 +326,34 @@ void FaceAnalyser::AddNextFrame(const cv::Mat& frame, const LandmarkDetector::CL frames_tracking++; // First align the face if tracking was successfull - if(clnf_model.detection_success) + if (clnf_model.detection_success) { - AlignFaceMask(aligned_face, frame, clnf_model, triangulation, true, align_scale, align_width, align_height); - } - else - { - aligned_face = cv::Mat(align_height, align_width, CV_8UC3); - aligned_face.setTo(0); - } - if(aligned_face.channels() == 3) - { - cv::cvtColor(aligned_face, aligned_face_grayscale, CV_BGR2GRAY); + // The aligned face requirement for AUs + AlignFaceMask(aligned_face_for_au, frame, clnf_model, triangulation, true, 0.7, 112, 112); + + // If the output requirement matches use the already computed one, else compute it again + if (align_scale == 0.7 && align_width == 112 && align_height == 112) + { + aligned_face_for_output = aligned_face_for_au.clone(); + } + else + { + AlignFaceMask(aligned_face_for_output, frame, clnf_model, triangulation, true, align_scale, align_width, align_height); + } } else { - aligned_face_grayscale = aligned_face.clone(); + aligned_face_for_output = cv::Mat(align_height, align_width, CV_8UC3); + aligned_face_for_au = cv::Mat(112, 112, CV_8UC3); + aligned_face_for_output.setTo(0); + aligned_face_for_au.setTo(0); } // Extract HOG descriptor from the frame and convert it to a useable format cv::Mat_ hog_descriptor; - Extract_FHOG_descriptor(hog_descriptor, aligned_face, this->num_hog_rows, this->num_hog_cols); - + Extract_FHOG_descriptor(hog_descriptor, aligned_face_for_au, this->num_hog_rows, this->num_hog_cols); + // Store the descriptor hog_desc_frame = hog_descriptor; @@ -425,41 +395,38 @@ void FaceAnalyser::AddNextFrame(const cv::Mat& frame, const LandmarkDetector::CL frames_tracking_succ++; // A small speedup - if(frames_tracking % 2 == 1) + if (frames_tracking % 2 == 1) { UpdateRunningMedian(this->hog_desc_hist[orientation_to_use], this->hog_hist_sum[orientation_to_use], this->hog_desc_median, hog_descriptor, update_median, this->num_bins_hog, this->min_val_hog, this->max_val_hog); this->hog_desc_median.setTo(0, this->hog_desc_median < 0); - } + } // Geom descriptor and its median geom_descriptor_frame = clnf_model.params_local.t(); - - if(!clnf_model.detection_success) + + if (!clnf_model.detection_success) { geom_descriptor_frame.setTo(0); } // Stack with the actual feature point locations (without mean) cv::Mat_ locs = clnf_model.pdm.princ_comp * geom_descriptor_frame.t(); - + cv::hconcat(locs.t(), geom_descriptor_frame.clone(), geom_descriptor_frame); - + // A small speedup - if(frames_tracking % 2 == 1) + if (frames_tracking % 2 == 1) { UpdateRunningMedian(this->geom_desc_hist, this->geom_hist_sum, this->geom_descriptor_median, geom_descriptor_frame, update_median, this->num_bins_geom, this->min_val_geom, this->max_val_geom); } - // First convert the face image to double representation as a row vector - cv::Mat_ aligned_face_cols(1, aligned_face.cols * aligned_face.rows * aligned_face.channels(), aligned_face.data, 1); - cv::Mat_ aligned_face_cols_double; - aligned_face_cols.convertTo(aligned_face_cols_double, CV_64F); - - // TODO get rid of this completely as it takes too long? - //UpdateRunningMedian(this->face_image_hist[orientation_to_use], this->face_image_hist_sum[orientation_to_use], this->face_image_median, aligned_face_cols_double, update_median, 256, 0, 255); + // First convert the face image to double representation as a row vector, TODO rem? + //cv::Mat_ aligned_face_cols(1, aligned_face.cols * aligned_face.rows * aligned_face.channels(), aligned_face.data, 1); + //cv::Mat_ aligned_face_cols_double; + //aligned_face_cols.convertTo(aligned_face_cols_double, CV_64F); // Visualising the median HOG - if(visualise) + if (visualise) { FaceAnalysis::Visualise_FHOG(hog_descriptor, num_hog_rows, num_hog_cols, hog_descriptor_visualisation); } @@ -468,9 +435,9 @@ void FaceAnalyser::AddNextFrame(const cv::Mat& frame, const LandmarkDetector::CL AU_predictions_reg = PredictCurrentAUs(orientation_to_use); std::vector> AU_predictions_reg_corrected; - if(online) + if (online) { - AU_predictions_reg_corrected = CorrectOnlineAUs(AU_predictions_reg, orientation_to_use, true, false, clnf_model.detection_success); + AU_predictions_reg_corrected = CorrectOnlineAUs(AU_predictions_reg, orientation_to_use, true, false, clnf_model.detection_success, true); } // Add the reg predictions to the historic data @@ -479,7 +446,7 @@ void FaceAnalyser::AddNextFrame(const cv::Mat& frame, const LandmarkDetector::CL // Find the appropriate AU (if not found add it) // Only add if the detection was successful - if(clnf_model.detection_success) + if (clnf_model.detection_success) { AU_predictions_reg_all_hist[AU_predictions_reg[au].first].push_back(AU_predictions_reg[au].second); } @@ -488,7 +455,7 @@ void FaceAnalyser::AddNextFrame(const cv::Mat& frame, const LandmarkDetector::CL AU_predictions_reg_all_hist[AU_predictions_reg[au].first].push_back(0); } } - + AU_predictions_class = PredictCurrentAUsClass(orientation_to_use); for (size_t au = 0; au < AU_predictions_class.size(); ++au) @@ -496,7 +463,7 @@ void FaceAnalyser::AddNextFrame(const cv::Mat& frame, const LandmarkDetector::CL // Find the appropriate AU (if not found add it) // Only add if the detection was successful - if(clnf_model.detection_success) + if (clnf_model.detection_success) { AU_predictions_class_all_hist[AU_predictions_class[au].first].push_back(AU_predictions_class[au].second); } @@ -505,9 +472,9 @@ void FaceAnalyser::AddNextFrame(const cv::Mat& frame, const LandmarkDetector::CL AU_predictions_class_all_hist[AU_predictions_class[au].first].push_back(0); } } - - if(online) + + if (online) { AU_predictions_reg = AU_predictions_reg_corrected; } @@ -524,15 +491,13 @@ void FaceAnalyser::AddNextFrame(const cv::Mat& frame, const LandmarkDetector::CL this->current_time_seconds = timestamp_seconds; view_used = orientation_to_use; - + bool success = clnf_model.detection_success; confidences.push_back(clnf_model.detection_certainty); valid_preds.push_back(success); timestamps.push_back(timestamp_seconds); - - } void FaceAnalyser::GetGeomDescriptor(cv::Mat_& geom_desc) @@ -1101,12 +1066,6 @@ vector> FaceAnalyser::PredictCurrentAUsClass(int view) return predictions; } - -cv::Mat_ FaceAnalyser::GetLatestAlignedFaceGrayscale() -{ - return aligned_face_grayscale.clone(); -} - cv::Mat FaceAnalyser::GetLatestHOGDescriptorVisualisation() { return hog_descriptor_visualisation; diff --git a/lib/local/FaceAnalyser/src/Face_utils.cpp b/lib/local/FaceAnalyser/src/Face_utils.cpp index 52e0094..b6d4eb5 100644 --- a/lib/local/FaceAnalyser/src/Face_utils.cpp +++ b/lib/local/FaceAnalyser/src/Face_utils.cpp @@ -221,19 +221,19 @@ namespace FaceAnalysis destination_landmarks.col(1) = destination_landmarks.col(1) + warp_matrix(1,2); // Move the eyebrows up to include more of upper face - destination_landmarks.at(0,1) -= 30; - destination_landmarks.at(16,1) -= 30; + destination_landmarks.at(0,1) -= (30/0.7)*sim_scale; + destination_landmarks.at(16,1) -= (30 / 0.7)*sim_scale; - destination_landmarks.at(17,1) -= 30; - destination_landmarks.at(18,1) -= 30; - destination_landmarks.at(19,1) -= 30; - destination_landmarks.at(20,1) -= 30; - destination_landmarks.at(21,1) -= 30; - destination_landmarks.at(22,1) -= 30; - destination_landmarks.at(23,1) -= 30; - destination_landmarks.at(24,1) -= 30; - destination_landmarks.at(25,1) -= 30; - destination_landmarks.at(26,1) -= 30; + destination_landmarks.at(17,1) -= (30 / 0.7)*sim_scale; + destination_landmarks.at(18,1) -= (30 / 0.7)*sim_scale; + destination_landmarks.at(19,1) -= (30 / 0.7)*sim_scale; + destination_landmarks.at(20,1) -= (30 / 0.7)*sim_scale; + destination_landmarks.at(21,1) -= (30 / 0.7)*sim_scale; + destination_landmarks.at(22,1) -= (30 / 0.7)*sim_scale; + destination_landmarks.at(23,1) -= (30 / 0.7)*sim_scale; + destination_landmarks.at(24,1) -= (30 / 0.7)*sim_scale; + destination_landmarks.at(25,1) -= (30 / 0.7)*sim_scale; + destination_landmarks.at(26,1) -= (30 / 0.7)*sim_scale; destination_landmarks = cv::Mat(destination_landmarks.t()).reshape(1, 1).t(); diff --git a/lib/local/LandmarkDetector/src/LandmarkDetectorModel.cpp b/lib/local/LandmarkDetector/src/LandmarkDetectorModel.cpp index f4e0707..cc3ed32 100644 --- a/lib/local/LandmarkDetector/src/LandmarkDetectorModel.cpp +++ b/lib/local/LandmarkDetector/src/LandmarkDetectorModel.cpp @@ -366,6 +366,9 @@ void CLNF::Read(string main_location) // The other module locations should be defined as relative paths from the main model boost::filesystem::path root = boost::filesystem::path(main_location).parent_path(); + // Assume no eye model, unless read-in + eye_model = false; + // The main file contains the references to other files while (!locations.eof()) { @@ -387,6 +390,7 @@ void CLNF::Read(string main_location) location = location.substr(0, location.size()-1); } + // append to root location = (root / location).string(); if (module.compare("LandmarkDetector") == 0) @@ -536,7 +540,6 @@ void CLNF::Read(string main_location) tracking_initialised = false; model_likelihood = -10; // very low detection_certainty = 1; // very uncertain - eye_model = false; // Initialising default values for the rest of the variables diff --git a/matlab_runners/Demos/feature_extraction_demo_img_seq.m b/matlab_runners/Demos/feature_extraction_demo_img_seq.m index d551304..9d72344 100644 --- a/matlab_runners/Demos/feature_extraction_demo_img_seq.m +++ b/matlab_runners/Demos/feature_extraction_demo_img_seq.m @@ -35,7 +35,7 @@ for i=1:numel(in_dirs) command = cat(2, command, ['-asvid -fdir "' in_dirs{i} '" -of "' outputFile '" ']); - command = cat(2, command, [' -simalign "' outputDir_aligned '" -hogalign "' outputHOG_aligned '"']); + command = cat(2, command, [' -simalign "' outputDir_aligned '" -simsize 200 -hogalign "' outputHOG_aligned '"']); end diff --git a/matlab_runners/Gaze Experiments/mpii_1500_errs.mat b/matlab_runners/Gaze Experiments/mpii_1500_errs.mat index 7b726db24f24ac86ff36288f1200286402b4b5a5..e006f70e0d276b53850ac21e7d8dbfef7061da8d 100644 GIT binary patch delta 41 wcmX>ZeKLB2i9~p6ih^%qk%EDRf}w$xp{bRzv4W9-q4~r><%tQb8%siU0Th}HS^xk5 delta 41 wcmX>ZeKLB2i9|?7se)Hxo`Qj?f}x?6p}Ccbxq^{_q4~r><%tQb8%siU0TwR|X#fBK