From 16918e7fb5edc21b5c214af182235409473674a4 Mon Sep 17 00:00:00 2001 From: Tadas Baltrusaitis Date: Fri, 3 Nov 2017 09:04:00 +0000 Subject: [PATCH] More work on recording, getting there --- exe/FeatureExtraction/FeatureExtraction.cpp | 285 ++---------------- .../include/LandmarkDetectorUtils.h | 3 +- .../src/LandmarkDetectorUtils.cpp | 11 +- lib/local/Recorder/include/RecorderCSV.h | 2 +- lib/local/Recorder/include/RecorderOpenFace.h | 4 +- lib/local/Recorder/src/RecorderCSV.cpp | 2 +- lib/local/Recorder/src/RecorderOpenFace.cpp | 4 +- 7 files changed, 34 insertions(+), 277 deletions(-) diff --git a/exe/FeatureExtraction/FeatureExtraction.cpp b/exe/FeatureExtraction/FeatureExtraction.cpp index 5b40ed4..73eb4d5 100644 --- a/exe/FeatureExtraction/FeatureExtraction.cpp +++ b/exe/FeatureExtraction/FeatureExtraction.cpp @@ -96,50 +96,12 @@ vector get_arguments(int argc, char **argv) return arguments; } -// Useful utility for creating directories for storing the output files -void create_directory_from_file(string output_path) -{ - - // Creating the right directory structure - - // First get rid of the file - auto p = path(path(output_path).parent_path()); - - if(!p.empty() && !boost::filesystem::exists(p)) - { - bool success = boost::filesystem::create_directories(p); - if(!success) - { - cout << "Failed to create a directory... " << p.string() << endl; - } - } -} - -void create_directory(string output_path) -{ - - // Creating the right directory structure - auto p = path(output_path); - - if(!boost::filesystem::exists(p)) - { - bool success = boost::filesystem::create_directories(p); - - if(!success) - { - cout << "Failed to create a directory..." << p.string() << endl; - } - } -} - void get_output_feature_params(vector &output_similarity_aligned, vector &output_hog_aligned_files, bool& visualize_track, bool& visualize_align, bool& visualize_hog, bool &output_2D_landmarks, bool &output_3D_landmarks, bool &output_model_params, bool &output_pose, bool &output_AUs, bool &output_gaze, vector &arguments); void get_image_input_output_params_feats(vector > &input_image_files, bool& as_video, vector &arguments); -void output_HOG_frame(std::ofstream* hog_file, bool good_frame, const cv::Mat_& hog_descriptor, int num_rows, int num_cols); - // Some globals for tracking timing information for visualisation double fps_tracker = -1.0; int64 t0 = 0; @@ -203,45 +165,32 @@ void visualise_tracking(cv::Mat& captured_image, const LandmarkDetector::CLNF& f } } -// Output all of the information into one file in one go (quite a few parameters, but simplifies the flow) -void outputAllFeatures(std::ofstream* output_file, bool output_2D_landmarks, bool output_3D_landmarks, - bool output_model_params, bool output_pose, bool output_AUs, bool output_gaze, - const LandmarkDetector::CLNF& face_model, int frame_count, double time_stamp, bool detection_success, - cv::Point3f gazeDirection0, cv::Point3f gazeDirection1, cv::Vec2d gaze_angle, cv::Vec6d& pose_estimate, double fx, double fy, double cx, double cy, - const FaceAnalysis::FaceAnalyser& face_analyser); - int main (int argc, char **argv) { vector arguments = get_arguments(argc, argv); - // Some initial parameters that can be overriden from command line - vector input_files, output_files, tracked_videos_output; - // Get the input output file parameters - + vector input_files, output_files; string output_codec; - // TODO rename - LandmarkDetector::get_video_input_output_params(input_files, output_files, tracked_videos_output, output_codec, arguments); + LandmarkDetector::get_video_input_output_params(input_files, output_files, output_codec, arguments); + // TODO remove bool video_input = true; bool images_as_video = false; vector > input_image_files; - // Adding image support for reading in the files + // Adding image support (TODO should be moved to capture) if(input_files.empty()) { - vector d_files; vector o_img; - vector> bboxes; get_image_input_output_params_feats(input_image_files, images_as_video, arguments); if(!input_image_files.empty()) { video_input = false; } - } // Grab camera parameters, if they are not defined (approximate values will be used) @@ -262,6 +211,7 @@ int main (int argc, char **argv) fx_undefined = true; } + // TODO these should be removed vector output_similarity_align; vector output_hog_align_files; @@ -280,6 +230,10 @@ int main (int argc, char **argv) get_output_feature_params(output_similarity_align, output_hog_align_files, visualize_track, visualize_align, visualize_hog, output_2D_landmarks, output_3D_landmarks, output_model_params, output_pose, output_AUs, output_gaze, arguments); + bool output_hog = !output_hog_align_files.empty(); + // TODO, these should be read in through output feature params, which should not be part of featureextraction + bool output_video = true; + // If multiple video files are tracked, use this to indicate if we are done bool done = false; int f_n = -1; @@ -389,21 +343,7 @@ int main (int argc, char **argv) // TODO this should always be video input int num_eye_landmarks = LandmarkDetector::CalculateAllEyeLandmarks(face_model).size(); // TODO empty file check replaced Recorder::RecorderOpenFace openFaceRec(output_files[f_n], input_files[f_n], true, output_2D_landmarks, output_3D_landmarks, output_model_params, output_pose, output_AUs, output_gaze, !output_hog_align_files.empty(), - !tracked_videos_output.empty(), !output_similarity_align.empty(), face_model.pdm.NumberOfPoints(), face_model.pdm.NumberOfModes(), num_eye_landmarks, face_analyser.GetAUClassNames(), face_analyser.GetAURegNames(), output_codec, fps_vid_in); - - // saving the videos - cv::VideoWriter writerFace; - if(!tracked_videos_output.empty()) - { - try - { - writerFace = cv::VideoWriter(tracked_videos_output[f_n], CV_FOURCC(output_codec[0],output_codec[1],output_codec[2],output_codec[3]), fps_vid_in, captured_image.size(), true); - } - catch(cv::Exception e) - { - WARN_STREAM( "Could not open VideoWriter, OUTPUT FILE WILL NOT BE WRITTEN. Currently using codec " << output_codec << ", try using an other one (-oc option)"); - } - } + output_video, !output_similarity_align.empty(), face_model.pdm.NumberOfPoints(), face_model.pdm.NumberOfModes(), num_eye_landmarks, face_analyser.GetAUClassNames(), face_analyser.GetAURegNames(), output_codec, fps_vid_in); int frame_count = 0; @@ -469,8 +409,8 @@ int main (int argc, char **argv) cv::Mat_ hog_descriptor; int num_hog_rows, num_hog_cols; - // But only if needed in output - if(!output_similarity_align.empty() || hog_output_file.is_open() || output_AUs) + // As this can be expensive only compute it if needed by output or visualization + if(!output_similarity_align.empty() || output_hog || output_AUs || visualize_align || visualize_hog) { face_analyser.AddNextFrame(captured_image, face_model.detected_landmarks, face_model.detection_success, time_stamp, false, !det_parameters.quiet_mode); face_analyser.GetLatestAlignedFace(sim_warped_img); @@ -479,7 +419,7 @@ int main (int argc, char **argv) { cv::imshow("sim_warp", sim_warped_img); } - if(hog_output_file.is_open() || (visualize_hog && !det_parameters.quiet_mode)) + if(output_hog || (visualize_hog && !det_parameters.quiet_mode)) { face_analyser.GetLatestHOG(hog_descriptor, num_hog_rows, num_hog_cols); @@ -495,11 +435,6 @@ int main (int argc, char **argv) // Work out the pose of the head from the tracked model cv::Vec6d pose_estimate = LandmarkDetector::GetPose(face_model, fx, fy, cx, cy); - if (hog_output_file.is_open()) - { - output_HOG_frame(&hog_output_file, detection_success, hog_descriptor, num_hog_rows, num_hog_cols); - } - // Write the similarity normalised output if (!output_similarity_align.empty()) { @@ -524,23 +459,23 @@ int main (int argc, char **argv) } } - // Visualising the tracker + // Visualising the tracker, TODO this should be in utility if(visualize_track && !det_parameters.quiet_mode) { visualise_tracking(captured_image, face_model, det_parameters, gazeDirection0, gazeDirection1, frame_count, fx, fy, cx, cy); } - // Output the landmarks, pose, gaze, parameters and AUs - outputAllFeatures(&output_file, output_2D_landmarks, output_3D_landmarks, output_model_params, output_pose, output_AUs, output_gaze, - face_model, frame_count, time_stamp, detection_success, gazeDirection0, gazeDirection1, gazeAngle, - pose_estimate, fx, fy, cx, cy, face_analyser); - - // output the tracked video - if(!tracked_videos_output.empty()) - { - writerFace << captured_image; - } + // Setting up the recorder output + openFaceRec.SetObservationHOG(detection_success, hog_descriptor, num_hog_rows, num_hog_cols, 31); // The number of channels in HOG is fixed at the moment, as using FHOG + openFaceRec.SetObservationVisualization(captured_image); + openFaceRec.SetObservationActionUnits(face_analyser.GetCurrentAUsReg(), face_analyser.GetCurrentAUsClass()); + openFaceRec.SetObservationGaze(gazeDirection0, gazeDirection1, gazeAngle, LandmarkDetector::CalculateAllEyeLandmarks(face_model)); + openFaceRec.SetObservationLandmarks(face_model.detected_landmarks, face_model.GetShape(fx, fy, cx, cy), face_model.params_global, face_model.params_local, face_model.detection_certainty, detection_success); + openFaceRec.SetObservationPose(pose_estimate); + openFaceRec.SetObservationTimestamp(time_stamp); + openFaceRec.WriteObservation(); + // Grabbing the next frame (todo this should be part of capture) if(video_input) { video_capture >> captured_image; @@ -590,12 +525,12 @@ int main (int argc, char **argv) } - output_file.close(); + openFaceRec.Close(); if (output_files.size() > 0 && output_AUs) { cout << "Postprocessing the Action Unit predictions" << endl; - face_analyser.PostprocessOutputFile(output_files[f_n]); + face_analyser.PostprocessOutputFile(output_files[f_n]); // TODO this won't work, need the filename } // Reset the models for the next video face_analyser.Reset(); @@ -619,173 +554,6 @@ int main (int argc, char **argv) return 0; } -// Output all of the information into one file in one go (quite a few parameters, but simplifies the flow) -void outputAllFeatures(std::ofstream* output_file, bool output_2D_landmarks, bool output_3D_landmarks, - bool output_model_params, bool output_pose, bool output_AUs, bool output_gaze, - const LandmarkDetector::CLNF& face_model, int frame_count, double time_stamp, bool detection_success, - cv::Point3f gazeDirection0, cv::Point3f gazeDirection1, cv::Vec2d gaze_angle, cv::Vec6d& pose_estimate, double fx, double fy, double cx, double cy, - const FaceAnalysis::FaceAnalyser& face_analyser) -{ - - double confidence = 0.5 * (1 - face_model.detection_certainty); - - *output_file << frame_count + 1 << ", " << time_stamp << ", " << confidence << ", " << detection_success; - - // Output the estimated gaze - if (output_gaze) - { - *output_file << ", " << gazeDirection0.x << ", " << gazeDirection0.y << ", " << gazeDirection0.z - << ", " << gazeDirection1.x << ", " << gazeDirection1.y << ", " << gazeDirection1.z; - - // Output gaze angle (same format as head pose angle) - *output_file << ", " << gaze_angle[0] << ", " << gaze_angle[1]; - - // Output eye landmarks - std::vector eye_landmark_points = LandmarkDetector::CalculateAllEyeLandmarks(face_model); - - for (size_t i = 0; i < eye_landmark_points.size(); ++i) - { - *output_file << ", " << eye_landmark_points[i].x; - } - for (size_t i = 0; i < eye_landmark_points.size(); ++i) - { - *output_file << ", " << eye_landmark_points[i].y; - } - } - - // Output the estimated head pose - if (output_pose) - { - if(face_model.tracking_initialised) - { - *output_file << ", " << pose_estimate[0] << ", " << pose_estimate[1] << ", " << pose_estimate[2] - << ", " << pose_estimate[3] << ", " << pose_estimate[4] << ", " << pose_estimate[5]; - } - else - { - *output_file << ", 0, 0, 0, 0, 0, 0"; - } - } - - // Output the detected 2D facial landmarks - if (output_2D_landmarks) - { - for (int i = 0; i < face_model.pdm.NumberOfPoints() * 2; ++i) - { - if(face_model.tracking_initialised) - { - *output_file << ", " << face_model.detected_landmarks.at(i); - } - else - { - *output_file << ", 0"; - } - } - } - - // Output the detected 3D facial landmarks - if (output_3D_landmarks) - { - cv::Mat_ shape_3D = face_model.GetShape(fx, fy, cx, cy); - for (int i = 0; i < face_model.pdm.NumberOfPoints() * 3; ++i) - { - if (face_model.tracking_initialised) - { - *output_file << ", " << shape_3D.at(i); - } - else - { - *output_file << ", 0"; - } - } - } - - if (output_model_params) - { - for (int i = 0; i < 6; ++i) - { - if (face_model.tracking_initialised) - { - *output_file << ", " << face_model.params_global[i]; - } - else - { - *output_file << ", 0"; - } - } - for (int i = 0; i < face_model.pdm.NumberOfModes(); ++i) - { - if(face_model.tracking_initialised) - { - *output_file << ", " << face_model.params_local.at(i, 0); - } - else - { - *output_file << ", 0"; - } - } - } - - - - if (output_AUs) - { - auto aus_reg = face_analyser.GetCurrentAUsReg(); - - vector au_reg_names = face_analyser.GetAURegNames(); - std::sort(au_reg_names.begin(), au_reg_names.end()); - - // write out ar the correct index - for (string au_name : au_reg_names) - { - for (auto au_reg : aus_reg) - { - if (au_name.compare(au_reg.first) == 0) - { - *output_file << ", " << au_reg.second; - break; - } - } - } - - if (aus_reg.size() == 0) - { - for (size_t p = 0; p < face_analyser.GetAURegNames().size(); ++p) - { - *output_file << ", 0"; - } - } - - auto aus_class = face_analyser.GetCurrentAUsClass(); - - vector au_class_names = face_analyser.GetAUClassNames(); - std::sort(au_class_names.begin(), au_class_names.end()); - - // write out ar the correct index - for (string au_name : au_class_names) - { - for (auto au_class : aus_class) - { - if (au_name.compare(au_class.first) == 0) - { - *output_file << ", " << au_class.second; - break; - } - } - } - - if (aus_class.size() == 0) - { - for (size_t p = 0; p < face_analyser.GetAUClassNames().size(); ++p) - { - *output_file << ", 0"; - } - } - } - *output_file << endl; -} - - void get_output_feature_params(vector &output_similarity_aligned, vector &output_hog_aligned_files, bool& visualize_track, bool& visualize_align, bool& visualize_hog, bool &output_2D_landmarks, bool &output_3D_landmarks, bool &output_model_params, bool &output_pose, bool &output_AUs, bool &output_gaze, vector &arguments) @@ -836,7 +604,6 @@ void get_output_feature_params(vector &output_similarity_aligned, vector else if (arguments[i].compare("-hogalign") == 0) { output_hog_aligned_files.push_back(output_root + arguments[i + 1]); - create_directory_from_file(output_root + arguments[i + 1]); valid[i] = false; valid[i + 1] = false; i++; diff --git a/lib/local/LandmarkDetector/include/LandmarkDetectorUtils.h b/lib/local/LandmarkDetector/include/LandmarkDetectorUtils.h index b54ffe7..26b1c97 100644 --- a/lib/local/LandmarkDetector/include/LandmarkDetectorUtils.h +++ b/lib/local/LandmarkDetector/include/LandmarkDetectorUtils.h @@ -52,8 +52,7 @@ namespace LandmarkDetector //============================================================================================= // Helper functions for parsing the inputs //============================================================================================= - void get_video_input_output_params(vector &input_video_file, vector &output_files, - vector &output_video_files, string &output_codec, vector &arguments); + void get_video_input_output_params(vector &input_video_file, vector &output_files, string &output_codec, vector &arguments); void get_camera_params(int &device, float &fx, float &fy, float &cx, float &cy, vector &arguments); diff --git a/lib/local/LandmarkDetector/src/LandmarkDetectorUtils.cpp b/lib/local/LandmarkDetector/src/LandmarkDetectorUtils.cpp index 959aa70..b473002 100644 --- a/lib/local/LandmarkDetector/src/LandmarkDetectorUtils.cpp +++ b/lib/local/LandmarkDetector/src/LandmarkDetectorUtils.cpp @@ -96,8 +96,7 @@ void create_directories(string output_path) } // Extracting the following command line arguments -f, -op, -of, -ov (and possible ordered repetitions) -void get_video_input_output_params(vector &input_video_files, vector &output_files, - vector &output_video_files, string& output_codec, vector &arguments) +void get_video_input_output_params(vector &input_video_files, vector &output_files, string& output_codec, vector &arguments) { bool* valid = new bool[arguments.size()]; @@ -154,14 +153,6 @@ void get_video_input_output_params(vector &input_video_files, vector& landmarks_2D, const cv::Mat_& landmarks_3D, const cv::Mat_& pdm_model_params, const cv::Vec6d& rigid_shape_params, cv::Vec6d& pose_estimate, - const cv::Point3f& gazeDirection0, const cv::Point3f& gazeDirection1, const cv::Vec2d& gaze_angle, const cv::Mat_& eye_landmarks, + const cv::Point3f& gazeDirection0, const cv::Point3f& gazeDirection1, const cv::Vec2d& gaze_angle, const std::vector& eye_landmarks, const std::vector >& au_intensities, const std::vector >& au_occurences); // TODO have set functions? diff --git a/lib/local/Recorder/include/RecorderOpenFace.h b/lib/local/Recorder/include/RecorderOpenFace.h index 504983a..218aef9 100644 --- a/lib/local/Recorder/include/RecorderOpenFace.h +++ b/lib/local/Recorder/include/RecorderOpenFace.h @@ -87,7 +87,7 @@ namespace Recorder // Gaze related observations void SetObservationGaze(const cv::Point3f& gazeDirection0, const cv::Point3f& gazeDirection1, - const cv::Vec2d& gaze_angle, const cv::Mat_& eye_landmarks); + const cv::Vec2d& gaze_angle, const std::vector& eye_landmarks); // Face alignment related observations void SetObservationFaceAlign(const cv::Mat& aligned_face); @@ -145,7 +145,7 @@ namespace Recorder cv::Point3f gaze_direction0; cv::Point3f gaze_direction1; cv::Vec2d gaze_angle; - cv::Mat_ eye_landmarks; + std::vector eye_landmarks; int observation_count; diff --git a/lib/local/Recorder/src/RecorderCSV.cpp b/lib/local/Recorder/src/RecorderCSV.cpp index fea7241..06eb6da 100644 --- a/lib/local/Recorder/src/RecorderCSV.cpp +++ b/lib/local/Recorder/src/RecorderCSV.cpp @@ -157,7 +157,7 @@ bool RecorderCSV::Open(std::string output_file_name, bool output_2D_landmarks, b // TODO check if the stream is open void RecorderCSV::WriteLine(int observation_count, double time_stamp, bool landmark_detection_success, double landmark_confidence, const cv::Mat_& landmarks_2D, const cv::Mat_& landmarks_3D, const cv::Mat_& pdm_model_params, const cv::Vec6d& rigid_shape_params, cv::Vec6d& pose_estimate, - const cv::Point3f& gazeDirection0, const cv::Point3f& gazeDirection1, const cv::Vec2d& gaze_angle, const cv::Mat_& eye_landmarks, + const cv::Point3f& gazeDirection0, const cv::Point3f& gazeDirection1, const cv::Vec2d& gaze_angle, const std::vector& eye_landmarks, const std::vector >& au_intensities, const std::vector >& au_occurences) { diff --git a/lib/local/Recorder/src/RecorderOpenFace.cpp b/lib/local/Recorder/src/RecorderOpenFace.cpp index 124e13a..c2e1c1d 100644 --- a/lib/local/Recorder/src/RecorderOpenFace.cpp +++ b/lib/local/Recorder/src/RecorderOpenFace.cpp @@ -151,7 +151,7 @@ void RecorderOpenFace::WriteObservation() if(output_tracked_video) { - if (vis_to_out.empty) + if (vis_to_out.empty()) { WARN_STREAM("Output tracked video frame is not set"); } @@ -197,7 +197,7 @@ void RecorderOpenFace::SetObservationActionUnits(const std::vector& eye_landmarks) + const cv::Vec2d& gaze_angle, const std::vector& eye_landmarks) { this->gaze_direction0 = gaze_direction0; this->gaze_direction1 = gaze_direction1;