More work on recording, getting there

This commit is contained in:
Tadas Baltrusaitis 2017-11-03 09:04:00 +00:00
parent b60669fa62
commit 16918e7fb5
7 changed files with 34 additions and 277 deletions

View File

@@ -96,50 +96,12 @@ vector<string> get_arguments(int argc, char **argv)
return arguments;
}
// Useful utility for creating directories for storing the output files
void create_directory_from_file(string output_path)
{
// Creating the right directory structure
// First get rid of the file
auto p = path(path(output_path).parent_path());
if(!p.empty() && !boost::filesystem::exists(p))
{
bool success = boost::filesystem::create_directories(p);
if(!success)
{
cout << "Failed to create a directory... " << p.string() << endl;
}
}
}
void create_directory(string output_path)
{
// Creating the right directory structure
auto p = path(output_path);
if(!boost::filesystem::exists(p))
{
bool success = boost::filesystem::create_directories(p);
if(!success)
{
cout << "Failed to create a directory..." << p.string() << endl;
}
}
}
void get_output_feature_params(vector<string> &output_similarity_aligned, vector<string> &output_hog_aligned_files, bool& visualize_track,
bool& visualize_align, bool& visualize_hog, bool &output_2D_landmarks, bool &output_3D_landmarks, bool &output_model_params,
bool &output_pose, bool &output_AUs, bool &output_gaze, vector<string> &arguments);
void get_image_input_output_params_feats(vector<vector<string> > &input_image_files, bool& as_video, vector<string> &arguments);
void output_HOG_frame(std::ofstream* hog_file, bool good_frame, const cv::Mat_<double>& hog_descriptor, int num_rows, int num_cols);
// Some globals for tracking timing information for visualisation
double fps_tracker = -1.0;
int64 t0 = 0;
@@ -203,45 +165,32 @@ void visualise_tracking(cv::Mat& captured_image, const LandmarkDetector::CLNF& f
}
}
// Output all of the information into one file in one go (quite a few parameters, but simplifies the flow)
void outputAllFeatures(std::ofstream* output_file, bool output_2D_landmarks, bool output_3D_landmarks,
bool output_model_params, bool output_pose, bool output_AUs, bool output_gaze,
const LandmarkDetector::CLNF& face_model, int frame_count, double time_stamp, bool detection_success,
cv::Point3f gazeDirection0, cv::Point3f gazeDirection1, cv::Vec2d gaze_angle, cv::Vec6d& pose_estimate, double fx, double fy, double cx, double cy,
const FaceAnalysis::FaceAnalyser& face_analyser);
int main (int argc, char **argv)
{
vector<string> arguments = get_arguments(argc, argv);
// Some initial parameters that can be overridden from command line
vector<string> input_files, output_files, tracked_videos_output;
// Get the input output file parameters
vector<string> input_files, output_files;
string output_codec;
// TODO rename
LandmarkDetector::get_video_input_output_params(input_files, output_files, tracked_videos_output, output_codec, arguments);
LandmarkDetector::get_video_input_output_params(input_files, output_files, output_codec, arguments);
// TODO remove
bool video_input = true;
bool images_as_video = false;
vector<vector<string> > input_image_files;
// Adding image support for reading in the files
// Adding image support (TODO should be moved to capture)
if(input_files.empty())
{
vector<string> d_files;
vector<string> o_img;
vector<cv::Rect_<double>> bboxes;
get_image_input_output_params_feats(input_image_files, images_as_video, arguments);
if(!input_image_files.empty())
{
video_input = false;
}
}
// Grab camera parameters; if they are not defined, approximate values will be used
@@ -262,6 +211,7 @@ int main (int argc, char **argv)
fx_undefined = true;
}
// TODO these should be removed
vector<string> output_similarity_align;
vector<string> output_hog_align_files;
@@ -280,6 +230,10 @@ int main (int argc, char **argv)
get_output_feature_params(output_similarity_align, output_hog_align_files, visualize_track, visualize_align, visualize_hog,
output_2D_landmarks, output_3D_landmarks, output_model_params, output_pose, output_AUs, output_gaze, arguments);
bool output_hog = !output_hog_align_files.empty();
// TODO, these should be read in through output feature params, which should not be part of FeatureExtraction
bool output_video = true;
// If multiple video files are tracked, use this to indicate if we are done
bool done = false;
int f_n = -1;
@@ -389,21 +343,7 @@ int main (int argc, char **argv)
// TODO this should always be video input
int num_eye_landmarks = LandmarkDetector::CalculateAllEyeLandmarks(face_model).size(); // TODO empty file check replaced
Recorder::RecorderOpenFace openFaceRec(output_files[f_n], input_files[f_n], true, output_2D_landmarks, output_3D_landmarks, output_model_params, output_pose, output_AUs, output_gaze, !output_hog_align_files.empty(),
!tracked_videos_output.empty(), !output_similarity_align.empty(), face_model.pdm.NumberOfPoints(), face_model.pdm.NumberOfModes(), num_eye_landmarks, face_analyser.GetAUClassNames(), face_analyser.GetAURegNames(), output_codec, fps_vid_in);
// saving the videos
cv::VideoWriter writerFace;
if(!tracked_videos_output.empty())
{
try
{
writerFace = cv::VideoWriter(tracked_videos_output[f_n], CV_FOURCC(output_codec[0],output_codec[1],output_codec[2],output_codec[3]), fps_vid_in, captured_image.size(), true);
}
catch (cv::Exception& e)
{
WARN_STREAM( "Could not open VideoWriter, OUTPUT FILE WILL NOT BE WRITTEN. Currently using codec " << output_codec << ", try using an other one (-oc option)");
}
}
output_video, !output_similarity_align.empty(), face_model.pdm.NumberOfPoints(), face_model.pdm.NumberOfModes(), num_eye_landmarks, face_analyser.GetAUClassNames(), face_analyser.GetAURegNames(), output_codec, fps_vid_in);
int frame_count = 0;
@@ -469,8 +409,8 @@ int main (int argc, char **argv)
cv::Mat_<double> hog_descriptor;
int num_hog_rows, num_hog_cols;
// But only if needed in output
if(!output_similarity_align.empty() || hog_output_file.is_open() || output_AUs)
// As this can be expensive, only compute it if needed by output or visualization
if(!output_similarity_align.empty() || output_hog || output_AUs || visualize_align || visualize_hog)
{
face_analyser.AddNextFrame(captured_image, face_model.detected_landmarks, face_model.detection_success, time_stamp, false, !det_parameters.quiet_mode);
face_analyser.GetLatestAlignedFace(sim_warped_img);
@@ -479,7 +419,7 @@ int main (int argc, char **argv)
{
cv::imshow("sim_warp", sim_warped_img);
}
if(hog_output_file.is_open() || (visualize_hog && !det_parameters.quiet_mode))
if(output_hog || (visualize_hog && !det_parameters.quiet_mode))
{
face_analyser.GetLatestHOG(hog_descriptor, num_hog_rows, num_hog_cols);
@@ -495,11 +435,6 @@ int main (int argc, char **argv)
// Work out the pose of the head from the tracked model
cv::Vec6d pose_estimate = LandmarkDetector::GetPose(face_model, fx, fy, cx, cy);
if (hog_output_file.is_open())
{
output_HOG_frame(&hog_output_file, detection_success, hog_descriptor, num_hog_rows, num_hog_cols);
}
// Write the similarity normalised output
if (!output_similarity_align.empty())
{
@@ -524,23 +459,23 @@ int main (int argc, char **argv)
}
}
// Visualising the tracker
// Visualising the tracker, TODO this should be in utility
if(visualize_track && !det_parameters.quiet_mode)
{
visualise_tracking(captured_image, face_model, det_parameters, gazeDirection0, gazeDirection1, frame_count, fx, fy, cx, cy);
}
// Output the landmarks, pose, gaze, parameters and AUs
outputAllFeatures(&output_file, output_2D_landmarks, output_3D_landmarks, output_model_params, output_pose, output_AUs, output_gaze,
face_model, frame_count, time_stamp, detection_success, gazeDirection0, gazeDirection1, gazeAngle,
pose_estimate, fx, fy, cx, cy, face_analyser);
// output the tracked video
if(!tracked_videos_output.empty())
{
writerFace << captured_image;
}
// Setting up the recorder output
openFaceRec.SetObservationHOG(detection_success, hog_descriptor, num_hog_rows, num_hog_cols, 31); // The number of channels in HOG is fixed at the moment, as using FHOG
openFaceRec.SetObservationVisualization(captured_image);
openFaceRec.SetObservationActionUnits(face_analyser.GetCurrentAUsReg(), face_analyser.GetCurrentAUsClass());
openFaceRec.SetObservationGaze(gazeDirection0, gazeDirection1, gazeAngle, LandmarkDetector::CalculateAllEyeLandmarks(face_model));
openFaceRec.SetObservationLandmarks(face_model.detected_landmarks, face_model.GetShape(fx, fy, cx, cy), face_model.params_global, face_model.params_local, face_model.detection_certainty, detection_success);
openFaceRec.SetObservationPose(pose_estimate);
openFaceRec.SetObservationTimestamp(time_stamp);
openFaceRec.WriteObservation();
// Grabbing the next frame (todo this should be part of capture)
if(video_input)
{
video_capture >> captured_image;
@@ -590,12 +525,12 @@ int main (int argc, char **argv)
}
output_file.close();
openFaceRec.Close();
if (output_files.size() > 0 && output_AUs)
{
cout << "Postprocessing the Action Unit predictions" << endl;
face_analyser.PostprocessOutputFile(output_files[f_n]);
face_analyser.PostprocessOutputFile(output_files[f_n]); // TODO this won't work, need the filename
}
// Reset the models for the next video
face_analyser.Reset();
@@ -619,173 +554,6 @@ int main (int argc, char **argv)
return 0;
}
// Output all of the information into one file in one go (quite a few parameters, but simplifies the flow)
void outputAllFeatures(std::ofstream* output_file, bool output_2D_landmarks, bool output_3D_landmarks,
bool output_model_params, bool output_pose, bool output_AUs, bool output_gaze,
const LandmarkDetector::CLNF& face_model, int frame_count, double time_stamp, bool detection_success,
cv::Point3f gazeDirection0, cv::Point3f gazeDirection1, cv::Vec2d gaze_angle, cv::Vec6d& pose_estimate, double fx, double fy, double cx, double cy,
const FaceAnalysis::FaceAnalyser& face_analyser)
{
double confidence = 0.5 * (1 - face_model.detection_certainty);
*output_file << frame_count + 1 << ", " << time_stamp << ", " << confidence << ", " << detection_success;
// Output the estimated gaze
if (output_gaze)
{
*output_file << ", " << gazeDirection0.x << ", " << gazeDirection0.y << ", " << gazeDirection0.z
<< ", " << gazeDirection1.x << ", " << gazeDirection1.y << ", " << gazeDirection1.z;
// Output gaze angle (same format as head pose angle)
*output_file << ", " << gaze_angle[0] << ", " << gaze_angle[1];
// Output eye landmarks
std::vector<cv::Point2d> eye_landmark_points = LandmarkDetector::CalculateAllEyeLandmarks(face_model);
for (size_t i = 0; i < eye_landmark_points.size(); ++i)
{
*output_file << ", " << eye_landmark_points[i].x;
}
for (size_t i = 0; i < eye_landmark_points.size(); ++i)
{
*output_file << ", " << eye_landmark_points[i].y;
}
}
// Output the estimated head pose
if (output_pose)
{
if(face_model.tracking_initialised)
{
*output_file << ", " << pose_estimate[0] << ", " << pose_estimate[1] << ", " << pose_estimate[2]
<< ", " << pose_estimate[3] << ", " << pose_estimate[4] << ", " << pose_estimate[5];
}
else
{
*output_file << ", 0, 0, 0, 0, 0, 0";
}
}
// Output the detected 2D facial landmarks
if (output_2D_landmarks)
{
for (int i = 0; i < face_model.pdm.NumberOfPoints() * 2; ++i)
{
if(face_model.tracking_initialised)
{
*output_file << ", " << face_model.detected_landmarks.at<double>(i);
}
else
{
*output_file << ", 0";
}
}
}
// Output the detected 3D facial landmarks
if (output_3D_landmarks)
{
cv::Mat_<double> shape_3D = face_model.GetShape(fx, fy, cx, cy);
for (int i = 0; i < face_model.pdm.NumberOfPoints() * 3; ++i)
{
if (face_model.tracking_initialised)
{
*output_file << ", " << shape_3D.at<double>(i);
}
else
{
*output_file << ", 0";
}
}
}
if (output_model_params)
{
for (int i = 0; i < 6; ++i)
{
if (face_model.tracking_initialised)
{
*output_file << ", " << face_model.params_global[i];
}
else
{
*output_file << ", 0";
}
}
for (int i = 0; i < face_model.pdm.NumberOfModes(); ++i)
{
if(face_model.tracking_initialised)
{
*output_file << ", " << face_model.params_local.at<double>(i, 0);
}
else
{
*output_file << ", 0";
}
}
}
if (output_AUs)
{
auto aus_reg = face_analyser.GetCurrentAUsReg();
vector<string> au_reg_names = face_analyser.GetAURegNames();
std::sort(au_reg_names.begin(), au_reg_names.end());
// write out at the correct index
for (string au_name : au_reg_names)
{
for (auto au_reg : aus_reg)
{
if (au_name.compare(au_reg.first) == 0)
{
*output_file << ", " << au_reg.second;
break;
}
}
}
if (aus_reg.size() == 0)
{
for (size_t p = 0; p < face_analyser.GetAURegNames().size(); ++p)
{
*output_file << ", 0";
}
}
auto aus_class = face_analyser.GetCurrentAUsClass();
vector<string> au_class_names = face_analyser.GetAUClassNames();
std::sort(au_class_names.begin(), au_class_names.end());
// write out at the correct index
for (string au_name : au_class_names)
{
for (auto au_class : aus_class)
{
if (au_name.compare(au_class.first) == 0)
{
*output_file << ", " << au_class.second;
break;
}
}
}
if (aus_class.size() == 0)
{
for (size_t p = 0; p < face_analyser.GetAUClassNames().size(); ++p)
{
*output_file << ", 0";
}
}
}
*output_file << endl;
}
void get_output_feature_params(vector<string> &output_similarity_aligned, vector<string> &output_hog_aligned_files, bool& visualize_track,
bool& visualize_align, bool& visualize_hog, bool &output_2D_landmarks, bool &output_3D_landmarks, bool &output_model_params,
bool &output_pose, bool &output_AUs, bool &output_gaze, vector<string> &arguments)
@@ -836,7 +604,6 @@ void get_output_feature_params(vector<string> &output_similarity_aligned, vector
else if (arguments[i].compare("-hogalign") == 0)
{
output_hog_aligned_files.push_back(output_root + arguments[i + 1]);
create_directory_from_file(output_root + arguments[i + 1]);
valid[i] = false;
valid[i + 1] = false;
i++;

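Taken together, the recorder calls in the hunks above replace the old per-stream bookkeeping (separate ofstream, HOG file, and VideoWriter handling). A minimal sketch of the per-frame flow they add up to, assuming the Recorder::RecorderOpenFace interface shown in this diff; grab_frame() and the analysis steps are placeholders, and setup of the capture, face_model, and face_analyser is elided:

// Sketch only: per-frame recording with the new Recorder::RecorderOpenFace API.
// Every SetObservation*/WriteObservation call below appears in the diff above.
while (grab_frame(captured_image)) // placeholder for video/image capture
{
    // ... landmark detection, pose/gaze estimation, AU analysis ...
    openFaceRec.SetObservationTimestamp(time_stamp);
    openFaceRec.SetObservationLandmarks(face_model.detected_landmarks,
        face_model.GetShape(fx, fy, cx, cy), face_model.params_global,
        face_model.params_local, face_model.detection_certainty, detection_success);
    openFaceRec.SetObservationPose(pose_estimate);
    openFaceRec.SetObservationGaze(gazeDirection0, gazeDirection1, gazeAngle,
        LandmarkDetector::CalculateAllEyeLandmarks(face_model));
    openFaceRec.SetObservationActionUnits(face_analyser.GetCurrentAUsReg(),
        face_analyser.GetCurrentAUsClass());
    openFaceRec.SetObservationHOG(detection_success, hog_descriptor,
        num_hog_rows, num_hog_cols, 31); // FHOG is fixed at 31 channels
    openFaceRec.SetObservationVisualization(captured_image);
    openFaceRec.WriteObservation(); // flush this frame to all enabled outputs
}
openFaceRec.Close();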
View File

@@ -52,8 +52,7 @@ namespace LandmarkDetector
//=============================================================================================
// Helper functions for parsing the inputs
//=============================================================================================
void get_video_input_output_params(vector<string> &input_video_file, vector<string> &output_files,
vector<string> &output_video_files, string &output_codec, vector<string> &arguments);
void get_video_input_output_params(vector<string> &input_video_file, vector<string> &output_files, string &output_codec, vector<string> &arguments);
void get_camera_params(int &device, float &fx, float &fy, float &cx, float &cy, vector<string> &arguments);

View File

@@ -96,8 +96,7 @@ void create_directories(string output_path)
}
// Extracting the following command line arguments -f, -op, -of, -ov (and possible ordered repetitions)
void get_video_input_output_params(vector<string> &input_video_files, vector<string> &output_files,
vector<string> &output_video_files, string& output_codec, vector<string> &arguments)
void get_video_input_output_params(vector<string> &input_video_files, vector<string> &output_files, string& output_codec, vector<string> &arguments)
{
bool* valid = new bool[arguments.size()];
@@ -154,14 +153,6 @@ void get_video_input_output_params(vector<string> &input_video_files, vector<str
valid[i+1] = false;
i++;
}
else if (arguments[i].compare("-ov") == 0)
{
output_video_files.push_back(output_root + arguments[i + 1]);
create_directory_from_file(output_root + arguments[i + 1]);
valid[i] = false;
valid[i+1] = false;
i++;
}
else if (arguments[i].compare("-oc") == 0)
{
if(arguments[i + 1].length() == 4)

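The hunk above removes the -ov branch from a consume-and-mark parser: each recognized flag and its value are marked invalid so a later pass can strip them from arguments. A condensed sketch of that pattern, using std::vector<bool> in place of the file's raw bool array and showing only the -of branch:

// Consume-and-mark argument parsing (condensed sketch, not the full parser)
std::vector<bool> valid(arguments.size(), true);
for (size_t i = 0; i < arguments.size(); ++i)
{
    if (arguments[i].compare("-of") == 0 && i + 1 < arguments.size())
    {
        output_files.push_back(output_root + arguments[i + 1]);
        valid[i] = false;     // consume the flag...
        valid[i + 1] = false; // ...and its value
        i++;
    }
}
// Strip consumed entries so later parsers see only what remains
std::vector<std::string> remaining;
for (size_t i = 0; i < arguments.size(); ++i)
    if (valid[i])
        remaining.push_back(arguments[i]);
arguments.swap(remaining);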
View File

@@ -65,7 +65,7 @@ namespace Recorder
void WriteLine(int observation_count, double time_stamp, bool landmark_detection_success, double landmark_confidence,
const cv::Mat_<double>& landmarks_2D, const cv::Mat_<double>& landmarks_3D, const cv::Mat_<double>& pdm_model_params, const cv::Vec6d& rigid_shape_params, cv::Vec6d& pose_estimate,
const cv::Point3f& gazeDirection0, const cv::Point3f& gazeDirection1, const cv::Vec2d& gaze_angle, const cv::Mat_<double>& eye_landmarks,
const cv::Point3f& gazeDirection0, const cv::Point3f& gazeDirection1, const cv::Vec2d& gaze_angle, const std::vector<cv::Point2d>& eye_landmarks,
const std::vector<std::pair<std::string, double> >& au_intensities, const std::vector<std::pair<std::string, double> >& au_occurences);
// TODO have set functions?

View File

@@ -87,7 +87,7 @@ namespace Recorder
// Gaze related observations
void SetObservationGaze(const cv::Point3f& gazeDirection0, const cv::Point3f& gazeDirection1,
const cv::Vec2d& gaze_angle, const cv::Mat_<double>& eye_landmarks);
const cv::Vec2d& gaze_angle, const std::vector<cv::Point2d>& eye_landmarks);
// Face alignment related observations
void SetObservationFaceAlign(const cv::Mat& aligned_face);
@@ -145,7 +145,7 @@ namespace Recorder
cv::Point3f gaze_direction0;
cv::Point3f gaze_direction1;
cv::Vec2d gaze_angle;
cv::Mat_<double> eye_landmarks;
std::vector<cv::Point2d> eye_landmarks;
int observation_count;

View File

@@ -157,7 +157,7 @@ bool RecorderCSV::Open(std::string output_file_name, bool output_2D_landmarks, b
// TODO check if the stream is open
void RecorderCSV::WriteLine(int observation_count, double time_stamp, bool landmark_detection_success, double landmark_confidence,
const cv::Mat_<double>& landmarks_2D, const cv::Mat_<double>& landmarks_3D, const cv::Mat_<double>& pdm_model_params, const cv::Vec6d& rigid_shape_params, cv::Vec6d& pose_estimate,
const cv::Point3f& gazeDirection0, const cv::Point3f& gazeDirection1, const cv::Vec2d& gaze_angle, const cv::Mat_<double>& eye_landmarks,
const cv::Point3f& gazeDirection0, const cv::Point3f& gazeDirection1, const cv::Vec2d& gaze_angle, const std::vector<cv::Point2d>& eye_landmarks,
const std::vector<std::pair<std::string, double> >& au_intensities, const std::vector<std::pair<std::string, double> >& au_occurences)
{

View File

@@ -151,7 +151,7 @@ void RecorderOpenFace::WriteObservation()
if(output_tracked_video)
{
if (vis_to_out.empty)
if (vis_to_out.empty())
{
WARN_STREAM("Output tracked video frame is not set");
}
@@ -197,7 +197,7 @@ void RecorderOpenFace::SetObservationActionUnits(const std::vector<std::pair<std
}
void RecorderOpenFace::SetObservationGaze(const cv::Point3f& gaze_direction0, const cv::Point3f& gaze_direction1,
const cv::Vec2d& gaze_angle, const cv::Mat_<double>& eye_landmarks)
const cv::Vec2d& gaze_angle, const std::vector<cv::Point2d>& eye_landmarks)
{
this->gaze_direction0 = gaze_direction0;
this->gaze_direction1 = gaze_direction1;
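The signature change above means callers hand the eye landmarks over as points rather than packing them into a cv::Mat_<double>. A brief call-site sketch, matching the FeatureExtraction.cpp usage earlier in this diff:

// Eye landmarks now travel as a vector of 2D points
std::vector<cv::Point2d> eye_lmks = LandmarkDetector::CalculateAllEyeLandmarks(face_model);
openFaceRec.SetObservationGaze(gazeDirection0, gazeDirection1, gazeAngle, eye_lmks);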