- Fixing the issue where gaze was not tracked properly in video and landmark modes.

- Fixing the simscale/simalign bug: the -simscale and -simsize options were not applied consistently to the similarity-aligned output.
Tadas Baltrusaitis 2017-03-08 11:46:50 -05:00
parent a3e66319b5
commit 52c50b4ff3
10 changed files with 226 additions and 256 deletions

.gitignore

@@ -44,3 +44,6 @@ exe/Recording/Debug/
lib/3rdParty/dlib/Debug/
lib/local/FaceAnalyser/Debug/
lib/local/LandmarkDetector/Debug/
matlab_runners/Head Pose Experiments/experiments/biwi_out/
matlab_runners/Head Pose Experiments/experiments/bu_out/
matlab_runners/Head Pose Experiments/experiments/ict_out/

.travis.yml

@@ -75,4 +75,5 @@ script:
- ../build/bin/FaceLandmarkImg -inroot ../videos -f Obama.jpg -outroot data -of obama.txt -op obama.3d -oi obama.bmp -multi_view 1 -wild -q
- ../build/bin/FaceLandmarkVidMulti -inroot ../videos -f multi_face.avi -outroot output -ov multi_face.avi -q
- ../build/bin/FeatureExtraction -f "../videos/1815_01_008_tony_blair.avi" -outroot output_features -ov blair.avi -of "1815_01_008_tony_blair.txt" -simalign aligned -ov feat_test.avi -hogalign hog_test.dat -q
- ../build/bin/FeatureExtraction -f "../videos/1815_01_008_tony_blair.avi" -outroot output_features -simsize 200 -simscale 0.5 -ov blair.avi -of "1815_01_008_tony_blair.txt" -simalign aligned -ov feat_test.avi -hogalign hog_test.dat -q
- ../build/bin/FaceLandmarkVid -inroot ../videos -f 1815_01_008_tony_blair.avi -f 0188_03_021_al_pacino.avi -f 0217_03_006_alanis_morissette.avi -outroot output_data -ov 1.avi -ov 2.avi -ov 3.avi -q

appveyor.yml

@@ -25,4 +25,5 @@ test_script:
- cmd: if exist "../videos" (FaceLandmarkImg.exe -inroot ../videos -f obama.jpg -outroot out_data -of obama.pts -op obama.3d -oi obama.bmp -q) else (FaceLandmarkImg.exe -inroot ../../videos -f obama.jpg -outroot out_data -of obama.pts -op obama.3d -oi obama.bmp -q)
- cmd: if exist "../videos" (FaceLandmarkVidMulti.exe -inroot ../videos -f multi_face.avi -ov multi_face.avi -q) else (FaceLandmarkVidMulti.exe -inroot ../../videos -f multi_face.avi -ov multi_face.avi -q)
- cmd: if exist "../videos" (FeatureExtraction.exe -f "../videos/1815_01_008_tony_blair.avi" -outroot output_features -of "1815_01_008_tony_blair.txt" -simalign aligned -ov feat_track.avi -hogalign hog_test.dat -q) else (FeatureExtraction.exe -f "../../videos/1815_01_008_tony_blair.avi" -outroot output_features -of "1815_01_008_tony_blair.txt" -simalign aligned -ov feat_track.avi -hogalign hog_test.dat -q)
- cmd: if exist "../videos" (FeatureExtraction.exe -f "../videos/1815_01_008_tony_blair.avi" -outroot output_features -of "1815_01_008_tony_blair.txt" -simalign aligned -simsize 200 -simscale 0.5 -ov feat_track.avi -hogalign hog_test.dat -q) else (FeatureExtraction.exe -f "../../videos/1815_01_008_tony_blair.avi" -outroot output_features -of "1815_01_008_tony_blair.txt" -simalign aligned -simsize 200 -simscale 0.5 -ov feat_track.avi -hogalign hog_test.dat -q)
- cmd: if exist "../videos" (FaceLandmarkVid.exe -f "../videos/1815_01_008_tony_blair.avi" -ov track.avi -q) else (FaceLandmarkVid.exe -f "../../videos/1815_01_008_tony_blair.avi" -ov track.avi -q)

exe/FeatureExtraction/FeatureExtraction.cpp

@@ -308,7 +308,7 @@ int main (int argc, char **argv)
vector<string> output_similarity_align;
vector<string> output_hog_align_files;
double sim_scale = 0.7;
double sim_scale = -1;
int sim_size = 112;
bool grayscale = false;
bool video_output = false;
@@ -391,7 +391,10 @@ int main (int argc, char **argv)
}
// Creating a face analyser that will be used for AU extraction
FaceAnalysis::FaceAnalyser face_analyser(vector<cv::Vec3d>(), 0.7, 112, 112, au_loc, tri_loc);
// Make sure sim_scale is proportional to sim_size if not set
if (sim_scale == -1) sim_scale = sim_size * (0.7 / 112.0);
FaceAnalysis::FaceAnalyser face_analyser(vector<cv::Vec3d>(), sim_scale, sim_size, sim_size, au_loc, tri_loc);
while(!done) // this is not a for loop as we might also be reading from a webcam
{
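A worked example of the new proportional default, assuming the user passes -simsize 200 without -simscale (the values used in the CI runs above):

    double sim_scale = -1;     // -1 marks "not set on the command line"
    int sim_size = 200;        // from "-simsize 200"

    if (sim_scale == -1)
        sim_scale = sim_size * (0.7 / 112.0);   // 200 * 0.00625 == 1.25

This keeps the face occupying the same fraction of a 200x200 crop as it does of the reference 112x112 crop at scale 0.7.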
@@ -593,7 +596,7 @@ int main (int argc, char **argv)
}
if(hog_output_file.is_open())
{
FaceAnalysis::Extract_FHOG_descriptor(hog_descriptor, sim_warped_img, num_hog_rows, num_hog_cols);
face_analyser.GetLatestHOG(hog_descriptor, num_hog_rows, num_hog_cols);
if(visualise_hog && !det_parameters.quiet_mode)
{
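The executable now reuses the FHOG descriptor that FaceAnalyser::AddNextFrame already computed for this frame instead of re-extracting it from the warped image; a minimal sketch of the caller side (names as in this file):

    cv::Mat_<double> hog_descriptor;
    int num_hog_rows = 0, num_hog_cols = 0;
    // copies the analyser's cached per-frame descriptor (hog_desc_frame)
    // rather than running FHOG extraction a second time
    face_analyser.GetLatestHOG(hog_descriptor, num_hog_rows, num_hog_cols);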
@@ -615,13 +618,13 @@ int main (int argc, char **argv)
pose_estimate = LandmarkDetector::GetCorrectedPoseCamera(face_model, fx, fy, cx, cy);
}
if(hog_output_file.is_open())
if (hog_output_file.is_open())
{
output_HOG_frame(&hog_output_file, detection_success, hog_descriptor, num_hog_rows, num_hog_cols);
}
// Write the similarity normalised output
if(!output_similarity_align.empty())
if (!output_similarity_align.empty())
{
if (sim_warped_img.channels() == 3 && grayscale)
@@ -631,8 +634,8 @@ int main (int argc, char **argv)
char name[100];
// output the frame number
std::sprintf(name, "frame_det_%06d.bmp", frame_count);
// Filename is based on frame number
std::sprintf(name, "frame_det_%06d.bmp", frame_count + 1);
// Construct the output filename
boost::filesystem::path slash("/");
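With the + 1 the aligned-face dumps become 1-based, for example:

    char name[100];
    // frame_count == 0 on the first frame, so the first file written is
    // "frame_det_000001.bmp" rather than "frame_det_000000.bmp"
    std::sprintf(name, "frame_det_%06d.bmp", frame_count + 1);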
@@ -1206,6 +1209,7 @@ void get_output_feature_params(vector<string> &output_similarity_aligned, vector
}
// Can process images via directories creating a separate output file per directory
void get_image_input_output_params_feats(vector<vector<string> > &input_image_files, bool& as_video, vector<string> &arguments)
{

lib/local/FaceAnalyser/include/FaceAnalyser.h

@@ -74,184 +74,183 @@
namespace FaceAnalysis
{
class FaceAnalyser{
class FaceAnalyser {
public:
public:
enum RegressorType{ SVR_appearance_static_linear = 0, SVR_appearance_dynamic_linear = 1, SVR_dynamic_geom_linear = 2, SVR_combined_linear = 3, SVM_linear_stat = 4, SVM_linear_dyn = 5, SVR_linear_static_seg = 6, SVR_linear_dynamic_seg =7};
enum RegressorType { SVR_appearance_static_linear = 0, SVR_appearance_dynamic_linear = 1, SVR_dynamic_geom_linear = 2, SVR_combined_linear = 3, SVM_linear_stat = 4, SVM_linear_dyn = 5, SVR_linear_static_seg = 6, SVR_linear_dynamic_seg = 7 };
// Constructor from a model file (or a default one if not provided
// TODO scale width and height should be read in as part of the model as opposed to being here?
FaceAnalyser(vector<cv::Vec3d> orientation_bins = vector<cv::Vec3d>(), double scale = 0.7, int width = 112, int height = 112, std::string au_location = "AU_predictors/AU_all_best.txt", std::string tri_location = "model/tris_68_full.txt");
// Constructor from a model file (or a default one if not provided
// TODO scale width and height should be read in as part of the model as opposed to being here?
FaceAnalyser(vector<cv::Vec3d> orientation_bins = vector<cv::Vec3d>(), double scale = 0.7, int width = 112, int height = 112, std::string au_location = "AU_predictors/AU_all_best.txt", std::string tri_location = "model/tris_68_full.txt");
void AddNextFrame(const cv::Mat& frame, const LandmarkDetector::CLNF& clnf, double timestamp_seconds, bool online = false, bool visualise = true);
void AddNextFrame(const cv::Mat& frame, const LandmarkDetector::CLNF& clnf, double timestamp_seconds, bool online = false, bool visualise = true);
// If the features are extracted manually (shouldn't really be used)
void PredictAUs(const cv::Mat_<double>& hog_features, const cv::Mat_<double>& geom_features, const LandmarkDetector::CLNF& clnf_model, bool online);
// If the features are extracted manually (shouldn't really be used)
void PredictAUs(const cv::Mat_<double>& hog_features, const cv::Mat_<double>& geom_features, const LandmarkDetector::CLNF& clnf_model, bool online);
cv::Mat GetLatestHOGDescriptorVisualisation();
cv::Mat GetLatestHOGDescriptorVisualisation();
double GetCurrentTimeSeconds();
double GetCurrentTimeSeconds();
// Grab the current predictions about AUs from the face analyser
std::vector<std::pair<std::string, double>> GetCurrentAUsClass() const; // AU presence
std::vector<std::pair<std::string, double>> GetCurrentAUsReg() const; // AU intensity
std::vector<std::pair<std::string, double>> GetCurrentAUsCombined() const; // Both presense and intensity
// Grab the current predictions about AUs from the face analyser
std::vector<std::pair<std::string, double>> GetCurrentAUsClass() const; // AU presence
std::vector<std::pair<std::string, double>> GetCurrentAUsReg() const; // AU intensity
std::vector<std::pair<std::string, double>> GetCurrentAUsCombined() const; // Both presense and intensity
// A standalone call for predicting AUs from a static image, the first element in the pair represents occurence the second intensity
// This call is useful for detecting action units in images
std::pair<std::vector<std::pair<string, double>>, std::vector<std::pair<string, double>>> PredictStaticAUs(const cv::Mat& frame, const LandmarkDetector::CLNF& clnf, bool visualise = true);
// A standalone call for predicting AUs from a static image, the first element in the pair represents occurence the second intensity
// This call is useful for detecting action units in images
std::pair<std::vector<std::pair<string, double>>, std::vector<std::pair<string, double>>> PredictStaticAUs(const cv::Mat& frame, const LandmarkDetector::CLNF& clnf, bool visualise = true);
void Reset();
void Reset();
void GetLatestHOG(cv::Mat_<double>& hog_descriptor, int& num_rows, int& num_cols);
void GetLatestAlignedFace(cv::Mat& image);
void GetLatestHOG(cv::Mat_<double>& hog_descriptor, int& num_rows, int& num_cols);
void GetLatestAlignedFace(cv::Mat& image);
void GetLatestNeutralHOG(cv::Mat_<double>& hog_descriptor, int& num_rows, int& num_cols);
void GetLatestNeutralHOG(cv::Mat_<double>& hog_descriptor, int& num_rows, int& num_cols);
cv::Mat_<int> GetTriangulation();
cv::Mat_<int> GetTriangulation();
cv::Mat_<uchar> GetLatestAlignedFaceGrayscale();
void GetGeomDescriptor(cv::Mat_<double>& geom_desc);
void GetGeomDescriptor(cv::Mat_<double>& geom_desc);
// Grab the names of AUs being predicted
std::vector<std::string> GetAUClassNames() const; // Presence
std::vector<std::string> GetAURegNames() const; // Intensity
void ExtractCurrentMedians(vector<cv::Mat>& hog_medians, vector<cv::Mat>& face_image_medians, vector<cv::Vec3d>& orientations);
// Grab the names of AUs being predicted
std::vector<std::string> GetAUClassNames() const; // Presence
std::vector<std::string> GetAURegNames() const; // Intensity
// Identify if models are static or dynamic (useful for correction and shifting)
std::vector<bool> GetDynamicAUClass() const; // Presence
std::vector<std::pair<string, bool>> GetDynamicAUReg() const; // Intensity
// Identify if models are static or dynamic (useful for correction and shifting)
std::vector<bool> GetDynamicAUClass() const; // Presence
std::vector<std::pair<string, bool>> GetDynamicAUReg() const; // Intensity
void ExtractAllPredictionsOfflineReg(vector<std::pair<std::string, vector<double>>>& au_predictions, vector<double>& confidences, vector<bool>& successes, vector<double>& timestamps, bool dynamic);
void ExtractAllPredictionsOfflineClass(vector<std::pair<std::string, vector<double>>>& au_predictions, vector<double>& confidences, vector<bool>& successes, vector<double>& timestamps, bool dynamic);
void ExtractAllPredictionsOfflineReg(vector<std::pair<std::string, vector<double>>>& au_predictions, vector<double>& confidences, vector<bool>& successes, vector<double>& timestamps, bool dynamic);
void ExtractAllPredictionsOfflineClass(vector<std::pair<std::string, vector<double>>>& au_predictions, vector<double>& confidences, vector<bool>& successes, vector<double>& timestamps, bool dynamic);
private:
// Helper function for post-processing AU output files
void FaceAnalyser::PostprocessOutputFile(string output_file, bool dynamic);
// Where the predictions are kept
std::vector<std::pair<std::string, double>> AU_predictions_reg;
std::vector<std::pair<std::string, double>> AU_predictions_class;
private:
std::vector<std::pair<std::string, double>> AU_predictions_combined;
// Where the predictions are kept
std::vector<std::pair<std::string, double>> AU_predictions_reg;
std::vector<std::pair<std::string, double>> AU_predictions_class;
// Keeping track of AU predictions over time (useful for post-processing)
vector<double> timestamps;
std::map<std::string, vector<double>> AU_predictions_reg_all_hist;
std::map<std::string, vector<double>> AU_predictions_class_all_hist;
std::vector<double> confidences;
std::vector<bool> valid_preds;
std::vector<std::pair<std::string, double>> AU_predictions_combined;
int frames_tracking;
// Keeping track of AU predictions over time (useful for post-processing)
vector<double> timestamps;
std::map<std::string, vector<double>> AU_predictions_reg_all_hist;
std::map<std::string, vector<double>> AU_predictions_class_all_hist;
std::vector<double> confidences;
std::vector<bool> valid_preds;
// Cache of intermediate images
cv::Mat_<uchar> aligned_face_grayscale;
cv::Mat aligned_face;
cv::Mat hog_descriptor_visualisation;
int frames_tracking;
// Private members to be used for predictions
// The HOG descriptor of the last frame
cv::Mat_<double> hog_desc_frame;
int num_hog_rows;
int num_hog_cols;
// Cache of intermediate images
cv::Mat aligned_face_for_au;
cv::Mat aligned_face_for_output;
cv::Mat hog_descriptor_visualisation;
// Keep a running median of the hog descriptors and a aligned images
cv::Mat_<double> hog_desc_median;
cv::Mat_<double> face_image_median;
// Private members to be used for predictions
// The HOG descriptor of the last frame
cv::Mat_<double> hog_desc_frame;
int num_hog_rows;
int num_hog_cols;
// Use histograms for quick (but approximate) median computation
// Use the same for
vector<cv::Mat_<unsigned int> > hog_desc_hist;
// Keep a running median of the hog descriptors and a aligned images
cv::Mat_<double> hog_desc_median;
cv::Mat_<double> face_image_median;
// This is not being used at the moment as it is a bit slow
vector<cv::Mat_<unsigned int> > face_image_hist;
vector<int> face_image_hist_sum;
// Use histograms for quick (but approximate) median computation
// Use the same for
vector<cv::Mat_<unsigned int> > hog_desc_hist;
vector<cv::Vec3d> head_orientations;
// This is not being used at the moment as it is a bit slow
vector<cv::Mat_<unsigned int> > face_image_hist;
vector<int> face_image_hist_sum;
int num_bins_hog;
double min_val_hog;
double max_val_hog;
vector<int> hog_hist_sum;
int view_used;
vector<cv::Vec3d> head_orientations;
// The geometry descriptor (rigid followed by non-rigid shape parameters from CLNF)
cv::Mat_<double> geom_descriptor_frame;
cv::Mat_<double> geom_descriptor_median;
int num_bins_hog;
double min_val_hog;
double max_val_hog;
vector<int> hog_hist_sum;
int view_used;
int geom_hist_sum;
cv::Mat_<unsigned int> geom_desc_hist;
int num_bins_geom;
double min_val_geom;
double max_val_geom;
// The geometry descriptor (rigid followed by non-rigid shape parameters from CLNF)
cv::Mat_<double> geom_descriptor_frame;
cv::Mat_<double> geom_descriptor_median;
// Using the bounding box of previous analysed frame to determine if a reset is needed
cv::Rect_<double> face_bounding_box;
int geom_hist_sum;
cv::Mat_<unsigned int> geom_desc_hist;
int num_bins_geom;
double min_val_geom;
double max_val_geom;
// The AU predictions internally
std::vector<std::pair<std::string, double>> PredictCurrentAUs(int view);
std::vector<std::pair<std::string, double>> PredictCurrentAUsClass(int view);
// Using the bounding box of previous analysed frame to determine if a reset is needed
cv::Rect_<double> face_bounding_box;
// special step for online (rather than offline AU prediction)
std::vector<pair<string, double>> CorrectOnlineAUs(std::vector<std::pair<std::string, double>> predictions_orig, int view, bool dyn_shift = false, bool dyn_scale = false, bool update_track = true, bool clip_values = false);
// The AU predictions internally
std::vector<std::pair<std::string, double>> PredictCurrentAUs(int view);
std::vector<std::pair<std::string, double>> PredictCurrentAUsClass(int view);
void ReadAU(std::string au_location);
// special step for online (rather than offline AU prediction)
std::vector<pair<string, double>> CorrectOnlineAUs(std::vector<std::pair<std::string, double>> predictions_orig, int view, bool dyn_shift = false, bool dyn_scale = false, bool update_track = true, bool clip_values = false);
void ReadRegressor(std::string fname, const vector<string>& au_names);
void ReadAU(std::string au_location);
// A utility function for keeping track of approximate running medians used for AU and emotion inference using a set of histograms (the histograms are evenly spaced from min_val to max_val)
// Descriptor has to be a row vector
// TODO this duplicates some other code
void UpdateRunningMedian(cv::Mat_<unsigned int>& histogram, int& hist_sum, cv::Mat_<double>& median, const cv::Mat_<double>& descriptor, bool update, int num_bins, double min_val, double max_val);
void ExtractMedian(cv::Mat_<unsigned int>& histogram, int hist_count, cv::Mat_<double>& median, int num_bins, double min_val, double max_val);
void ReadRegressor(std::string fname, const vector<string>& au_names);
// The linear SVR regressors
SVR_static_lin_regressors AU_SVR_static_appearance_lin_regressors;
SVR_dynamic_lin_regressors AU_SVR_dynamic_appearance_lin_regressors;
// A utility function for keeping track of approximate running medians used for AU and emotion inference using a set of histograms (the histograms are evenly spaced from min_val to max_val)
// Descriptor has to be a row vector
// TODO this duplicates some other code
void UpdateRunningMedian(cv::Mat_<unsigned int>& histogram, int& hist_sum, cv::Mat_<double>& median, const cv::Mat_<double>& descriptor, bool update, int num_bins, double min_val, double max_val);
void ExtractMedian(cv::Mat_<unsigned int>& histogram, int hist_count, cv::Mat_<double>& median, int num_bins, double min_val, double max_val);
// The linear SVM classifiers
SVM_static_lin AU_SVM_static_appearance_lin;
SVM_dynamic_lin AU_SVM_dynamic_appearance_lin;
// The linear SVR regressors
SVR_static_lin_regressors AU_SVR_static_appearance_lin_regressors;
SVR_dynamic_lin_regressors AU_SVR_dynamic_appearance_lin_regressors;
// The AUs predicted by the model are not always 0 calibrated to a person. That is they don't always predict 0 for a neutral expression
// Keeping track of the predictions we can correct for this, by assuming that at least "ratio" of frames are neutral and subtract that value of prediction, only perform the correction after min_frames
void UpdatePredictionTrack(cv::Mat_<unsigned int>& prediction_corr_histogram, int& prediction_correction_count, vector<double>& correction, const vector<pair<string, double>>& predictions, double ratio=0.25, int num_bins = 200, double min_val = -3, double max_val = 5, int min_frames = 10);
void GetSampleHist(cv::Mat_<unsigned int>& prediction_corr_histogram, int prediction_correction_count, vector<double>& sample, double ratio, int num_bins = 200, double min_val = 0, double max_val = 5);
// The linear SVM classifiers
SVM_static_lin AU_SVM_static_appearance_lin;
SVM_dynamic_lin AU_SVM_dynamic_appearance_lin;
void PostprocessPredictions();
// The AUs predicted by the model are not always 0 calibrated to a person. That is they don't always predict 0 for a neutral expression
// Keeping track of the predictions we can correct for this, by assuming that at least "ratio" of frames are neutral and subtract that value of prediction, only perform the correction after min_frames
void UpdatePredictionTrack(cv::Mat_<unsigned int>& prediction_corr_histogram, int& prediction_correction_count, vector<double>& correction, const vector<pair<string, double>>& predictions, double ratio = 0.25, int num_bins = 200, double min_val = -3, double max_val = 5, int min_frames = 10);
void GetSampleHist(cv::Mat_<unsigned int>& prediction_corr_histogram, int prediction_correction_count, vector<double>& sample, double ratio, int num_bins = 200, double min_val = 0, double max_val = 5);
vector<cv::Mat_<unsigned int>> au_prediction_correction_histogram;
vector<int> au_prediction_correction_count;
void PostprocessPredictions();
// Some dynamic scaling (the logic is that before the extreme versions of expression or emotion are shown,
// it is hard to tell the boundaries, this allows us to scale the model to the most extreme seen)
// They have to be view specific
vector<vector<double>> dyn_scaling;
vector<cv::Mat_<unsigned int>> au_prediction_correction_histogram;
vector<int> au_prediction_correction_count;
// Keeping track of predictions for summary stats
cv::Mat_<double> AU_prediction_track;
cv::Mat_<double> geom_desc_track;
// Some dynamic scaling (the logic is that before the extreme versions of expression or emotion are shown,
// it is hard to tell the boundaries, this allows us to scale the model to the most extreme seen)
// They have to be view specific
vector<vector<double>> dyn_scaling;
double current_time_seconds;
// Keeping track of predictions for summary stats
cv::Mat_<double> AU_prediction_track;
cv::Mat_<double> geom_desc_track;
// Used for face alignment
cv::Mat_<int> triangulation;
double align_scale;
int align_width;
int align_height;
double current_time_seconds;
// Useful placeholder for renormalizing the initial frames of shorter videos
int max_init_frames = 3000;
vector<cv::Mat_<double>> hog_desc_frames_init;
vector<cv::Mat_<double>> geom_descriptor_frames_init;
vector<int> views;
bool postprocessed = false;
int frames_tracking_succ = 0;
// Used for face alignment
cv::Mat_<int> triangulation;
double align_scale;
int align_width;
int align_height;
};
//===========================================================================
// Useful placeholder for renormalizing the initial frames of shorter videos
int max_init_frames = 3000;
vector<cv::Mat_<double>> hog_desc_frames_init;
vector<cv::Mat_<double>> geom_descriptor_frames_init;
vector<int> views;
bool postprocessed = false;
int frames_tracking_succ = 0;
};
//===========================================================================
}
#endif
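The net effect of the header changes is that the single aligned_face buffer is split in two; a comment-level sketch of the new members (names as declared above):

    // aligned_face_for_au     : always warped at scale 0.7 to 112x112, the
    //                           geometry the AU models were trained on, so the
    //                           AU input is unaffected by -simscale/-simsize
    // aligned_face_for_output : warped with the user's align_scale, align_width
    //                           and align_height, used for the -simalign dumps
    // (GetLatestAlignedFaceGrayscale and its grayscale cache were removed)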

lib/local/FaceAnalyser/src/FaceAnalyser.cpp

@@ -226,7 +226,7 @@ void FaceAnalyser::GetLatestHOG(cv::Mat_<double>& hog_descriptor, int& num_rows,
void FaceAnalyser::GetLatestAlignedFace(cv::Mat& image)
{
image = this->aligned_face.clone();
image = this->aligned_face_for_output.clone();
}
void FaceAnalyser::GetLatestNeutralHOG(cv::Mat_<double>& hog_descriptor, int& num_rows, int& num_cols)
@@ -267,50 +267,15 @@ int GetViewId(const vector<cv::Vec3d> orientations_all, const cv::Vec3d& orienta
}
void FaceAnalyser::ExtractCurrentMedians(vector<cv::Mat>& hog_medians, vector<cv::Mat>& face_image_medians, vector<cv::Vec3d>& orientations)
{
orientations = this->head_orientations;
for(size_t i = 0; i < orientations.size(); ++i)
{
cv::Mat_<double> median_face(this->face_image_median.rows, this->face_image_median.cols, 0.0);
cv::Mat_<double> median_hog(this->hog_desc_median.rows, this->hog_desc_median.cols, 0.0);
ExtractMedian(this->face_image_hist[i], this->face_image_hist_sum[i], median_face, 256, 0, 255);
ExtractMedian(this->hog_desc_hist[i], this->hog_hist_sum[i], median_hog, this->num_bins_hog, 0, 1);
// Add the HOG sample
hog_medians.push_back(median_hog.clone());
// For the face image need to convert it to suitable format
cv::Mat_<uchar> aligned_face_cols_uchar;
median_face.convertTo(aligned_face_cols_uchar, CV_8U);
cv::Mat aligned_face_uchar;
if(aligned_face.channels() == 1)
{
aligned_face_uchar = cv::Mat(aligned_face.rows, aligned_face.cols, CV_8U, aligned_face_cols_uchar.data);
}
else
{
aligned_face_uchar = cv::Mat(aligned_face.rows, aligned_face.cols, CV_8UC3, aligned_face_cols_uchar.data);
}
face_image_medians.push_back(aligned_face_uchar.clone());
}
}
std::pair<std::vector<std::pair<string, double>>, std::vector<std::pair<string, double>>> FaceAnalyser::PredictStaticAUs(const cv::Mat& frame, const LandmarkDetector::CLNF& clnf, bool visualise)
{
// First align the face
AlignFaceMask(aligned_face, frame, clnf, triangulation, true, align_scale, align_width, align_height);
AlignFaceMask(aligned_face_for_au, frame, clnf, triangulation, true, 0.7, 112, 112);
// Extract HOG descriptor from the frame and convert it to a useable format
cv::Mat_<double> hog_descriptor;
Extract_FHOG_descriptor(hog_descriptor, aligned_face, this->num_hog_rows, this->num_hog_cols);
Extract_FHOG_descriptor(hog_descriptor, aligned_face_for_au, this->num_hog_rows, this->num_hog_cols);
// Store the descriptor
hog_desc_frame = hog_descriptor;
@@ -326,10 +291,10 @@ std::pair<std::vector<std::pair<string, double>>, std::vector<std::pair<string,
cv::hconcat(locs.t(), geom_descriptor_frame.clone(), geom_descriptor_frame);
// First convert the face image to double representation as a row vector
cv::Mat_<uchar> aligned_face_cols(1, aligned_face.cols * aligned_face.rows * aligned_face.channels(), aligned_face.data, 1);
cv::Mat_<double> aligned_face_cols_double;
aligned_face_cols.convertTo(aligned_face_cols_double, CV_64F);
// First convert the face image to double representation as a row vector, TODO rem
//cv::Mat_<uchar> aligned_face_cols(1, aligned_face_for_au.cols * aligned_face_for_au.rows * aligned_face_for_au.channels(), aligned_face_for_au.data, 1);
//cv::Mat_<double> aligned_face_cols_double;
//aligned_face_cols.convertTo(aligned_face_cols_double, CV_64F);
// Visualising the median HOG
if (visualise)
@@ -361,28 +326,33 @@ void FaceAnalyser::AddNextFrame(const cv::Mat& frame, const LandmarkDetector::CL
frames_tracking++;
// First align the face if tracking was successfull
if(clnf_model.detection_success)
if (clnf_model.detection_success)
{
AlignFaceMask(aligned_face, frame, clnf_model, triangulation, true, align_scale, align_width, align_height);
}
else
{
aligned_face = cv::Mat(align_height, align_width, CV_8UC3);
aligned_face.setTo(0);
}
if(aligned_face.channels() == 3)
{
cv::cvtColor(aligned_face, aligned_face_grayscale, CV_BGR2GRAY);
// The aligned face requirement for AUs
AlignFaceMask(aligned_face_for_au, frame, clnf_model, triangulation, true, 0.7, 112, 112);
// If the output requirement matches use the already computed one, else compute it again
if (align_scale == 0.7 && align_width == 112 && align_height == 112)
{
aligned_face_for_output = aligned_face_for_au.clone();
}
else
{
AlignFaceMask(aligned_face_for_output, frame, clnf_model, triangulation, true, align_scale, align_width, align_height);
}
}
else
{
aligned_face_grayscale = aligned_face.clone();
aligned_face_for_output = cv::Mat(align_height, align_width, CV_8UC3);
aligned_face_for_au = cv::Mat(112, 112, CV_8UC3);
aligned_face_for_output.setTo(0);
aligned_face_for_au.setTo(0);
}
// Extract HOG descriptor from the frame and convert it to a useable format
cv::Mat_<double> hog_descriptor;
Extract_FHOG_descriptor(hog_descriptor, aligned_face, this->num_hog_rows, this->num_hog_cols);
Extract_FHOG_descriptor(hog_descriptor, aligned_face_for_au, this->num_hog_rows, this->num_hog_cols);
// Store the descriptor
hog_desc_frame = hog_descriptor;
@@ -425,7 +395,7 @@ void FaceAnalyser::AddNextFrame(const cv::Mat& frame, const LandmarkDetector::CL
frames_tracking_succ++;
// A small speedup
if(frames_tracking % 2 == 1)
if (frames_tracking % 2 == 1)
{
UpdateRunningMedian(this->hog_desc_hist[orientation_to_use], this->hog_hist_sum[orientation_to_use], this->hog_desc_median, hog_descriptor, update_median, this->num_bins_hog, this->min_val_hog, this->max_val_hog);
this->hog_desc_median.setTo(0, this->hog_desc_median < 0);
@@ -434,7 +404,7 @@ void FaceAnalyser::AddNextFrame(const cv::Mat& frame, const LandmarkDetector::CL
// Geom descriptor and its median
geom_descriptor_frame = clnf_model.params_local.t();
if(!clnf_model.detection_success)
if (!clnf_model.detection_success)
{
geom_descriptor_frame.setTo(0);
}
@@ -445,21 +415,18 @@ void FaceAnalyser::AddNextFrame(const cv::Mat& frame, const LandmarkDetector::CL
cv::hconcat(locs.t(), geom_descriptor_frame.clone(), geom_descriptor_frame);
// A small speedup
if(frames_tracking % 2 == 1)
if (frames_tracking % 2 == 1)
{
UpdateRunningMedian(this->geom_desc_hist, this->geom_hist_sum, this->geom_descriptor_median, geom_descriptor_frame, update_median, this->num_bins_geom, this->min_val_geom, this->max_val_geom);
}
// First convert the face image to double representation as a row vector
cv::Mat_<uchar> aligned_face_cols(1, aligned_face.cols * aligned_face.rows * aligned_face.channels(), aligned_face.data, 1);
cv::Mat_<double> aligned_face_cols_double;
aligned_face_cols.convertTo(aligned_face_cols_double, CV_64F);
// TODO get rid of this completely as it takes too long?
//UpdateRunningMedian(this->face_image_hist[orientation_to_use], this->face_image_hist_sum[orientation_to_use], this->face_image_median, aligned_face_cols_double, update_median, 256, 0, 255);
// First convert the face image to double representation as a row vector, TODO rem?
//cv::Mat_<uchar> aligned_face_cols(1, aligned_face.cols * aligned_face.rows * aligned_face.channels(), aligned_face.data, 1);
//cv::Mat_<double> aligned_face_cols_double;
//aligned_face_cols.convertTo(aligned_face_cols_double, CV_64F);
// Visualising the median HOG
if(visualise)
if (visualise)
{
FaceAnalysis::Visualise_FHOG(hog_descriptor, num_hog_rows, num_hog_cols, hog_descriptor_visualisation);
}
@@ -468,9 +435,9 @@ void FaceAnalyser::AddNextFrame(const cv::Mat& frame, const LandmarkDetector::CL
AU_predictions_reg = PredictCurrentAUs(orientation_to_use);
std::vector<std::pair<std::string, double>> AU_predictions_reg_corrected;
if(online)
if (online)
{
AU_predictions_reg_corrected = CorrectOnlineAUs(AU_predictions_reg, orientation_to_use, true, false, clnf_model.detection_success);
AU_predictions_reg_corrected = CorrectOnlineAUs(AU_predictions_reg, orientation_to_use, true, false, clnf_model.detection_success, true);
}
// Add the reg predictions to the historic data
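The extra trailing true in the call above enables the final parameter of CorrectOnlineAUs as declared in FaceAnalyser.h; mapping the arguments onto that declaration:

    // CorrectOnlineAUs(predictions_orig, view,
    //                  /*dyn_shift=*/true, /*dyn_scale=*/false,
    //                  /*update_track=*/clnf_model.detection_success,
    //                  /*clip_values=*/true);  // newly enabled: clip online
    //                                          // AU intensities to the valid range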
@@ -479,7 +446,7 @@ void FaceAnalyser::AddNextFrame(const cv::Mat& frame, const LandmarkDetector::CL
// Find the appropriate AU (if not found add it)
// Only add if the detection was successful
if(clnf_model.detection_success)
if (clnf_model.detection_success)
{
AU_predictions_reg_all_hist[AU_predictions_reg[au].first].push_back(AU_predictions_reg[au].second);
}
@@ -496,7 +463,7 @@ void FaceAnalyser::AddNextFrame(const cv::Mat& frame, const LandmarkDetector::CL
// Find the appropriate AU (if not found add it)
// Only add if the detection was successful
if(clnf_model.detection_success)
if (clnf_model.detection_success)
{
AU_predictions_class_all_hist[AU_predictions_class[au].first].push_back(AU_predictions_class[au].second);
}
@@ -507,7 +474,7 @@ void FaceAnalyser::AddNextFrame(const cv::Mat& frame, const LandmarkDetector::CL
}
if(online)
if (online)
{
AU_predictions_reg = AU_predictions_reg_corrected;
}
@@ -531,8 +498,6 @@ void FaceAnalyser::AddNextFrame(const cv::Mat& frame, const LandmarkDetector::CL
valid_preds.push_back(success);
timestamps.push_back(timestamp_seconds);
}
void FaceAnalyser::GetGeomDescriptor(cv::Mat_<double>& geom_desc)
@@ -1101,12 +1066,6 @@ vector<pair<string, double>> FaceAnalyser::PredictCurrentAUsClass(int view)
return predictions;
}
cv::Mat_<uchar> FaceAnalyser::GetLatestAlignedFaceGrayscale()
{
return aligned_face_grayscale.clone();
}
cv::Mat FaceAnalyser::GetLatestHOGDescriptorVisualisation()
{
return hog_descriptor_visualisation;

lib/local/FaceAnalyser/src/Face_utils.cpp

@@ -221,19 +221,19 @@ namespace FaceAnalysis
destination_landmarks.col(1) = destination_landmarks.col(1) + warp_matrix(1,2);
// Move the eyebrows up to include more of upper face
destination_landmarks.at<double>(0,1) -= 30;
destination_landmarks.at<double>(16,1) -= 30;
destination_landmarks.at<double>(0,1) -= (30/0.7)*sim_scale;
destination_landmarks.at<double>(16,1) -= (30 / 0.7)*sim_scale;
destination_landmarks.at<double>(17,1) -= 30;
destination_landmarks.at<double>(18,1) -= 30;
destination_landmarks.at<double>(19,1) -= 30;
destination_landmarks.at<double>(20,1) -= 30;
destination_landmarks.at<double>(21,1) -= 30;
destination_landmarks.at<double>(22,1) -= 30;
destination_landmarks.at<double>(23,1) -= 30;
destination_landmarks.at<double>(24,1) -= 30;
destination_landmarks.at<double>(25,1) -= 30;
destination_landmarks.at<double>(26,1) -= 30;
destination_landmarks.at<double>(17,1) -= (30 / 0.7)*sim_scale;
destination_landmarks.at<double>(18,1) -= (30 / 0.7)*sim_scale;
destination_landmarks.at<double>(19,1) -= (30 / 0.7)*sim_scale;
destination_landmarks.at<double>(20,1) -= (30 / 0.7)*sim_scale;
destination_landmarks.at<double>(21,1) -= (30 / 0.7)*sim_scale;
destination_landmarks.at<double>(22,1) -= (30 / 0.7)*sim_scale;
destination_landmarks.at<double>(23,1) -= (30 / 0.7)*sim_scale;
destination_landmarks.at<double>(24,1) -= (30 / 0.7)*sim_scale;
destination_landmarks.at<double>(25,1) -= (30 / 0.7)*sim_scale;
destination_landmarks.at<double>(26,1) -= (30 / 0.7)*sim_scale;
destination_landmarks = cv::Mat(destination_landmarks.t()).reshape(1, 1).t();
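The 30 px eyebrow offset was tuned at the reference scale 0.7, so rewriting it as (30 / 0.7) * sim_scale keeps it proportional at other scales; checking the arithmetic for the scales appearing elsewhere in this commit:

    // (30 / 0.7) * 0.7  == 30.0  -> identical to the old behaviour at the default
    // (30 / 0.7) * 0.5  ~= 21.4  -> "-simscale 0.5" from the CI runs
    // (30 / 0.7) * 1.25 ~= 53.6  -> the derived default for "-simsize 200"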

lib/local/LandmarkDetector/src/LandmarkDetectorModel.cpp

@@ -366,6 +366,9 @@ void CLNF::Read(string main_location)
// The other module locations should be defined as relative paths from the main model
boost::filesystem::path root = boost::filesystem::path(main_location).parent_path();
// Assume no eye model, unless read-in
eye_model = false;
// The main file contains the references to other files
while (!locations.eof())
{
@@ -387,6 +390,7 @@ void CLNF::Read(string main_location)
location = location.substr(0, location.size()-1);
}
// append to root
location = (root / location).string();
if (module.compare("LandmarkDetector") == 0)
@@ -536,7 +540,6 @@ void CLNF::Read(string main_location)
tracking_initialised = false;
model_likelihood = -10; // very low
detection_certainty = 1; // very uncertain
eye_model = false;
// Initialising default values for the rest of the variables
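These two hunks are the gaze fix from the commit message: eye_model used to be initialised after the module-reading loop, which overwrote the true set while an eye model was being read in, so the tracker behaved as if no eye model were loaded; a simplified sketch of the corrected order:

    // Assume no eye model unless one of the listed modules provides it
    eye_model = false;              // initialise BEFORE the reading loop

    while (!locations.eof())
    {
        // ... parsing a gaze/eye module sets eye_model = true ...
    }
    // (the old code reset eye_model = false after this loop, undoing the read)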


@@ -35,7 +35,7 @@ for i=1:numel(in_dirs)
command = cat(2, command, ['-asvid -fdir "' in_dirs{i} '" -of "' outputFile '" ']);
command = cat(2, command, [' -simalign "' outputDir_aligned '" -hogalign "' outputHOG_aligned '"']);
command = cat(2, command, [' -simalign "' outputDir_aligned '" -simsize 200 -hogalign "' outputHOG_aligned '"']);
end