From 790e10fdbd8836d90401238db1217281a92a75ab Mon Sep 17 00:00:00 2001 From: Tadas Baltrusaitis Date: Tue, 24 Oct 2017 16:26:08 +0100 Subject: [PATCH] Moving everything to world coordinate space to avoid confusion, it is still possible to call with respect to camera space through code though. --- exe/FaceLandmarkImg/FaceLandmarkImg.cpp | 8 +- exe/FaceLandmarkVid/FaceLandmarkVid.cpp | 5 +- .../FaceLandmarkVidMulti.cpp | 5 +- exe/FeatureExtraction/FeatureExtraction.cpp | 16 +-- lib/local/FaceAnalyser/CMakeLists.txt | 1 + lib/local/FaceAnalyser/src/GazeEstimation.cpp | 2 +- .../include/LandmarkDetectorFunc.h | 15 +-- .../include/LandmarkDetectorUtils.h | 2 +- .../src/LandmarkDetectorFunc.cpp | 100 +++++------------- .../src/LandmarkDetectorUtils.cpp | 9 +- 10 files changed, 44 insertions(+), 119 deletions(-) diff --git a/exe/FaceLandmarkImg/FaceLandmarkImg.cpp b/exe/FaceLandmarkImg/FaceLandmarkImg.cpp index 2c33010..bea0c0e 100644 --- a/exe/FaceLandmarkImg/FaceLandmarkImg.cpp +++ b/exe/FaceLandmarkImg/FaceLandmarkImg.cpp @@ -421,7 +421,7 @@ int main (int argc, char **argv) bool success = LandmarkDetector::DetectLandmarksInImage(grayscale_image, face_detections[face], clnf_model, det_parameters); // Estimate head pose and eye gaze - cv::Vec6d headPose = LandmarkDetector::GetCorrectedPoseWorld(clnf_model, fx, fy, cx, cy); + cv::Vec6d headPose = LandmarkDetector::GetPose(clnf_model, fx, fy, cx, cy); // Gaze tracking, absolute gaze direction cv::Point3f gazeDirection0(0, 0, -1); @@ -477,7 +477,7 @@ int main (int argc, char **argv) if (det_parameters.track_gaze) { - cv::Vec6d pose_estimate_to_draw = LandmarkDetector::GetCorrectedPoseWorld(clnf_model, fx, fy, cx, cy); + cv::Vec6d pose_estimate_to_draw = LandmarkDetector::GetPose(clnf_model, fx, fy, cx, cy); // Draw it in reddish if uncertain, blueish if certain LandmarkDetector::DrawBox(read_image, pose_estimate_to_draw, cv::Scalar(255.0, 0, 0), 3, fx, fy, cx, cy); @@ -538,7 +538,7 @@ int main (int argc, char **argv) LandmarkDetector::DetectLandmarksInImage(grayscale_image, bounding_boxes[i], clnf_model, det_parameters); // Estimate head pose and eye gaze - cv::Vec6d headPose = LandmarkDetector::GetCorrectedPoseWorld(clnf_model, fx, fy, cx, cy); + cv::Vec6d headPose = LandmarkDetector::GetPose(clnf_model, fx, fy, cx, cy); // Gaze tracking, absolute gaze direction cv::Point3f gazeDirection0(0, 0, -1); @@ -573,7 +573,7 @@ int main (int argc, char **argv) if (det_parameters.track_gaze) { - cv::Vec6d pose_estimate_to_draw = LandmarkDetector::GetCorrectedPoseWorld(clnf_model, fx, fy, cx, cy); + cv::Vec6d pose_estimate_to_draw = LandmarkDetector::GetPose(clnf_model, fx, fy, cx, cy); // Draw it in reddish if uncertain, blueish if certain LandmarkDetector::DrawBox(read_image, pose_estimate_to_draw, cv::Scalar(255.0, 0, 0), 3, fx, fy, cx, cy); diff --git a/exe/FaceLandmarkVid/FaceLandmarkVid.cpp b/exe/FaceLandmarkVid/FaceLandmarkVid.cpp index a17e6f5..54e96be 100644 --- a/exe/FaceLandmarkVid/FaceLandmarkVid.cpp +++ b/exe/FaceLandmarkVid/FaceLandmarkVid.cpp @@ -112,7 +112,7 @@ void visualise_tracking(cv::Mat& captured_image, const LandmarkDetector::CLNF& f // A rough heuristic for box around the face width int thickness = (int)std::ceil(2.0* ((double)captured_image.cols) / 640.0); - cv::Vec6d pose_estimate_to_draw = LandmarkDetector::GetCorrectedPoseWorld(face_model, fx, fy, cx, cy); + cv::Vec6d pose_estimate_to_draw = LandmarkDetector::GetPose(face_model, fx, fy, cx, cy); // Draw it in reddish if uncertain, blueish if certain LandmarkDetector::DrawBox(captured_image, pose_estimate_to_draw, cv::Scalar((1 - vis_certainty)*255.0, 0, vis_certainty * 255), thickness, fx, fy, cx, cy); @@ -161,9 +161,8 @@ int main (int argc, char **argv) // Get the input output file parameters // Indicates that rotation should be with respect to world or camera coordinates - bool u; string output_codec; - LandmarkDetector::get_video_input_output_params(files, out_dummy, output_video_files, u, output_codec, arguments); + LandmarkDetector::get_video_input_output_params(files, out_dummy, output_video_files, output_codec, arguments); // The modules that are being used for tracking LandmarkDetector::CLNF clnf_model(det_parameters.model_location); diff --git a/exe/FaceLandmarkVidMulti/FaceLandmarkVidMulti.cpp b/exe/FaceLandmarkVidMulti/FaceLandmarkVidMulti.cpp index 4b94143..0845ced 100644 --- a/exe/FaceLandmarkVidMulti/FaceLandmarkVidMulti.cpp +++ b/exe/FaceLandmarkVidMulti/FaceLandmarkVidMulti.cpp @@ -126,9 +126,8 @@ int main (int argc, char **argv) det_parameters.push_back(det_params); // Get the input output file parameters - bool u; string output_codec; - LandmarkDetector::get_video_input_output_params(files, dummy_out, tracked_videos_output, u, output_codec, arguments); + LandmarkDetector::get_video_input_output_params(files, dummy_out, tracked_videos_output, output_codec, arguments); // Get camera parameters LandmarkDetector::get_camera_params(device, fx, fy, cx, cy, arguments); @@ -355,7 +354,7 @@ int main (int argc, char **argv) int thickness = (int)std::ceil(2.0* ((double)captured_image.cols) / 640.0); // Work out the pose of the head from the tracked model - cv::Vec6d pose_estimate = LandmarkDetector::GetCorrectedPoseWorld(clnf_models[model], fx, fy, cx, cy); + cv::Vec6d pose_estimate = LandmarkDetector::GetPose(clnf_models[model], fx, fy, cx, cy); // Draw it in reddish if uncertain, blueish if certain LandmarkDetector::DrawBox(disp_image, pose_estimate, cv::Scalar((1-detection_certainty)*255.0,0, detection_certainty*255), thickness, fx, fy, cx, cy); diff --git a/exe/FeatureExtraction/FeatureExtraction.cpp b/exe/FeatureExtraction/FeatureExtraction.cpp index 2d9bc38..6e55f3f 100644 --- a/exe/FeatureExtraction/FeatureExtraction.cpp +++ b/exe/FeatureExtraction/FeatureExtraction.cpp @@ -169,7 +169,7 @@ void visualise_tracking(cv::Mat& captured_image, const LandmarkDetector::CLNF& f // A rough heuristic for box around the face width int thickness = (int)std::ceil(2.0* ((double)captured_image.cols) / 640.0); - cv::Vec6d pose_estimate_to_draw = LandmarkDetector::GetCorrectedPoseWorld(face_model, fx, fy, cx, cy); + cv::Vec6d pose_estimate_to_draw = LandmarkDetector::GetPose(face_model, fx, fy, cx, cy); // Draw it in reddish if uncertain, blueish if certain LandmarkDetector::DrawBox(captured_image, pose_estimate_to_draw, cv::Scalar((1 - vis_certainty)*255.0, 0, vis_certainty * 255), thickness, fx, fy, cx, cy); @@ -223,10 +223,8 @@ int main (int argc, char **argv) // Get the input output file parameters - // Indicates that rotation should be with respect to camera or world coordinates - bool use_camera_coordinates = false; string output_codec; //not used but should - LandmarkDetector::get_video_input_output_params(input_files, output_files, tracked_videos_output, use_camera_coordinates, output_codec, arguments); + LandmarkDetector::get_video_input_output_params(input_files, output_files, tracked_videos_output, output_codec, arguments); bool video_input = true; bool images_as_video = false; @@ -517,15 +515,7 @@ int main (int argc, char **argv) } // Work out the pose of the head from the tracked model - cv::Vec6d pose_estimate; - if(use_camera_coordinates) - { - pose_estimate = LandmarkDetector::GetCorrectedPoseCamera(face_model, fx, fy, cx, cy); - } - else - { - pose_estimate = LandmarkDetector::GetCorrectedPoseWorld(face_model, fx, fy, cx, cy); - } + cv::Vec6d pose_estimate = LandmarkDetector::GetPose(face_model, fx, fy, cx, cy); if (hog_output_file.is_open()) { diff --git a/lib/local/FaceAnalyser/CMakeLists.txt b/lib/local/FaceAnalyser/CMakeLists.txt index 1cab4e4..41d0c13 100644 --- a/lib/local/FaceAnalyser/CMakeLists.txt +++ b/lib/local/FaceAnalyser/CMakeLists.txt @@ -1,5 +1,6 @@ #TBB library include_directories(${TBB_ROOT_DIR}/include) +include_directories(${BLAS_ROOT_DIR}) include_directories(${BOOST_INCLUDE_DIR}) diff --git a/lib/local/FaceAnalyser/src/GazeEstimation.cpp b/lib/local/FaceAnalyser/src/GazeEstimation.cpp index dbfd433..ebc002a 100644 --- a/lib/local/FaceAnalyser/src/GazeEstimation.cpp +++ b/lib/local/FaceAnalyser/src/GazeEstimation.cpp @@ -88,7 +88,7 @@ cv::Point3f GetPupilPosition(cv::Mat_ eyeLdmks3d){ void FaceAnalysis::EstimateGaze(const LandmarkDetector::CLNF& clnf_model, cv::Point3f& gaze_absolute, float fx, float fy, float cx, float cy, bool left_eye) { - cv::Vec6d headPose = LandmarkDetector::GetCorrectedPoseWorld(clnf_model, fx, fy, cx, cy); + cv::Vec6d headPose = LandmarkDetector::GetPose(clnf_model, fx, fy, cx, cy); cv::Vec3d eulerAngles(headPose(3), headPose(4), headPose(5)); cv::Matx33d rotMat = LandmarkDetector::Euler2RotationMatrix(eulerAngles); diff --git a/lib/local/LandmarkDetector/include/LandmarkDetectorFunc.h b/lib/local/LandmarkDetector/include/LandmarkDetectorFunc.h index e6bccb7..9d5e006 100644 --- a/lib/local/LandmarkDetector/include/LandmarkDetectorFunc.h +++ b/lib/local/LandmarkDetector/include/LandmarkDetectorFunc.h @@ -68,18 +68,13 @@ namespace LandmarkDetector //================================================================ // Helper function for getting head pose from CLNF parameters - // Return the current estimate of the head pose, this can be either in camera or world coordinate space + // Return the current estimate of the head pose in world coordinates with camera at origin (0,0,0) // The format returned is [Tx, Ty, Tz, Eul_x, Eul_y, Eul_z] - cv::Vec6d GetPoseCamera(const CLNF& clnf_model, double fx, double fy, double cx, double cy); - cv::Vec6d GetPoseWorld(const CLNF& clnf_model, double fx, double fy, double cx, double cy); - - // Getting a head pose estimate from the currently detected landmarks, with appropriate correction for perspective - // This is because rotation estimate under orthographic assumption is only correct close to the centre of the image - // These methods attempt to correct for that - // The pose returned can be either in camera or world coordinates + cv::Vec6d GetPose(const CLNF& clnf_model, float fx, float fy, float cx, float cy); + + // Return the current estimate of the head pose in world coordinates with camera at origin (0,0,0), but with rotation representing if the head is looking at the camera // The format returned is [Tx, Ty, Tz, Eul_x, Eul_y, Eul_z] - cv::Vec6d GetCorrectedPoseCamera(const CLNF& clnf_model, double fx, double fy, double cx, double cy); - cv::Vec6d GetCorrectedPoseWorld(const CLNF& clnf_model, double fx, double fy, double cx, double cy); + cv::Vec6d GetPoseWRTCamera(const CLNF& clnf_model, float fx, float fy, float cx, float cy); //=========================================================================== diff --git a/lib/local/LandmarkDetector/include/LandmarkDetectorUtils.h b/lib/local/LandmarkDetector/include/LandmarkDetectorUtils.h index e102964..b54ffe7 100644 --- a/lib/local/LandmarkDetector/include/LandmarkDetectorUtils.h +++ b/lib/local/LandmarkDetector/include/LandmarkDetectorUtils.h @@ -53,7 +53,7 @@ namespace LandmarkDetector // Helper functions for parsing the inputs //============================================================================================= void get_video_input_output_params(vector &input_video_file, vector &output_files, - vector &output_video_files, bool& camera_coordinates_pose, string &output_codec, vector &arguments); + vector &output_video_files, string &output_codec, vector &arguments); void get_camera_params(int &device, float &fx, float &fy, float &cx, float &cy, vector &arguments); diff --git a/lib/local/LandmarkDetector/src/LandmarkDetectorFunc.cpp b/lib/local/LandmarkDetector/src/LandmarkDetectorFunc.cpp index 62b0c08..f8460d9 100644 --- a/lib/local/LandmarkDetector/src/LandmarkDetectorFunc.cpp +++ b/lib/local/LandmarkDetector/src/LandmarkDetectorFunc.cpp @@ -46,71 +46,19 @@ using namespace LandmarkDetector; -// Getting a head pose estimate from the currently detected landmarks (rotation with respect to point camera) +// Getting a head pose estimate from the currently detected landmarks, with appropriate correction due to the PDM assuming an orthographic camera +// which is only correct close to the centre of the image +// This method returns a corrected pose estimate with respect to world coordinates with camera at origin (0,0,0) // The format returned is [Tx, Ty, Tz, Eul_x, Eul_y, Eul_z] -cv::Vec6d LandmarkDetector::GetPoseCamera(const CLNF& clnf_model, double fx, double fy, double cx, double cy) +cv::Vec6d LandmarkDetector::GetPose(const CLNF& clnf_model, float fx, float fy, float cx, float cy) { - if(!clnf_model.detected_landmarks.empty() && clnf_model.params_global[0] != 0) - { - double Z = fx / clnf_model.params_global[0]; - - double X = ((clnf_model.params_global[4] - cx) * (1.0/fx)) * Z; - double Y = ((clnf_model.params_global[5] - cy) * (1.0/fy)) * Z; - - return cv::Vec6d(X, Y, Z, clnf_model.params_global[1], clnf_model.params_global[2], clnf_model.params_global[3]); - } - else - { - return cv::Vec6d(0,0,0,0,0,0); - } -} - -// Getting a head pose estimate from the currently detected landmarks (rotation in world coordinates) -// The format returned is [Tx, Ty, Tz, Eul_x, Eul_y, Eul_z] -cv::Vec6d LandmarkDetector::GetPoseWorld(const CLNF& clnf_model, double fx, double fy, double cx, double cy) -{ - if(!clnf_model.detected_landmarks.empty() && clnf_model.params_global[0] != 0) - { - double Z = fx / clnf_model.params_global[0]; - - double X = ((clnf_model.params_global[4] - cx) * (1.0/fx)) * Z; - double Y = ((clnf_model.params_global[5] - cy) * (1.0/fy)) * Z; - - // Here we correct for the camera orientation, for this need to determine the angle the camera makes with the head pose - double z_x = cv::sqrt(X * X + Z * Z); - double eul_x = atan2(Y, z_x); - - double z_y = cv::sqrt(Y * Y + Z * Z); - double eul_y = -atan2(X, z_y); - - cv::Matx33d camera_rotation = LandmarkDetector::Euler2RotationMatrix(cv::Vec3d(eul_x, eul_y, 0)); - cv::Matx33d head_rotation = LandmarkDetector::AxisAngle2RotationMatrix(cv::Vec3d(clnf_model.params_global[1], clnf_model.params_global[2], clnf_model.params_global[3])); - - cv::Matx33d corrected_rotation = camera_rotation.t() * head_rotation; - - cv::Vec3d euler_corrected = LandmarkDetector::RotationMatrix2Euler(corrected_rotation); - - return cv::Vec6d(X, Y, Z, euler_corrected[0], euler_corrected[1], euler_corrected[2]); - } - else - { - return cv::Vec6d(0,0,0,0,0,0); - } -} - -// Getting a head pose estimate from the currently detected landmarks, with appropriate correction due to orthographic camera issue -// This is because rotation estimate under orthographic assumption is only correct close to the centre of the image -// This method returns a corrected pose estimate with respect to world coordinates (Experimental) -// The format returned is [Tx, Ty, Tz, Eul_x, Eul_y, Eul_z] -cv::Vec6d LandmarkDetector::GetCorrectedPoseWorld(const CLNF& clnf_model, double fx, double fy, double cx, double cy) -{ - if(!clnf_model.detected_landmarks.empty() && clnf_model.params_global[0] != 0) + if (!clnf_model.detected_landmarks.empty() && clnf_model.params_global[0] != 0) { // This is used as an initial estimate for the iterative PnP algorithm double Z = fx / clnf_model.params_global[0]; - - double X = ((clnf_model.params_global[4] - cx) * (1.0/fx)) * Z; - double Y = ((clnf_model.params_global[5] - cy) * (1.0/fy)) * Z; + + double X = ((clnf_model.params_global[4] - cx) * (1.0 / fx)) * Z; + double Y = ((clnf_model.params_global[5] - cy) * (1.0 / fy)) * Z; // Correction for orientation @@ -129,35 +77,35 @@ cv::Vec6d LandmarkDetector::GetCorrectedPoseWorld(const CLNF& clnf_model, double // The camera matrix cv::Matx33d camera_matrix(fx, 0, cx, 0, fy, cy, 0, 0, 1); - + cv::Vec3d vec_trans(X, Y, Z); cv::Vec3d vec_rot(clnf_model.params_global[1], clnf_model.params_global[2], clnf_model.params_global[3]); - + cv::solvePnP(landmarks_3D, landmarks_2D, camera_matrix, cv::Mat(), vec_rot, vec_trans, true); cv::Vec3d euler = LandmarkDetector::AxisAngle2Euler(vec_rot); - + return cv::Vec6d(vec_trans[0], vec_trans[1], vec_trans[2], euler[0], euler[1], euler[2]); } else { - return cv::Vec6d(0,0,0,0,0,0); + return cv::Vec6d(0, 0, 0, 0, 0, 0); } } // Getting a head pose estimate from the currently detected landmarks, with appropriate correction due to perspective projection -// This method returns a corrected pose estimate with respect to a point camera (NOTE not the world coordinates) (Experimental) +// This method returns a corrected pose estimate with respect to a point camera (NOTE not the world coordinates), which is useful to find out if the person is looking at a camera // The format returned is [Tx, Ty, Tz, Eul_x, Eul_y, Eul_z] -cv::Vec6d LandmarkDetector::GetCorrectedPoseCamera(const CLNF& clnf_model, double fx, double fy, double cx, double cy) +cv::Vec6d LandmarkDetector::GetPoseWRTCamera(const CLNF& clnf_model, float fx, float fy, float cx, float cy) { - if(!clnf_model.detected_landmarks.empty() && clnf_model.params_global[0] != 0) + if (!clnf_model.detected_landmarks.empty() && clnf_model.params_global[0] != 0) { double Z = fx / clnf_model.params_global[0]; - - double X = ((clnf_model.params_global[4] - cx) * (1.0/fx)) * Z; - double Y = ((clnf_model.params_global[5] - cy) * (1.0/fy)) * Z; - + + double X = ((clnf_model.params_global[4] - cx) * (1.0 / fx)) * Z; + double Y = ((clnf_model.params_global[5] - cy) * (1.0 / fy)) * Z; + // Correction for orientation // 3D points @@ -168,17 +116,17 @@ cv::Vec6d LandmarkDetector::GetCorrectedPoseCamera(const CLNF& clnf_model, doubl // 2D points cv::Mat_ landmarks_2D = clnf_model.detected_landmarks; - + landmarks_2D = landmarks_2D.reshape(1, 2).t(); // Solving the PNP model // The camera matrix cv::Matx33d camera_matrix(fx, 0, cx, 0, fy, cy, 0, 0, 1); - + cv::Vec3d vec_trans(X, Y, Z); cv::Vec3d vec_rot(clnf_model.params_global[1], clnf_model.params_global[2], clnf_model.params_global[3]); - + cv::solvePnP(landmarks_3D, landmarks_2D, camera_matrix, cv::Mat(), vec_rot, vec_trans, true); // Here we correct for the camera orientation, for this need to determine the angle the camera makes with the head pose @@ -194,12 +142,12 @@ cv::Vec6d LandmarkDetector::GetCorrectedPoseCamera(const CLNF& clnf_model, doubl cv::Matx33d corrected_rotation = camera_rotation * head_rotation; cv::Vec3d euler_corrected = LandmarkDetector::RotationMatrix2Euler(corrected_rotation); - + return cv::Vec6d(vec_trans[0], vec_trans[1], vec_trans[2], euler_corrected[0], euler_corrected[1], euler_corrected[2]); } else { - return cv::Vec6d(0,0,0,0,0,0); + return cv::Vec6d(0, 0, 0, 0, 0, 0); } } diff --git a/lib/local/LandmarkDetector/src/LandmarkDetectorUtils.cpp b/lib/local/LandmarkDetector/src/LandmarkDetectorUtils.cpp index e9cb5eb..959aa70 100644 --- a/lib/local/LandmarkDetector/src/LandmarkDetectorUtils.cpp +++ b/lib/local/LandmarkDetector/src/LandmarkDetectorUtils.cpp @@ -97,7 +97,7 @@ void create_directories(string output_path) // Extracting the following command line arguments -f, -op, -of, -ov (and possible ordered repetitions) void get_video_input_output_params(vector &input_video_files, vector &output_files, - vector &output_video_files, bool& camera_coordinates_pose, string& output_codec, vector &arguments) + vector &output_video_files, string& output_codec, vector &arguments) { bool* valid = new bool[arguments.size()]; @@ -106,9 +106,6 @@ void get_video_input_output_params(vector &input_video_files, vector &input_video_files, vector