Simplifying head pose estimation.
This commit is contained in:
parent
6eb46088f8
commit
9900e8b0de
14 changed files with 50 additions and 113 deletions
|
@ -466,7 +466,7 @@ int main (int argc, char **argv)
|
|||
bool success = LandmarkDetector::DetectLandmarksInImage(grayscale_image, depth_image, face_detections[face], clnf_model, det_parameters);
|
||||
|
||||
// Estimate head pose and eye gaze
|
||||
cv::Vec6d headPose = LandmarkDetector::GetCorrectedPoseWorld(clnf_model, fx, fy, cx, cy);
|
||||
cv::Vec6d headPose = LandmarkDetector::GetPose(clnf_model, fx, fy, cx, cy);
|
||||
|
||||
// Gaze tracking, absolute gaze direction
|
||||
cv::Point3f gazeDirection0(0, 0, -1);
|
||||
|
@ -521,7 +521,7 @@ int main (int argc, char **argv)
|
|||
|
||||
if (det_parameters.track_gaze)
|
||||
{
|
||||
cv::Vec6d pose_estimate_to_draw = LandmarkDetector::GetCorrectedPoseWorld(clnf_model, fx, fy, cx, cy);
|
||||
cv::Vec6d pose_estimate_to_draw = LandmarkDetector::GetPose(clnf_model, fx, fy, cx, cy);
|
||||
|
||||
// Draw it in reddish if uncertain, blueish if certain
|
||||
LandmarkDetector::DrawBox(read_image, pose_estimate_to_draw, cv::Scalar(255.0, 0, 0), 3, fx, fy, cx, cy);
|
||||
|
@ -582,7 +582,7 @@ int main (int argc, char **argv)
|
|||
LandmarkDetector::DetectLandmarksInImage(grayscale_image, bounding_boxes[i], clnf_model, det_parameters);
|
||||
|
||||
// Estimate head pose and eye gaze
|
||||
cv::Vec6d headPose = LandmarkDetector::GetCorrectedPoseWorld(clnf_model, fx, fy, cx, cy);
|
||||
cv::Vec6d headPose = LandmarkDetector::GetPose(clnf_model, fx, fy, cx, cy);
|
||||
|
||||
// Gaze tracking, absolute gaze direction
|
||||
cv::Point3f gazeDirection0(0, 0, -1);
|
||||
|
@ -615,7 +615,7 @@ int main (int argc, char **argv)
|
|||
|
||||
if (det_parameters.track_gaze)
|
||||
{
|
||||
cv::Vec6d pose_estimate_to_draw = LandmarkDetector::GetCorrectedPoseWorld(clnf_model, fx, fy, cx, cy);
|
||||
cv::Vec6d pose_estimate_to_draw = LandmarkDetector::GetPose(clnf_model, fx, fy, cx, cy);
|
||||
|
||||
// Draw it in reddish if uncertain, blueish if certain
|
||||
LandmarkDetector::DrawBox(read_image, pose_estimate_to_draw, cv::Scalar(255.0, 0, 0), 3, fx, fy, cx, cy);
|
||||
|
|
|
@ -136,7 +136,7 @@ void visualise_tracking(cv::Mat& captured_image, cv::Mat_<float>& depth_image, c
|
|||
// A rough heuristic for box around the face width
|
||||
int thickness = (int)std::ceil(2.0* ((double)captured_image.cols) / 640.0);
|
||||
|
||||
cv::Vec6d pose_estimate_to_draw = LandmarkDetector::GetCorrectedPoseWorld(face_model, fx, fy, cx, cy);
|
||||
cv::Vec6d pose_estimate_to_draw = LandmarkDetector::GetPose(face_model, fx, fy, cx, cy);
|
||||
|
||||
// Draw it in reddish if uncertain, blueish if certain
|
||||
LandmarkDetector::DrawBox(captured_image, pose_estimate_to_draw, cv::Scalar((1 - vis_certainty)*255.0, 0, vis_certainty * 255), thickness, fx, fy, cx, cy);
|
||||
|
@ -192,9 +192,8 @@ int main (int argc, char **argv)
|
|||
// Get the input output file parameters
|
||||
|
||||
// Indicates that rotation should be with respect to world or camera coordinates
|
||||
bool u;
|
||||
string output_codec;
|
||||
LandmarkDetector::get_video_input_output_params(files, depth_directories, out_dummy, output_video_files, u, output_codec, arguments);
|
||||
LandmarkDetector::get_video_input_output_params(files, depth_directories, out_dummy, output_video_files, output_codec, arguments);
|
||||
|
||||
// The modules that are being used for tracking
|
||||
LandmarkDetector::CLNF clnf_model(det_parameters.model_location);
|
||||
|
|
|
@ -150,9 +150,8 @@ int main (int argc, char **argv)
|
|||
det_parameters.push_back(det_params);
|
||||
|
||||
// Get the input output file parameters
|
||||
bool u;
|
||||
string output_codec;
|
||||
LandmarkDetector::get_video_input_output_params(files, depth_directories, dummy_out, tracked_videos_output, u, output_codec, arguments);
|
||||
LandmarkDetector::get_video_input_output_params(files, depth_directories, dummy_out, tracked_videos_output, output_codec, arguments);
|
||||
// Get camera parameters
|
||||
LandmarkDetector::get_camera_params(device, fx, fy, cx, cy, arguments);
|
||||
|
||||
|
@ -404,7 +403,7 @@ int main (int argc, char **argv)
|
|||
int thickness = (int)std::ceil(2.0* ((double)captured_image.cols) / 640.0);
|
||||
|
||||
// Work out the pose of the head from the tracked model
|
||||
cv::Vec6d pose_estimate = LandmarkDetector::GetCorrectedPoseWorld(clnf_models[model], fx, fy, cx, cy);
|
||||
cv::Vec6d pose_estimate = LandmarkDetector::GetPose(clnf_models[model], fx, fy, cx, cy);
|
||||
|
||||
// Draw it in reddish if uncertain, blueish if certain
|
||||
LandmarkDetector::DrawBox(disp_image, pose_estimate, cv::Scalar((1-detection_certainty)*255.0,0, detection_certainty*255), thickness, fx, fy, cx, cy);
|
||||
|
|
|
@ -190,7 +190,7 @@ void visualise_tracking(cv::Mat& captured_image, const LandmarkDetector::CLNF& f
|
|||
// A rough heuristic for box around the face width
|
||||
int thickness = (int)std::ceil(2.0* ((double)captured_image.cols) / 640.0);
|
||||
|
||||
cv::Vec6d pose_estimate_to_draw = LandmarkDetector::GetCorrectedPoseWorld(face_model, fx, fy, cx, cy);
|
||||
cv::Vec6d pose_estimate_to_draw = LandmarkDetector::GetPose(face_model, fx, fy, cx, cy);
|
||||
|
||||
// Draw it in reddish if uncertain, blueish if certain
|
||||
LandmarkDetector::DrawBox(captured_image, pose_estimate_to_draw, cv::Scalar((1 - vis_certainty)*255.0, 0, vis_certainty * 255), thickness, fx, fy, cx, cy);
|
||||
|
@ -252,9 +252,8 @@ int main (int argc, char **argv)
|
|||
// Get the input output file parameters
|
||||
|
||||
// Indicates that rotation should be with respect to camera or world coordinates
|
||||
bool use_world_coordinates;
|
||||
string output_codec; //not used but should
|
||||
LandmarkDetector::get_video_input_output_params(input_files, depth_directories, output_files, tracked_videos_output, use_world_coordinates, output_codec, arguments);
|
||||
LandmarkDetector::get_video_input_output_params(input_files, depth_directories, output_files, tracked_videos_output, output_codec, arguments);
|
||||
|
||||
bool video_input = true;
|
||||
bool verbose = true;
|
||||
|
@ -596,15 +595,7 @@ int main (int argc, char **argv)
|
|||
}
|
||||
|
||||
// Work out the pose of the head from the tracked model
|
||||
cv::Vec6d pose_estimate;
|
||||
if(use_world_coordinates)
|
||||
{
|
||||
pose_estimate = LandmarkDetector::GetCorrectedPoseWorld(face_model, fx, fy, cx, cy);
|
||||
}
|
||||
else
|
||||
{
|
||||
pose_estimate = LandmarkDetector::GetCorrectedPoseCamera(face_model, fx, fy, cx, cy);
|
||||
}
|
||||
cv::Vec6d pose_estimate = LandmarkDetector::GetPose(face_model, fx, fy, cx, cy);
|
||||
|
||||
if(hog_output_file.is_open())
|
||||
{
|
||||
|
|
|
@ -263,7 +263,7 @@ namespace CppInterop {
|
|||
}
|
||||
|
||||
void GetCorrectedPoseCamera(System::Collections::Generic::List<double>^ pose, double fx, double fy, double cx, double cy) {
|
||||
auto pose_vec = ::LandmarkDetector::GetCorrectedPoseCamera(*clnf, fx, fy, cx, cy);
|
||||
auto pose_vec = ::LandmarkDetector::GetPoseWRTCamera(*clnf, fx, fy, cx, cy);
|
||||
pose->Clear();
|
||||
for(int i = 0; i < 6; ++i)
|
||||
{
|
||||
|
@ -272,7 +272,7 @@ namespace CppInterop {
|
|||
}
|
||||
|
||||
void GetCorrectedPoseWorld(System::Collections::Generic::List<double>^ pose, double fx, double fy, double cx, double cy) {
|
||||
auto pose_vec = ::LandmarkDetector::GetCorrectedPoseWorld(*clnf, fx, fy, cx, cy);
|
||||
auto pose_vec = ::LandmarkDetector::GetPose(*clnf, fx, fy, cx, cy);
|
||||
pose->Clear();
|
||||
for(int i = 0; i < 6; ++i)
|
||||
{
|
||||
|
@ -322,7 +322,7 @@ namespace CppInterop {
|
|||
|
||||
System::Collections::Generic::List<System::Tuple<System::Windows::Point, System::Windows::Point>^>^ CalculateBox(float fx, float fy, float cx, float cy) {
|
||||
|
||||
cv::Vec6d pose = ::LandmarkDetector::GetCorrectedPoseWorld(*clnf, fx,fy, cx, cy);
|
||||
cv::Vec6d pose = ::LandmarkDetector::GetPose(*clnf, fx,fy, cx, cy);
|
||||
|
||||
vector<pair<cv::Point2d, cv::Point2d>> vecLines = ::LandmarkDetector::CalculateBox(pose, fx, fy, cx, cy);
|
||||
|
||||
|
|
|
@ -112,7 +112,7 @@ cv::Point3f FaceAnalysis::GetPupilPosition(cv::Mat_<double> eyeLdmks3d){
|
|||
|
||||
void FaceAnalysis::EstimateGaze(const LandmarkDetector::CLNF& clnf_model, cv::Point3f& gaze_absolute, float fx, float fy, float cx, float cy, bool left_eye)
|
||||
{
|
||||
cv::Vec6d headPose = LandmarkDetector::GetPoseCamera(clnf_model, fx, fy, cx, cy);
|
||||
cv::Vec6d headPose = LandmarkDetector::GetPose(clnf_model, fx, fy, cx, cy);
|
||||
cv::Vec3d eulerAngles(headPose(3), headPose(4), headPose(5));
|
||||
cv::Matx33d rotMat = LandmarkDetector::Euler2RotationMatrix(eulerAngles);
|
||||
|
||||
|
|
|
@ -100,20 +100,12 @@ namespace LandmarkDetector
|
|||
//================================================================
|
||||
// Helper function for getting head pose from CLNF parameters
|
||||
|
||||
// Return the current estimate of the head pose, this can be either in camera or world coordinate space
|
||||
// Return the current estimate of the head pose in world coordinates with camera at origin (0,0,0)
|
||||
// The format returned is [Tx, Ty, Tz, Eul_x, Eul_y, Eul_z]
|
||||
cv::Vec6d GetPoseCamera(const CLNF& clnf_model, double fx, double fy, double cx, double cy);
|
||||
cv::Vec6d GetPoseWorld(const CLNF& clnf_model, double fx, double fy, double cx, double cy);
|
||||
cv::Vec6d GetPose(const CLNF& clnf_model, double fx, double fy, double cx, double cy);
|
||||
|
||||
// Getting a head pose estimate from the currently detected landmarks, with appropriate correction for perspective
|
||||
// This is because rotation estimate under orthographic assumption is only correct close to the centre of the image
|
||||
// These methods attempt to correct for that
|
||||
// The pose returned can be either in camera or world coordinates
|
||||
// Return the current estimate of the head pose in world coordinates with camera at origin (0,0,0), but with rotation representing if the head is looking at the camera
|
||||
// The format returned is [Tx, Ty, Tz, Eul_x, Eul_y, Eul_z]
|
||||
cv::Vec6d GetCorrectedPoseCamera(const CLNF& clnf_model, double fx, double fy, double cx, double cy);
|
||||
cv::Vec6d GetCorrectedPoseWorld(const CLNF& clnf_model, double fx, double fy, double cx, double cy);
|
||||
|
||||
//===========================================================================
|
||||
|
||||
cv::Vec6d GetPoseWRTCamera(const CLNF& clnf_model, double fx, double fy, double cx, double cy);
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -77,7 +77,7 @@ namespace LandmarkDetector
|
|||
// Helper functions for parsing the inputs
|
||||
//=============================================================================================
|
||||
void get_video_input_output_params(vector<string> &input_video_file, vector<string> &depth_dir, vector<string> &output_files,
|
||||
vector<string> &output_video_files, bool& world_coordinates_pose, string &output_codec, vector<string> &arguments);
|
||||
vector<string> &output_video_files, string &output_codec, vector<string> &arguments);
|
||||
|
||||
void get_camera_params(int &device, float &fx, float &fy, float &cx, float &cy, vector<string> &arguments);
|
||||
|
||||
|
|
|
@ -70,63 +70,11 @@
|
|||
|
||||
using namespace LandmarkDetector;
|
||||
|
||||
// Getting a head pose estimate from the currently detected landmarks (rotation with respect to point camera)
|
||||
// Getting a head pose estimate from the currently detected landmarks, with appropriate correction due to the PDM assuming an orthographic camera
|
||||
// which is only correct close to the centre of the image
|
||||
// This method returns a corrected pose estimate with respect to world coordinates with camera at origin (0,0,0)
|
||||
// The format returned is [Tx, Ty, Tz, Eul_x, Eul_y, Eul_z]
|
||||
cv::Vec6d LandmarkDetector::GetPoseCamera(const CLNF& clnf_model, double fx, double fy, double cx, double cy)
|
||||
{
|
||||
if(!clnf_model.detected_landmarks.empty() && clnf_model.params_global[0] != 0)
|
||||
{
|
||||
double Z = fx / clnf_model.params_global[0];
|
||||
|
||||
double X = ((clnf_model.params_global[4] - cx) * (1.0/fx)) * Z;
|
||||
double Y = ((clnf_model.params_global[5] - cy) * (1.0/fy)) * Z;
|
||||
|
||||
return cv::Vec6d(X, Y, Z, clnf_model.params_global[1], clnf_model.params_global[2], clnf_model.params_global[3]);
|
||||
}
|
||||
else
|
||||
{
|
||||
return cv::Vec6d(0,0,0,0,0,0);
|
||||
}
|
||||
}
|
||||
|
||||
// Getting a head pose estimate from the currently detected landmarks (rotation in world coordinates)
|
||||
// The format returned is [Tx, Ty, Tz, Eul_x, Eul_y, Eul_z]
|
||||
cv::Vec6d LandmarkDetector::GetPoseWorld(const CLNF& clnf_model, double fx, double fy, double cx, double cy)
|
||||
{
|
||||
if(!clnf_model.detected_landmarks.empty() && clnf_model.params_global[0] != 0)
|
||||
{
|
||||
double Z = fx / clnf_model.params_global[0];
|
||||
|
||||
double X = ((clnf_model.params_global[4] - cx) * (1.0/fx)) * Z;
|
||||
double Y = ((clnf_model.params_global[5] - cy) * (1.0/fy)) * Z;
|
||||
|
||||
// Here we correct for the camera orientation, for this need to determine the angle the camera makes with the head pose
|
||||
double z_x = cv::sqrt(X * X + Z * Z);
|
||||
double eul_x = atan2(Y, z_x);
|
||||
|
||||
double z_y = cv::sqrt(Y * Y + Z * Z);
|
||||
double eul_y = -atan2(X, z_y);
|
||||
|
||||
cv::Matx33d camera_rotation = LandmarkDetector::Euler2RotationMatrix(cv::Vec3d(eul_x, eul_y, 0));
|
||||
cv::Matx33d head_rotation = LandmarkDetector::AxisAngle2RotationMatrix(cv::Vec3d(clnf_model.params_global[1], clnf_model.params_global[2], clnf_model.params_global[3]));
|
||||
|
||||
cv::Matx33d corrected_rotation = camera_rotation.t() * head_rotation;
|
||||
|
||||
cv::Vec3d euler_corrected = LandmarkDetector::RotationMatrix2Euler(corrected_rotation);
|
||||
|
||||
return cv::Vec6d(X, Y, Z, euler_corrected[0], euler_corrected[1], euler_corrected[2]);
|
||||
}
|
||||
else
|
||||
{
|
||||
return cv::Vec6d(0,0,0,0,0,0);
|
||||
}
|
||||
}
|
||||
|
||||
// Getting a head pose estimate from the currently detected landmarks, with appropriate correction due to orthographic camera issue
|
||||
// This is because rotation estimate under orthographic assumption is only correct close to the centre of the image
|
||||
// This method returns a corrected pose estimate with respect to world coordinates (Experimental)
|
||||
// The format returned is [Tx, Ty, Tz, Eul_x, Eul_y, Eul_z]
|
||||
cv::Vec6d LandmarkDetector::GetCorrectedPoseWorld(const CLNF& clnf_model, double fx, double fy, double cx, double cy)
|
||||
cv::Vec6d LandmarkDetector::GetPose(const CLNF& clnf_model, double fx, double fy, double cx, double cy)
|
||||
{
|
||||
if(!clnf_model.detected_landmarks.empty() && clnf_model.params_global[0] != 0)
|
||||
{
|
||||
|
@ -161,7 +109,7 @@ cv::Vec6d LandmarkDetector::GetCorrectedPoseWorld(const CLNF& clnf_model, double
|
|||
|
||||
cv::Vec3d euler = LandmarkDetector::AxisAngle2Euler(vec_rot);
|
||||
|
||||
return cv::Vec6d(vec_trans[0], vec_trans[1], vec_trans[2], vec_rot[0], vec_rot[1], vec_rot[2]);
|
||||
return cv::Vec6d(vec_trans[0], vec_trans[1], vec_trans[2], euler[0], euler[1], euler[2]);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -170,9 +118,9 @@ cv::Vec6d LandmarkDetector::GetCorrectedPoseWorld(const CLNF& clnf_model, double
|
|||
}
|
||||
|
||||
// Getting a head pose estimate from the currently detected landmarks, with appropriate correction due to perspective projection
|
||||
// This method returns a corrected pose estimate with respect to a point camera (NOTE not the world coordinates) (Experimental)
|
||||
// This method returns a corrected pose estimate with respect to a point camera (NOTE not the world coordinates), which is useful to find out if the person is looking at a camera
|
||||
// The format returned is [Tx, Ty, Tz, Eul_x, Eul_y, Eul_z]
|
||||
cv::Vec6d LandmarkDetector::GetCorrectedPoseCamera(const CLNF& clnf_model, double fx, double fy, double cx, double cy)
|
||||
cv::Vec6d LandmarkDetector::GetPoseWRTCamera(const CLNF& clnf_model, double fx, double fy, double cx, double cy)
|
||||
{
|
||||
if(!clnf_model.detected_landmarks.empty() && clnf_model.params_global[0] != 0)
|
||||
{
|
||||
|
|
|
@ -121,7 +121,7 @@ void create_directories(string output_path)
|
|||
|
||||
// Extracting the following command line arguments -f, -fd, -op, -of, -ov (and possible ordered repetitions)
|
||||
void get_video_input_output_params(vector<string> &input_video_files, vector<string> &depth_dirs, vector<string> &output_files,
|
||||
vector<string> &output_video_files, bool& world_coordinates_pose, string& output_codec, vector<string> &arguments)
|
||||
vector<string> &output_video_files, string& output_codec, vector<string> &arguments)
|
||||
{
|
||||
bool* valid = new bool[arguments.size()];
|
||||
|
||||
|
@ -130,9 +130,6 @@ void get_video_input_output_params(vector<string> &input_video_files, vector<str
|
|||
valid[i] = true;
|
||||
}
|
||||
|
||||
// By default use rotation with respect to camera (not world coordinates)
|
||||
world_coordinates_pose = false;
|
||||
|
||||
// By default use DIVX codec
|
||||
output_codec = "DIVX";
|
||||
|
||||
|
@ -196,10 +193,6 @@ void get_video_input_output_params(vector<string> &input_video_files, vector<str
|
|||
valid[i+1] = false;
|
||||
i++;
|
||||
}
|
||||
else if (arguments[i].compare("-world_coord") == 0)
|
||||
{
|
||||
world_coordinates_pose = true;
|
||||
}
|
||||
else if (arguments[i].compare("-oc") == 0)
|
||||
{
|
||||
if(arguments[i + 1].length() == 4)
|
||||
|
|
|
@ -23,6 +23,7 @@ for i=1:numel(seqNames)
|
|||
|
||||
rotg{i} = posesGround(:,[5 6 7]);
|
||||
rot{i} = [rx ry rz];
|
||||
T = [tx ty tx];
|
||||
|
||||
% Correct the first frame so it corresponds to (0,0,0), as slightly
|
||||
% different pose might be assumed frontal and this corrects for
|
||||
|
@ -36,6 +37,20 @@ for i=1:numel(seqNames)
|
|||
rotg{i}(r_e,:) = Rot2Euler(rot_new_gt);
|
||||
end
|
||||
|
||||
% First move the orientation to camera space
|
||||
zx = sqrt(tx.^2 + tz.^2);
|
||||
eul_x = atan2(ty, zx);
|
||||
|
||||
zy = sqrt(ty.^2 + tz.^2);
|
||||
eul_y = -atan2(tx, zy);
|
||||
for r_e = 1:size(rot{i},1)
|
||||
cam_rot = Euler2Rot([eul_x(r_e), eul_y(r_e), 0]);
|
||||
h_rot = Euler2Rot(rot{i}(r_e,:));
|
||||
|
||||
c_rot = cam_rot * h_rot;
|
||||
rot{i}(r_e,:) = Rot2Euler(c_rot);
|
||||
end
|
||||
|
||||
% Work out the correction matrix for estimates
|
||||
rot_corr_est = Euler2Rot(rot{i}(1,:));
|
||||
for r_e = 1:size(rot{i},1)
|
||||
|
|
Binary file not shown.
|
@ -1,4 +1,4 @@
|
|||
Dataset and model, pitch, yaw, roll, mean, median
|
||||
biwi error: 7.955, 5.583, 4.402, 5.980, 2.624
|
||||
bu error: 2.762, 4.103, 2.568, 3.145, 2.118
|
||||
ict error: 3.619, 3.606, 3.625, 3.617, 2.027
|
||||
biwi error: 7.955, 5.583, 4.403, 5.980, 2.624
|
||||
bu error: 2.734, 3.350, 2.459, 2.848, 1.974
|
||||
ict error: 3.610, 3.959, 3.369, 3.646, 1.967
|
||||
|
|
|
@ -41,7 +41,7 @@ for i=3:numTogether:numel(dbSeqDir)
|
|||
inputFile = [ictDir dbSeqDir(i+n).name '/colour undist.avi'];
|
||||
outputFile = [outputDir dbSeqDir(i+n).name '.txt'];
|
||||
|
||||
command = cat(2, command, [' -f "' inputFile '" -op "' outputFile '" ']);
|
||||
command = cat(2, command, [' -f "' inputFile '" -of "' outputFile '" ']);
|
||||
|
||||
if(depth)
|
||||
dDir = [ictDir dbSeqDir(i+n).name '/depthAligned/'];
|
||||
|
|
Loading…
Reference in a new issue