From 8947395ca59b24620c50bed484170e7588403670 Mon Sep 17 00:00:00 2001
From: Tadas Baltrusaitis <tadyla@gmail.com>
Date: Tue, 14 Nov 2017 19:59:08 +0000
Subject: [PATCH] Working on a new image interface.

---
 exe/FaceLandmarkImg/FaceLandmarkImg.cpp    | 95 ++++++++--------------
 lib/local/Utilities/include/ImageCapture.h | 10 ++-
 lib/local/Utilities/src/ImageCapture.cpp   | 56 ++++++++++++-
 3 files changed, 98 insertions(+), 63 deletions(-)
diff --git a/exe/FaceLandmarkImg/FaceLandmarkImg.cpp b/exe/FaceLandmarkImg/FaceLandmarkImg.cpp
index 6133cde..760dfbd 100644
--- a/exe/FaceLandmarkImg/FaceLandmarkImg.cpp
+++ b/exe/FaceLandmarkImg/FaceLandmarkImg.cpp
@@ -54,6 +54,13 @@
 #include <FaceAnalyser.h>
 #include <GazeEstimation.h>
 
+#include <ImageCapture.h>
+#include <Visualizer.h>
+#include <VisualizationUtils.h>
+#include <RecorderOpenFace.h>
+#include <RecorderOpenFaceParameters.h>
+
+
 #ifndef CONFIG_DIR
 #define CONFIG_DIR "~"
 #endif
@@ -141,84 +148,52 @@ int main (int argc, char **argv)
 	//Convert arguments to more convenient vector form
 	vector<string> arguments = get_arguments(argc, argv);
 
-	// Some initial parameters that can be overriden from command line
-	vector<string> files, output_images, output_landmark_locations, output_pose_locations;
+	// Prepare for image reading
+	Utilities::ImageCapture image_reader;
 
-	// Bounding boxes for a face in each image (optional)
-	vector<cv::Rect_<double> > bounding_boxes;
-	
-	LandmarkDetector::get_image_input_output_params(files, output_landmark_locations, output_pose_locations, output_images, bounding_boxes, arguments);
-	LandmarkDetector::FaceModelParameters det_parameters(arguments);	
+	// A utility for visualizing the results
+	Utilities::Visualizer visualizer(arguments);
+
+	// The sequence reader chooses what to open based on command line arguments provided
+	if (!image_reader.Open(arguments))
+	{
+		cout << "Could not open any images" << endl;
+		return 1;
+	}
+
+	// Load the models if images found
+	LandmarkDetector::FaceModelParameters det_parameters(arguments);
 	// No need to validate detections, as we're not doing tracking
 	det_parameters.validate_detections = false;
 
-	// Grab camera parameters if provided (only used for pose and eye gaze and are quite important for accurate estimates)
-	float fx = 0, fy = 0, cx = 0, cy = 0;
-	int device = -1;
-	LandmarkDetector::get_camera_params(device, fx, fy, cx, cy, arguments);
-
-	// If cx (optical axis centre) is undefined will use the image size/2 as an estimate
-	bool cx_undefined = false;
-	bool fx_undefined = false;
-	if (cx == 0 || cy == 0)
-	{
-		cx_undefined = true;
-	}
-	if (fx == 0 || fy == 0)
-	{
-		fx_undefined = true;
-	}
-
 	// The modules that are being used for tracking
 	cout << "Loading the model" << endl;
-	LandmarkDetector::CLNF clnf_model(det_parameters.model_location);
+	LandmarkDetector::CLNF face_model(det_parameters.model_location);
 	cout << "Model loaded" << endl;
-	
-	cv::CascadeClassifier classifier(det_parameters.face_detector_location);
-	dlib::frontal_face_detector face_detector_hog = dlib::get_frontal_face_detector();
 
 	// Load facial feature extractor and AU analyser (make sure it is static)
 	FaceAnalysis::FaceAnalyserParameters face_analysis_params(arguments);
 	face_analysis_params.OptimizeForImages();
 	FaceAnalysis::FaceAnalyser face_analyser(face_analysis_params);
 
-	bool visualise = !det_parameters.quiet_mode;
 
-	// Do some image loading
-	for(size_t i = 0; i < files.size(); i++)
+	// If bounding boxes not provided, use a face detector
+	cv::CascadeClassifier classifier(det_parameters.face_detector_location);
+	dlib::frontal_face_detector face_detector_hog = dlib::get_frontal_face_detector();
+
+	cv::Mat captured_image;
+
+	captured_image = image_reader.GetNextImage();
+
+	cout << "Starting tracking" << endl;
+	while (!captured_image.empty())
 	{
-		string file = files.at(i);
 
-		// Loading image
-		cv::Mat read_image = cv::imread(file, -1);
+		Utilities::RecorderOpenFaceParameters recording_params(arguments, false);
+		Utilities::RecorderOpenFace open_face_rec(image_reader.name, recording_params, arguments);
 
-		if (read_image.empty())
-		{
-			cout << "Could not read the input image" << endl;
-			return 1;
-		}
-		
 		// Making sure the image is in uchar grayscale
-		cv::Mat_<uchar> grayscale_image;
-		convert_to_grayscale(read_image, grayscale_image);
-		
-
-		// If optical centers are not defined just use center of image
-		if (cx_undefined)
-		{
-			cx = grayscale_image.cols / 2.0f;
-			cy = grayscale_image.rows / 2.0f;
-		}
-		// Use a rough guess-timate of focal length
-		if (fx_undefined)
-		{
-			fx = 500 * (grayscale_image.cols / 640.0);
-			fy = 500 * (grayscale_image.rows / 480.0);
-
-			fx = (fx + fy) / 2.0;
-			fy = fx;
-		}
-
+		cv::Mat_<uchar> grayscale_image = image_reader.GetGrayFrame();
 
 		// if no pose defined we just use a face detector
 		if(bounding_boxes.empty())
diff --git a/lib/local/Utilities/include/ImageCapture.h b/lib/local/Utilities/include/ImageCapture.h
index 9d239a9..1d8758a 100644
--- a/lib/local/Utilities/include/ImageCapture.h
+++ b/lib/local/Utilities/include/ImageCapture.h
@@ -65,7 +65,7 @@ namespace Utilities
 		// Direct opening
 
 		// Image sequence in the directory
-		bool OpenDirectory(std::string directory, float fx = -1, float fy = -1, float cx = -1, float cy = -1);
+		bool OpenDirectory(std::string directory, std::string bbox_directory="", float fx = -1, float fy = -1, float cx = -1, float cy = -1);
 
 		// Video file
 		bool OpenImageFiles(const std::vector<std::string>& image_files, float fx = -1, float fy = -1, float cx = -1, float cy = -1);
@@ -76,6 +76,9 @@ namespace Utilities
 		// Getting the most recent grayscale frame (need to call GetNextImage first)
 		cv::Mat_<uchar> GetGrayFrame();
 
+		// Return bounding boxes associated with the image (if defined)
+		std::vector<cv::Rect_<double> > GetBoundingBoxes();
+
 		// Parameters describing the sequence and it's progress (what's the proportion of images opened)
 		double GetProgress();
 
@@ -87,6 +90,8 @@ namespace Utilities
 		// Name of the video file, image directory, or the webcam
 		std::string name;
 
+		bool has_bounding_boxes;
+
 	private:
 
 		// Storing the latest captures
@@ -97,6 +102,9 @@ namespace Utilities
 		size_t  frame_num;
 		std::vector<std::string> image_files;
 
+		// Could optionally read the bounding box locations from files (each image could have multiple bounding boxes)
+		std::vector<std::vector<cv::Rect_<double> > > bounding_boxes;
+
 		void SetCameraIntrinsics(float fx, float fy, float cx, float cy);
 
 		// TODO make sure that can set fx, fy externally
diff --git a/lib/local/Utilities/src/ImageCapture.cpp b/lib/local/Utilities/src/ImageCapture.cpp
index 78920e9..93dfb63 100644
--- a/lib/local/Utilities/src/ImageCapture.cpp
+++ b/lib/local/Utilities/src/ImageCapture.cpp
@@ -88,8 +88,10 @@ bool ImageCapture::Open(std::vector<std::string>& arguments)
 	}
 
 	std::string input_directory;
+	std::string bbox_directory;
 
 	bool directory_found = false;
+	has_bounding_boxes = false;
 
 	std::vector<std::string> input_image_files;
 
@@ -117,6 +119,14 @@ bool ImageCapture::Open(std::vector<std::string>& arguments)
 				directory_found = true;
 			}
 		}
+		else if (arguments[i].compare("-bboxdir") == 0)
+		{
+			bbox_directory = (input_root + arguments[i + 1]);
+			valid[i] = false;
+			valid[i + 1] = false;
+			has_bounding_boxes = true;
+			i++;
+		}
 		else if (arguments[i].compare("-fx") == 0)
 		{
 			std::stringstream data(arguments[i + 1]);
@@ -158,7 +168,7 @@ bool ImageCapture::Open(std::vector<std::string>& arguments)
 	}
 	if (!input_directory.empty())
 	{
-		return OpenDirectory(input_directory, fx, fy, cx, cy);
+		return OpenDirectory(input_directory, bbox_directory, fx, fy, cx, cy);
 	}
 
 	// If no input found return false and set a flag for it
@@ -192,7 +202,7 @@ bool ImageCapture::OpenImageFiles(const std::vector<std::string>& image_files, f
 
 }
 
-bool ImageCapture::OpenDirectory(std::string directory, float fx, float fy, float cx, float cy)
+bool ImageCapture::OpenDirectory(std::string directory, std::string bbox_directory, float fx, float fy, float cx, float cy)
 {
 	INFO_STREAM("Attempting to read from directory: " << directory);
 
@@ -215,6 +225,36 @@ bool ImageCapture::OpenDirectory(std::string directory, float fx, float fy, floa
 		if (file_iterator->extension().string().compare(".jpg") == 0 || file_iterator->extension().string().compare(".jpeg") == 0 || file_iterator->extension().string().compare(".png") == 0 || file_iterator->extension().string().compare(".bmp") == 0)
 		{
 			curr_dir_files.push_back(file_iterator->string());
+
+			// If bounding box directory is specified, read the bounding boxes from it
+			if (!bbox_directory.empty())
+			{
+				boost::filesystem::path current_file = *file_iterator;
+				boost::filesystem::path bbox_file = current_file.replace_extension("txt");
+
+				// If there is a bounding box file push it to the list of bounding boxes
+				if (boost::filesystem::exists(bbox_file))
+				{
+					std::ifstream in_bbox(bbox_file.string().c_str(), std::ios_base::in);
+
+					std::vector<cv::Rect_<double> > bboxes_image;
+					while (!in_bbox.eof())
+					{
+						double min_x, min_y, max_x, max_y;
+
+						in_bbox >> min_x >> min_y >> max_x >> max_y;
+						bboxes_image.push_back(cv::Rect_<double>(min_x, min_y, max_x - min_x, max_y - min_y));
+					}
+					in_bbox.close();
+
+					bounding_boxes.push_back(bboxes_image);
+				}
+
+			}
+			else
+			{
+				ERROR_STREAM("Could not find the corresponding bounding box for file:" + file_iterator->string());
+			}
 		}
 	}
 
@@ -344,6 +384,18 @@ cv::Mat ImageCapture::GetNextImage()
 	return latest_frame;
 }
 
+std::vector<cv::Rect_<double> > ImageCapture::GetBoundingBoxes()
+{
+	if (!bounding_boxes.empty())
+	{
+		return bounding_boxes[frame_num - 1];
+	}
+	else
+	{
+		return std::vector<cv::Rect_<double> >();
+	}
+}
+
 double ImageCapture::GetProgress()
 {
 	return (double)frame_num / (double)image_files.size();