Add face recognition support to the system

This commit is contained in:
2024-04-05 11:24:04 +02:00
parent b2ffbfa530
commit a279001151
5 changed files with 283 additions and 37 deletions

View File

@ -5,7 +5,7 @@ find_package(OpenCV REQUIRED)
set(CMAKE_CXX_STANDARD 17)
set(SRC_FILES main.cpp yolo.cpp tokenize.cpp log.cpp seamcarving.cpp utils.cpp intelligentroi.cpp)
set(SRC_FILES main.cpp yolo.cpp tokenize.cpp log.cpp seamcarving.cpp utils.cpp intelligentroi.cpp facerecognizer.cpp)
add_executable(${PROJECT_NAME} ${SRC_FILES})
target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} -ltbb)

136
facerecognizer.cpp Normal file
View File

@ -0,0 +1,136 @@
#include "facerecognizer.h"
#include <filesystem>
#define INCBIN_PREFIX r
#include "incbin.h"
INCBIN(defaultRecognizer, "../face_recognition_sface_2021dec.onnx");
INCBIN(defaultDetector, "../face_detection_yunet_2023mar.onnx");
#include <opencv2/dnn/dnn.hpp>
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <fstream>
#include "log.h"
static const std::vector<unsigned char> onnx((unsigned char*)rdefaultDetectorData, ((unsigned char*)rdefaultDetectorData)+rdefaultDetectorSize);
// Construct a FaceRecognizer.
// recognizerPath: path to an SFace ONNX model; empty selects the INCBIN-embedded default.
// detectorPath:   path to a YuNet ONNX model; empty selects the INCBIN-embedded default.
// referances:     optional reference images, forwarded to addReferances().
// Throws LoadException when either network cannot be loaded.
FaceRecognizer::FaceRecognizer(std::filesystem::path recognizerPath, const std::filesystem::path& detectorPath, const std::vector<cv::Mat>& referances)
{
	if(detectorPath.empty())
	{
		Log(Log::INFO)<<"Using builtin face detection model";
		// "onnx" here is the framework tag for the in-memory model buffer overload.
		detector = cv::FaceDetectorYN::create("onnx", onnx, std::vector<unsigned char>(), {320, 320}, 0.6, 0.3, 5000, cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
		if(!detector)
			throw LoadException("Unable to load detector network from built in file");
	}
	else
	{
		detector = cv::FaceDetectorYN::create(detectorPath, "", {320, 320}, 0.6, 0.3, 5000, cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
		if(!detector)
			throw LoadException("Unable to load detector network from "+detectorPath.string());
	}

	// FaceRecognizerSF has no in-memory loading overload, so the embedded model
	// has to be written to a temporary file first and removed again afterwards.
	bool defaultNetwork = recognizerPath.empty();
	if(defaultNetwork)
	{
		Log(Log::INFO)<<"Using builtin face recognition model";
		recognizerPath = cv::tempfile("onnx");
		std::ofstream file(recognizerPath);
		if(!file.is_open())
			throw LoadException("Unable open temporary file at "+recognizerPath.string());
		Log(Log::DEBUG)<<"Using "<<recognizerPath<<" as temporary file for onnx recongnition network";
		file.write(reinterpret_cast<const char*>(rdefaultRecognizerData), rdefaultRecognizerSize);
		file.close();
	}

	try
	{
		recognizer = cv::FaceRecognizerSF::create(recognizerPath.string(), "", cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
	}
	catch(...)
	{
		// Do not leak the temporary model file if OpenCV throws during creation.
		if(defaultNetwork)
			std::filesystem::remove(recognizerPath);
		throw;
	}
	if(defaultNetwork)
		std::filesystem::remove(recognizerPath);
	if(!recognizer)
		throw LoadException("Unable to load recognizer network from "+recognizerPath.string());

	addReferances(referances);
}
// Run the YuNet detector over the whole image and return the raw
// detection matrix, one row per face found (empty when no face is found).
cv::Mat FaceRecognizer::detectFaces(const cv::Mat& input)
{
	detector->setInputSize(input.size());
	cv::Mat detections;
	detector->detect(input, detections);
	return detections;
}
// Extract and store a feature vector for the first face found in each
// referance image. Returns true if at least one image yielded a usable face.
// Images without any detectable face are skipped with a warning.
bool FaceRecognizer::addReferances(const std::vector<cv::Mat>& referances)
{
	bool ret = false;
	for(const cv::Mat& image : referances)
	{
		cv::Mat faces = detectFaces(image);
		// Check emptiness BEFORE asserting the column count: an empty result
		// has no valid geometry and the old order could trip the assert.
		if(faces.empty())
		{
			Log(Log::WARN)<<"A referance image provided does not contain any face";
			continue;
		}
		// YuNet emits 15 values per detected face (box, landmarks, score).
		assert(faces.cols == 15);
		if(faces.rows > 1)
			Log(Log::WARN)<<"A referance image provided contains more than one face, only the first detected face will be considered";
		cv::Mat cropedImage;
		recognizer->alignCrop(image, faces.row(0), cropedImage);
		cv::Mat features;
		recognizer->feature(cropedImage, features);
		// feature() may return a view into internal buffers; clone to own the data.
		referanceFeatures.push_back(features.clone());
		ret = true;
	}
	return ret;
}
void FaceRecognizer::setThreshold(double threasholdIn)
{
threshold = threasholdIn;
}
// Return the currently configured match threshold.
double FaceRecognizer::getThreshold()
{
	return threshold;
}
// Drop every stored referance feature; isMatch() will report no matches
// until addReferances() is called again.
void FaceRecognizer::clearReferances()
{
	referanceFeatures.clear();
}
// Match every face found in input against the stored referance features.
// Returns {referance index, score} of the best match above threshold,
// {-1, 0} when nothing matched, or {-2, 0} when alone is set and more
// than one face is present in the image.
std::pair<int, double> FaceRecognizer::isMatch(const cv::Mat& input, bool alone)
{
	cv::Mat faces = detectFaces(input);
	if(alone && faces.rows > 1)
		return {-2, 0};
	std::pair<int, double> bestMatch = {-1, 0};
	for(int i = 0; i < faces.rows; ++i)
	{
		cv::Mat face;
		// Bug fix: previously this always aligned on faces.row(0), so only the
		// first detected face was ever compared; use the i-th face instead.
		recognizer->alignCrop(input, faces.row(i), face);
		cv::Mat features;
		recognizer->feature(face, features);
		// feature() may return a view into internal buffers; clone to own the data.
		features = features.clone();
		for(size_t referanceIndex = 0; referanceIndex < referanceFeatures.size(); ++referanceIndex)
		{
			double score = recognizer->match(referanceFeatures[referanceIndex], features, cv::FaceRecognizerSF::FR_COSINE);
			if(score > threshold && score > bestMatch.second)
			{
				bestMatch = {static_cast<int>(referanceIndex), score};
			}
		}
	}
	return bestMatch;
}

41
facerecognizer.h Normal file
View File

@ -0,0 +1,41 @@
#pragma once
#include <exception>
#include <opencv2/core/mat.hpp>
#include <opencv2/objdetect/face.hpp>
#include <opencv2/core.hpp>
#include <vector>
#include <memory>
#include <filesystem>
// Detects faces with an OpenCV YuNet detector and matches them against a set
// of reference images using an SFace recognition network (see facerecognizer.cpp).
class FaceRecognizer
{
public:
// Thrown by the constructor when a detector or recognizer network cannot be loaded.
class LoadException : public std::exception
{
private:
std::string message;
public:
LoadException(const std::string& msg): std::exception(), message(msg) {}
virtual const char* what() const throw() override
{
return message.c_str();
}
};
private:
// Feature vectors extracted from the reference images; isMatch() compares against these.
std::vector<cv::Mat> referanceFeatures;
std::shared_ptr<cv::FaceRecognizerSF> recognizer;
std::shared_ptr<cv::FaceDetectorYN> detector;
// Minimum cosine-similarity score for a match; 0.363 is the project default
// (presumably the SFace recommended operating point — TODO confirm).
double threshold = 0.363;
public:
// Empty paths select the built-in (INCBIN-embedded) models; referances may be
// empty and added later via addReferances(). Throws LoadException on failure.
FaceRecognizer(std::filesystem::path recognizerPath = "", const std::filesystem::path& detectorPath = "", const std::vector<cv::Mat>& referances = std::vector<cv::Mat>());
// Runs the detector on the whole image; returns one row per detected face.
cv::Mat detectFaces(const cv::Mat& input);
// Returns {referance index, score} of the best match above threshold,
// {-1, 0} if nothing matched, or {-2, 0} when alone is set and multiple faces are found.
std::pair<int, double> isMatch(const cv::Mat& input, bool alone = false);
// Stores feature vectors for the given images; returns true if at least one face was usable.
bool addReferances(const std::vector<cv::Mat>& referances);
// Sets the minimum score isMatch() requires to accept a face.
void setThreshold(double threashold);
double getThreshold();
// Drops all stored reference features.
void clearReferances();
};

View File

@ -6,6 +6,7 @@
#include <opencv2/highgui.hpp>
#include <algorithm>
#include <execution>
#include <string>
#include <vector>
#include <numeric>
@ -15,6 +16,7 @@
#include "utils.h"
#include "intelligentroi.h"
#include "seamcarving.h"
#include "facerecognizer.h"
const Yolo::Detection* pointInDetectionHoriz(int x, const std::vector<Yolo::Detection>& detections, const Yolo::Detection* ignore = nullptr)
{
@ -223,7 +225,7 @@ void drawDebugInfo(cv::Mat &image, const cv::Rect& rect, const std::vector<Yolo:
for(const Yolo::Detection& detection : detections)
{
cv::rectangle(image, detection.box, detection.color, 3);
std::string label = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4);
std::string label = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4) + ' ' + std::to_string(detection.priority);
cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_DUPLEX, 1, 1, 0);
cv::Rect textBox(detection.box.x, detection.box.y - 40, labelSize.width + 10, labelSize.height + 20);
cv::rectangle(image, textBox, detection.color, cv::FILLED);
@ -251,7 +253,8 @@ static void reduceSize(cv::Mat& image, const cv::Size& targetSize)
}
}
void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yolo, const std::filesystem::path& debugOutputPath)
void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yolo, std::mutex& yoloMutex, FaceRecognizer* recognizer,
std::mutex& reconizerMutex, const std::filesystem::path& debugOutputPath)
{
InteligentRoi intRoi(yolo);
cv::Mat image = cv::imread(path);
@ -263,11 +266,28 @@ void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yol
reduceSize(image, config.targetSize);
yoloMutex.lock();
std::vector<Yolo::Detection> detections = yolo.runInference(image);
yoloMutex.unlock();
Log(Log::DEBUG)<<"Got "<<detections.size()<<" detections for "<<path;
for(const Yolo::Detection& detection : detections)
Log(Log::DEBUG)<<detection.class_id<<": "<<detection.className<<" at "<<detection.box<<" with prio "<<detection.priority;
for(Yolo::Detection& detection : detections)
{
bool hasmatch = false;
if(recognizer && detection.className == "person")
{
cv::Mat person = image(detection.box);
reconizerMutex.lock();
std::pair<int, double> match = recognizer->isMatch(person);
reconizerMutex.unlock();
if(match.first >= 0)
{
detection.priority += 10;
hasmatch = true;
}
}
Log(Log::DEBUG)<<detection.class_id<<": "<<detection.className<<" at "<<detection.box<<" with prio "<<detection.priority<<(hasmatch ? " has match" : "");
}
cv::Rect crop;
bool incompleate = intRoi.getCropRectangle(crop, detections, image.size());
@ -276,7 +296,11 @@ void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yol
{
bool ret = seamCarveResize(image, detections, config.targetSize.aspectRatio());
if(ret && image.size().aspectRatio() != config.targetSize.aspectRatio())
{
yoloMutex.lock();
detections = yolo.runInference(image);
yoloMutex.unlock();
}
}
cv::Mat croppedImage;
@ -306,7 +330,7 @@ void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yol
}
cv::Mat resizedImage;
cv::resize(croppedImage, resizedImage, {512, 512}, 0, 0, cv::INTER_CUBIC);
cv::resize(croppedImage, resizedImage, config.targetSize, 0, 0, cv::INTER_CUBIC);
bool ret = cv::imwrite(config.outputDir/path.filename(), resizedImage);
if(!ret)
Log(Log::WARN)<<"could not save image to "<<config.outputDir/path.filename()<<" skipping";
@ -346,7 +370,7 @@ int main(int argc, char* argv[])
return 1;
}
Yolo yolo(config.modelPath, {640, 480}, config.classesPath, false);
Yolo yolo(config.modelPath, {640, 480}, config.classesPath, true);
if(!std::filesystem::exists(config.outputDir))
{
@ -364,8 +388,28 @@ int main(int argc, char* argv[])
std::filesystem::create_directory(debugOutputPath);
}
std::for_each(std::execution::parallel_unsequenced_policy(),
imagePaths.begin(), imagePaths.end(), [&yolo, &debugOutputPath, &config](const std::filesystem::path& path){pipeline(path, config, yolo, debugOutputPath);});
FaceRecognizer* recognizer = nullptr;
std::mutex recognizerMutex;
if(!config.focusPersonImage.empty())
{
cv::Mat personImage = cv::imread(config.focusPersonImage);
if(personImage.empty())
{
Log(Log::ERROR)<<"Could not load image from "<<config.focusPersonImage;
return 1;
}
recognizer = new FaceRecognizer();
recognizer->addReferances({personImage});
recognizer->setThreshold(config.threshold);
}
std::mutex yoloMutex;
auto pipelineLambda = [&yolo, &debugOutputPath, &config, &yoloMutex, &recognizer, &recognizerMutex](const std::filesystem::path& path)
{
pipeline(path, config, yolo, yoloMutex, recognizer, recognizerMutex, debugOutputPath);
};
std::for_each(std::execution::par_unseq, imagePaths.begin(), imagePaths.end(), pipelineLambda);
return 0;
}

View File

@ -20,7 +20,10 @@ static struct argp_option options[] =
{"classes", 'c', "[FILENAME]", 0, "classes text file to use" },
{"out", 'o', "[DIRECTORY]", 0, "directory whre images are to be saved" },
{"debug", 'd', 0, 0, "output debug images" },
{"seam-carving", 's', 0, 0, "model to train"},
{"seam-carving", 's', 0, 0, "use seam carving to change image aspect ratio instead of croping"},
{"size", 'z', "[PIXELS]", 0, "target output size, default: 512"},
{"focus-person", 'f', "[FILENAME]", 0, "a file name to an image of a person that the crop should focus on"},
{"person-threshold", 't', "[NUMBER]", 0, "the threshold at witch to consider a person matched, defaults to 0.363"},
{0}
};
@ -30,14 +33,18 @@ struct Config
std::filesystem::path modelPath;
std::filesystem::path classesPath;
std::filesystem::path outputDir;
std::filesystem::path focusPersonImage;
bool seamCarving = false;
bool debug = false;
double threshold = 0.363;
cv::Size targetSize = cv::Size(512, 512);
};
static error_t parse_opt (int key, char *arg, struct argp_state *state)
{
Config *config = reinterpret_cast<Config*>(state->input);
try
{
switch (key)
{
case 'q':
@ -61,12 +68,30 @@ static error_t parse_opt (int key, char *arg, struct argp_state *state)
case 's':
config->seamCarving = true;
break;
case 'f':
config->focusPersonImage = arg;
break;
case 't':
config->threshold = std::atof(arg);
break;
case 'z':
{
int x = std::stoi(arg);
config->targetSize = cv::Size(x, x);
break;
}
case ARGP_KEY_ARG:
config->imagePaths.push_back(arg);
break;
default:
return ARGP_ERR_UNKNOWN;
}
}
catch(const std::invalid_argument& ex)
{
std::cout<<arg<<" passed for argument -"<<static_cast<char>(key)<<" is not a valid number.\n";
return ARGP_KEY_ERROR;
}
return 0;
}