add face recognition support to the system

2024-04-05 11:24:04 +02:00 · 2024-04-05 11:24:04 +02:00 · a279001151
commit a279001151
parent b2ffbfa530
5 changed files with 283 additions and 37 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -5,7 +5,7 @@ find_package(OpenCV REQUIRED)
 set(CMAKE_CXX_STANDARD 17)
-set(SRC_FILES main.cpp yolo.cpp tokenize.cpp log.cpp seamcarving.cpp utils.cpp intelligentroi.cpp)
+set(SRC_FILES main.cpp yolo.cpp tokenize.cpp log.cpp seamcarving.cpp utils.cpp intelligentroi.cpp facerecognizer.cpp)
 add_executable(${PROJECT_NAME} ${SRC_FILES})
 target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} -ltbb)
--- a/facerecognizer.cpp
+++ b/facerecognizer.cpp
@ -0,0 +1,136 @@
 #include "facerecognizer.h"
 #include <filesystem>
 #define INCBIN_PREFIX r
 #include "incbin.h"
 INCBIN(defaultRecognizer, "../face_recognition_sface_2021dec.onnx");
 INCBIN(defaultDetector, "../face_detection_yunet_2023mar.onnx");
 #include <opencv2/dnn/dnn.hpp>
 #include <opencv2/core.hpp>
 #include <opencv2/highgui.hpp>
 #include <fstream>
 #include "log.h"
 // Copy of the embedded face detection model bytes (the rdefaultDetector* symbols declared by
 // INCBIN above), held in a vector so the constructor can hand it to the buffer-based
 // cv::FaceDetectorYN::create() overload when no detector path is given.
 static const std::vector<unsigned char> onnx((unsigned char*)rdefaultDetectorData, ((unsigned char*)rdefaultDetectorData)+rdefaultDetectorSize);
 /**
  * Loads the face detection and face recognition networks and registers the reference images.
  *
  * @param recognizerPath path of the recognition network; when empty, the model embedded in the
  *                       binary is written to a temporary file and loaded from there
  * @param detectorPath   path of the detection network; when empty, the embedded model is used
  * @param referances     images handed to addReferances() once both networks are loaded
  * @throws LoadException if a network cannot be loaded or the temporary model file cannot be
  *                       opened or written
  */
 FaceRecognizer::FaceRecognizer(std::filesystem::path recognizerPath, const std::filesystem::path& detectorPath, const std::vector<cv::Mat>& referances)
 {
 	if(detectorPath.empty())
 	{
 		Log(Log::INFO)<<"Using builtin face detection model";
 		detector = cv::FaceDetectorYN::create("onnx", onnx, std::vector<unsigned char>(), {320, 320}, 0.6, 0.3, 5000, cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
 		if(!detector)
 			throw LoadException("Unable to load detector network from built in file");
 	}
 	else
 	{
 		detector = cv::FaceDetectorYN::create(detectorPath.string(), "", {320, 320}, 0.6, 0.3, 5000, cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
 		if(!detector)
 			throw LoadException("Unable to load detector network from "+detectorPath.string());
 	}
 	const bool defaultNetwork = recognizerPath.empty();
 	// Best-effort removal of the temporary model file; never throws so that it can also be
 	// used while another exception is propagating.
 	const auto removeTemporaryModel = [&recognizerPath, defaultNetwork]()
 	{
 		if(!defaultNetwork)
 			return;
 		std::error_code removeError;
 		std::filesystem::remove(recognizerPath, removeError);
 		if(removeError)
 			Log(Log::WARN)<<"Unable to remove temporary file "<<recognizerPath;
 	};
 	if(defaultNetwork)
 	{
 		Log(Log::INFO)<<"Using builtin face recognition model";
 		recognizerPath = cv::tempfile("onnx");
 		// The model is binary data: open in binary mode so no newline translation is applied.
 		std::ofstream file(recognizerPath, std::ios::binary);
 		if(!file.is_open())
 			throw LoadException("Unable open temporary file at "+recognizerPath.string());
 		Log(Log::DEBUG)<<"Using "<<recognizerPath<<" as temporary file for onnx recongnition network";
 		file.write(reinterpret_cast<const char*>(rdefaultRecognizerData), rdefaultRecognizerSize);
 		file.close();
 		if(file.fail())
 		{
 			// A truncated model must not be handed to the loader.
 			removeTemporaryModel();
 			throw LoadException("Unable to write temporary file at "+recognizerPath.string());
 		}
 	}
 	try
 	{
 		recognizer = cv::FaceRecognizerSF::create(recognizerPath.string(), "", cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
 	}
 	catch(...)
 	{
 		// Do not leave the temporary model behind when loading throws.
 		removeTemporaryModel();
 		throw;
 	}
 	removeTemporaryModel();
 	if(!recognizer)
 		throw LoadException("Unable to load recognizer network from "+recognizerPath.string());
 	addReferances(referances);
 }
 /**
  * Runs the face detector on an image.
  *
  * @param input image to search for faces
  * @return the matrix filled in by the detector for this image
  */
 cv::Mat FaceRecognizer::detectFaces(const cv::Mat& input)
 {
 	cv::Mat detections;
 	// The detector is told the size of this particular image before every detection.
 	detector->setInputSize(input.size());
 	detector->detect(input, detections);
 	return detections;
 }
 /**
  * Extracts face features from the given images and appends them to the stored references.
  *
  * Images in which no face is detected are skipped with a warning. When an image contains
  * several faces, only the first detected one is used.
  *
  * @param referances images expected to show the person(s) to match against
  * @return true if at least one reference feature was added, false otherwise
  */
 bool FaceRecognizer::addReferances(const std::vector<cv::Mat>& referances)
 {
 	bool ret = false;
 	for(const cv::Mat& image : referances)
 	{
 		cv::Mat faces = detectFaces(image);
 		// Check for "no face" first: an empty result is not guaranteed to have the column
 		// count of a real detection, so asserting on it beforehand could abort debug builds.
 		if(faces.empty())
 		{
 			Log(Log::WARN)<<"A referance image provided dose not contian any face";
 			continue;
 		}
 		assert(faces.cols == 15);
 		if(faces.rows > 1)
 			Log(Log::WARN)<<"A referance image provided contains more than one face, only the first detected face will be considered";
 		cv::Mat cropedImage;
 		recognizer->alignCrop(image, faces.row(0), cropedImage);
 		cv::Mat features;
 		recognizer->feature(cropedImage, features);
 		// Store a deep copy of the feature matrix.
 		referanceFeatures.push_back(features.clone());
 		ret = true;
 	}
 	return ret;
 }
 void FaceRecognizer::setThreshold(double threasholdIn)
 {
 	threshold = threasholdIn;
 }
 double FaceRecognizer::getThreshold()
 {
 	return threshold;
 }
 void FaceRecognizer::clearReferances()
 {
 	referanceFeatures.clear();
 }
 /**
  * Compares every face found in an image against the stored reference features.
  *
  * @param input image to search for a known face
  * @param alone when true, an image containing more than one face is rejected outright
  * @return {-2, 0} if alone is set and more than one face was detected,
  *         {-1, 0} if no face scored above the threshold,
  *         otherwise {index of the best matching reference, its score}
  */
 std::pair<int, double> FaceRecognizer::isMatch(const cv::Mat& input, bool alone)
 {
 	cv::Mat faces = detectFaces(input);
 	if(alone && faces.rows > 1)
 		return {-2, 0};
 	std::pair<int, double> bestMatch = {-1, 0};
 	for(int i = 0; i < faces.rows; ++i)
 	{
 		cv::Mat face;
 		// Align the i-th detection; using row 0 here would compare only the first face
 		// over and over and ignore every other face in the image.
 		recognizer->alignCrop(input, faces.row(i), face);
 		cv::Mat features;
 		recognizer->feature(face, features);
 		features = features.clone();
 		for(size_t referanceIndex = 0; referanceIndex < referanceFeatures.size(); ++referanceIndex)
 		{
 			double score = recognizer->match(referanceFeatures[referanceIndex], features, cv::FaceRecognizerSF::FR_COSINE);
 			// Keep the highest score that is above the threshold.
 			if(score > threshold && score > bestMatch.second)
 			{
 				bestMatch = {static_cast<int>(referanceIndex), score};
 			}
 		}
 	}
 	return bestMatch;
 }
--- a/facerecognizer.h
+++ b/facerecognizer.h
@ -0,0 +1,41 @@
 #pragma once
 #include <exception>
 #include <opencv2/core/mat.hpp>
 #include <opencv2/objdetect/face.hpp>
 #include <opencv2/core.hpp>
 #include <vector>
 #include <memory>
 #include <filesystem>
 /**
  * Detects faces in images and compares them against a set of stored reference faces.
  */
 class FaceRecognizer
 {
 public:
 	/** Exception carrying a textual description of why a network could not be loaded. */
 	class LoadException : public std::exception
 	{
 	private:
 		std::string message;
 	public:
 		LoadException(const std::string& msg): message(msg) {}
 		/** Returns the message given at construction. */
 		const char* what() const noexcept override
 		{
 			return message.c_str();
 		}
 	};
 private:
 	/** One feature matrix per accepted reference image. */
 	std::vector<cv::Mat> referanceFeatures;
 	std::shared_ptr<cv::FaceRecognizerSF> recognizer;
 	std::shared_ptr<cv::FaceDetectorYN> detector;
 	/** Score a comparison has to exceed to be treated as a match. */
 	double threshold = 0.363;
 public:
 	/**
 	 * @param recognizerPath recognition network file, empty selects the default
 	 * @param detectorPath   detection network file, empty selects the default
 	 * @param referances     initial reference images
 	 */
 	FaceRecognizer(std::filesystem::path recognizerPath = "",
 		const std::filesystem::path& detectorPath = "",
 		const std::vector<cv::Mat>& referances = {});
 	/** Runs face detection on input and returns the detector output. */
 	cv::Mat detectFaces(const cv::Mat& input);
 	/** Returns the index and score of the matching reference for input; a negative index means no match. */
 	std::pair<int, double> isMatch(const cv::Mat& input, bool alone = false);
 	/** Adds reference images; returns true if at least one of them was accepted. */
 	bool addReferances(const std::vector<cv::Mat>& referances);
 	void setThreshold(double threashold);
 	double getThreshold();
 	/** Removes all stored references. */
 	void clearReferances();
 };
--- a/main.cpp
+++ b/main.cpp
@ -6,6 +6,7 @@
 #include <opencv2/highgui.hpp>
 #include <algorithm>
 #include <execution>
 #include <string>
 #include <vector>
 #include <numeric>
@ -15,6 +16,7 @@
 #include "utils.h"
 #include "intelligentroi.h"
 #include "seamcarving.h"
 #include "facerecognizer.h"
 const Yolo::Detection* pointInDetectionHoriz(int x, const std::vector<Yolo::Detection>& detections, const Yolo::Detection* ignore = nullptr)
 {
@ -223,7 +225,7 @@ void drawDebugInfo(cv::Mat &image, const cv::Rect& rect, const std::vector<Yolo:
 	for(const Yolo::Detection& detection : detections)
 	{
 		cv::rectangle(image, detection.box, detection.color, 3);
-		std::string label = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4);
+		std::string label = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4) + ' ' + std::to_string(detection.priority);
 		cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_DUPLEX, 1, 1, 0);
 		cv::Rect textBox(detection.box.x, detection.box.y - 40, labelSize.width + 10, labelSize.height + 20);
 		cv::rectangle(image, textBox, detection.color, cv::FILLED);
@ -251,7 +253,8 @@ static void reduceSize(cv::Mat& image, const cv::Size& targetSize)
 	}
 }
-void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yolo, const std::filesystem::path& debugOutputPath)
+void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yolo, std::mutex& yoloMutex, FaceRecognizer* recognizer,
 	std::mutex& reconizerMutex, const std::filesystem::path& debugOutputPath)
 {
 	InteligentRoi intRoi(yolo);
 	cv::Mat image = cv::imread(path);
@ -263,11 +266,28 @@ void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yol
 	reduceSize(image, config.targetSize);
 	yoloMutex.lock();
 	std::vector<Yolo::Detection> detections = yolo.runInference(image);
 	yoloMutex.unlock();
 	Log(Log::DEBUG)<<"Got "<<detections.size()<<" detections for "<<path;
-	for(const Yolo::Detection& detection : detections)
+	for(Yolo::Detection& detection : detections)
-		Log(Log::DEBUG)<<detection.class_id<<": "<<detection.className<<" at "<<detection.box<<" with prio "<<detection.priority;
+	{
 		bool hasmatch = false;
 		if(recognizer && detection.className == "person")
 		{
 			cv::Mat person = image(detection.box);
 			reconizerMutex.lock();
 			std::pair<int, double> match = recognizer->isMatch(person);
 			reconizerMutex.unlock();
 			if(match.first >= 0)
 			{
 				detection.priority += 10;
 				hasmatch = true;
 			}
 		}
 		Log(Log::DEBUG)<<detection.class_id<<": "<<detection.className<<" at "<<detection.box<<" with prio "<<detection.priority<<(hasmatch ? " has match" : "");
 	}
 	cv::Rect crop;
 	bool incompleate = intRoi.getCropRectangle(crop, detections, image.size());
@ -276,7 +296,11 @@ void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yol
 	{
 		bool ret = seamCarveResize(image, detections, config.targetSize.aspectRatio());
 		if(ret && image.size().aspectRatio() != config.targetSize.aspectRatio())
 		{
 			yoloMutex.lock();
 			detections = yolo.runInference(image);
 			yoloMutex.unlock();
 		}
 	}
 	cv::Mat croppedImage;
@ -306,7 +330,7 @@ void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yol
 	}
 	cv::Mat resizedImage;
-	cv::resize(croppedImage, resizedImage, {512, 512}, 0, 0, cv::INTER_CUBIC);
+	cv::resize(croppedImage, resizedImage, config.targetSize, 0, 0, cv::INTER_CUBIC);
 	bool ret = cv::imwrite(config.outputDir/path.filename(), resizedImage);
 	if(!ret)
 		Log(Log::WARN)<<"could not save image to "<<config.outputDir/path.filename()<<" skipping";
@ -346,7 +370,7 @@ int main(int argc, char* argv[])
 		return 1;
 	}
-	Yolo yolo(config.modelPath, {640, 480}, config.classesPath, false);
+	Yolo yolo(config.modelPath, {640, 480}, config.classesPath, true);
 	if(!std::filesystem::exists(config.outputDir))
 	{
@ -364,8 +388,28 @@ int main(int argc, char* argv[])
 			std::filesystem::create_directory(debugOutputPath);
 	}
-	std::for_each(std::execution::parallel_unsequenced_policy(),
+	FaceRecognizer* recognizer = nullptr;
-				  imagePaths.begin(), imagePaths.end(), [&yolo, &debugOutputPath, &config](const std::filesystem::path& path){pipeline(path, config, yolo, debugOutputPath);});
+	std::mutex recognizerMutex;
 	if(!config.focusPersonImage.empty())
 	{
 		cv::Mat personImage = cv::imread(config.focusPersonImage);
 		if(personImage.empty())
 		{
 			Log(Log::ERROR)<<"Could not load image from "<<config.focusPersonImage;
 			return 1;
 		}
 		recognizer = new FaceRecognizer();
 		recognizer->addReferances({personImage});
 		recognizer->setThreshold(config.threshold);
 	}
 	std::mutex yoloMutex;
 	auto pipelineLambda = [&yolo, &debugOutputPath, &config, &yoloMutex, &recognizer, &recognizerMutex](const std::filesystem::path& path)
 	{
 		pipeline(path, config, yolo, yoloMutex, recognizer, recognizerMutex, debugOutputPath);
 	};
 	std::for_each(std::execution::par_unseq, imagePaths.begin(), imagePaths.end(), pipelineLambda);
 	return 0;
 }
--- a/options.h
+++ b/options.h
@ -20,7 +20,10 @@ static struct argp_option options[] =
  {"classes", 		'c', "[FILENAME]",	0,	"classes text file to use" },
  {"out",	 		'o', "[DIRECTORY]",	0,	"directory whre images are to be saved" },
  {"debug", 		'd', 0,				0,	"output debug images" },
-  {"seam-carving", 	's', 0,				0,	"model to train"},
+  {"seam-carving", 	's', 0,				0,	"use seam carving to change image aspect ratio instead of croping"},
  {"size", 			'z', "[PIXELS]",	0,	"target output size, default: 512"},
  {"focus-person",	'f', "[FILENAME]",	0,	"a file name to an image of a person that the crop should focus on"},
  {"person-threshold",	't', "[NUMBER]",	0,	"the threshold at witch to consider a person matched, defaults to 0.363"},
  {0}
 };
@ -30,42 +33,64 @@ struct Config
 	std::filesystem::path modelPath;
 	std::filesystem::path classesPath;
 	std::filesystem::path outputDir;
 	std::filesystem::path focusPersonImage;
 	bool seamCarving = false;
 	bool debug = false;
 	double threshold = 0.363;
 	cv::Size targetSize = cv::Size(512, 512);
 };
 static error_t parse_opt (int key, char *arg, struct argp_state *state)
 {
 	Config *config = reinterpret_cast<Config*>(state->input);
-	switch (key)
+	try
 	{
-	case 'q':
+		switch (key)
-		Log::level = Log::ERROR;
+		{
-		break;
+		case 'q':
-	case 'v':
+			Log::level = Log::ERROR;
-		Log::level = Log::DEBUG;
+			break;
-		break;
+		case 'v':
-	case 'm':
+			Log::level = Log::DEBUG;
-		config->modelPath = arg;
+			break;
-		break;
+		case 'm':
-	case 'c':
+			config->modelPath = arg;
-		config->classesPath = arg;
+			break;
-		break;
+		case 'c':
-	case 'd':
+			config->classesPath = arg;
-		config->debug = true;
+			break;
-		break;
+		case 'd':
-	case 'o':
+			config->debug = true;
-		config->outputDir.assign(arg);
+			break;
-		break;
+		case 'o':
-	case 's':
+			config->outputDir.assign(arg);
-		config->seamCarving = true;
+			break;
-		break;
+		case 's':
-	case ARGP_KEY_ARG:
+			config->seamCarving = true;
-		config->imagePaths.push_back(arg);
+			break;
-		break;
+		case 'f':
-	default:
+			config->focusPersonImage = arg;
-		return ARGP_ERR_UNKNOWN;
+			break;
 		case 't':
 			config->threshold = std::atof(arg);
 			break;
 		case 'z':
 		{
 			int x = std::stoi(arg);
 			config->targetSize = cv::Size(x, x);
 			break;
 		}
 		case ARGP_KEY_ARG:
 			config->imagePaths.push_back(arg);
 			break;
 		default:
 			return ARGP_ERR_UNKNOWN;
 		}
 	}
 	catch(const std::invalid_argument& ex)
 	{
 		std::cout<<arg<<" passed for argument -"<<static_cast<char>(key)<<" is not a valid number.\n";
 		return ARGP_KEY_ERROR;
 	}
 	return 0;
 }