Add face recognition support to the system

This commit is contained in:
2024-04-05 11:24:04 +02:00
parent b2ffbfa530
commit a279001151
5 changed files with 283 additions and 37 deletions

View File

@ -5,7 +5,7 @@ find_package(OpenCV REQUIRED)
set(CMAKE_CXX_STANDARD 17)
set(SRC_FILES main.cpp yolo.cpp tokenize.cpp log.cpp seamcarving.cpp utils.cpp intelligentroi.cpp)
set(SRC_FILES main.cpp yolo.cpp tokenize.cpp log.cpp seamcarving.cpp utils.cpp intelligentroi.cpp facerecognizer.cpp)
add_executable(${PROJECT_NAME} ${SRC_FILES})
target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} -ltbb)

136
facerecognizer.cpp Normal file
View File

@ -0,0 +1,136 @@
#include "facerecognizer.h"
#include <filesystem>
#define INCBIN_PREFIX r
#include "incbin.h"
INCBIN(defaultRecognizer, "../face_recognition_sface_2021dec.onnx");
INCBIN(defaultDetector, "../face_detection_yunet_2023mar.onnx");
#include <opencv2/dnn/dnn.hpp>
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <fstream>
#include "log.h"
static const std::vector<unsigned char> onnx((unsigned char*)rdefaultDetectorData, ((unsigned char*)rdefaultDetectorData)+rdefaultDetectorSize);
// Construct a FaceRecognizer.
// recognizerPath: path to an SFace ONNX model; empty selects the INCBIN-embedded default.
// detectorPath:   path to a YuNet ONNX model; empty selects the INCBIN-embedded default.
// referances:     optional reference images, forwarded to addReferances().
// Throws LoadException when either network cannot be loaded.
FaceRecognizer::FaceRecognizer(std::filesystem::path recognizerPath, const std::filesystem::path& detectorPath, const std::vector<cv::Mat>& referances)
{
	if(detectorPath.empty())
	{
		Log(Log::INFO)<<"Using builtin face detection model";
		// "onnx" here is the framework tag for the in-memory model buffer overload.
		detector = cv::FaceDetectorYN::create("onnx", onnx, std::vector<unsigned char>(), {320, 320}, 0.6, 0.3, 5000, cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
		if(!detector)
			throw LoadException("Unable to load detector network from built in file");
	}
	else
	{
		detector = cv::FaceDetectorYN::create(detectorPath, "", {320, 320}, 0.6, 0.3, 5000, cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
		if(!detector)
			throw LoadException("Unable to load detector network from "+detectorPath.string());
	}

	// FaceRecognizerSF has no in-memory loading overload, so the embedded model
	// has to be written to a temporary file first and removed again afterwards.
	bool defaultNetwork = recognizerPath.empty();
	if(defaultNetwork)
	{
		Log(Log::INFO)<<"Using builtin face recognition model";
		recognizerPath = cv::tempfile("onnx");
		std::ofstream file(recognizerPath);
		if(!file.is_open())
			throw LoadException("Unable open temporary file at "+recognizerPath.string());
		Log(Log::DEBUG)<<"Using "<<recognizerPath<<" as temporary file for onnx recongnition network";
		file.write(reinterpret_cast<const char*>(rdefaultRecognizerData), rdefaultRecognizerSize);
		file.close();
	}

	try
	{
		recognizer = cv::FaceRecognizerSF::create(recognizerPath.string(), "", cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
	}
	catch(...)
	{
		// Do not leak the temporary model file if OpenCV throws during creation.
		if(defaultNetwork)
			std::filesystem::remove(recognizerPath);
		throw;
	}
	if(defaultNetwork)
		std::filesystem::remove(recognizerPath);
	if(!recognizer)
		throw LoadException("Unable to load recognizer network from "+recognizerPath.string());

	addReferances(referances);
}
// Run the YuNet detector over the whole image and return the raw
// detection matrix, one row per face found (empty when no face is found).
cv::Mat FaceRecognizer::detectFaces(const cv::Mat& input)
{
	detector->setInputSize(input.size());
	cv::Mat detections;
	detector->detect(input, detections);
	return detections;
}
// Extract and store a feature vector for the first face found in each
// referance image. Returns true if at least one image yielded a usable face.
// Images without any detectable face are skipped with a warning.
bool FaceRecognizer::addReferances(const std::vector<cv::Mat>& referances)
{
	bool ret = false;
	for(const cv::Mat& image : referances)
	{
		cv::Mat faces = detectFaces(image);
		// Check emptiness BEFORE asserting the column count: an empty result
		// has no valid geometry and the old order could trip the assert.
		if(faces.empty())
		{
			Log(Log::WARN)<<"A referance image provided does not contain any face";
			continue;
		}
		// YuNet emits 15 values per detected face (box, landmarks, score).
		assert(faces.cols == 15);
		if(faces.rows > 1)
			Log(Log::WARN)<<"A referance image provided contains more than one face, only the first detected face will be considered";
		cv::Mat cropedImage;
		recognizer->alignCrop(image, faces.row(0), cropedImage);
		cv::Mat features;
		recognizer->feature(cropedImage, features);
		// feature() may return a view into internal buffers; clone to own the data.
		referanceFeatures.push_back(features.clone());
		ret = true;
	}
	return ret;
}
void FaceRecognizer::setThreshold(double threasholdIn)
{
threshold = threasholdIn;
}
// Return the currently configured match threshold.
double FaceRecognizer::getThreshold()
{
	return threshold;
}
// Drop every stored referance feature; isMatch() will report no matches
// until addReferances() is called again.
void FaceRecognizer::clearReferances()
{
	referanceFeatures.clear();
}
// Match every face found in input against the stored referance features.
// Returns {referance index, score} of the best match above threshold,
// {-1, 0} when nothing matched, or {-2, 0} when alone is set and more
// than one face is present in the image.
std::pair<int, double> FaceRecognizer::isMatch(const cv::Mat& input, bool alone)
{
	cv::Mat faces = detectFaces(input);
	if(alone && faces.rows > 1)
		return {-2, 0};
	std::pair<int, double> bestMatch = {-1, 0};
	for(int i = 0; i < faces.rows; ++i)
	{
		cv::Mat face;
		// Bug fix: previously this always aligned on faces.row(0), so only the
		// first detected face was ever compared; use the i-th face instead.
		recognizer->alignCrop(input, faces.row(i), face);
		cv::Mat features;
		recognizer->feature(face, features);
		// feature() may return a view into internal buffers; clone to own the data.
		features = features.clone();
		for(size_t referanceIndex = 0; referanceIndex < referanceFeatures.size(); ++referanceIndex)
		{
			double score = recognizer->match(referanceFeatures[referanceIndex], features, cv::FaceRecognizerSF::FR_COSINE);
			if(score > threshold && score > bestMatch.second)
			{
				bestMatch = {static_cast<int>(referanceIndex), score};
			}
		}
	}
	return bestMatch;
}

41
facerecognizer.h Normal file
View File

@ -0,0 +1,41 @@
#pragma once
#include <exception>
#include <opencv2/core/mat.hpp>
#include <opencv2/objdetect/face.hpp>
#include <opencv2/core.hpp>
#include <vector>
#include <memory>
#include <filesystem>
// Detects faces with an OpenCV YuNet detector and matches them against a set
// of reference images using an SFace recognition network (see facerecognizer.cpp).
class FaceRecognizer
{
public:
// Thrown by the constructor when a detector or recognizer network cannot be loaded.
class LoadException : public std::exception
{
private:
std::string message;
public:
LoadException(const std::string& msg): std::exception(), message(msg) {}
virtual const char* what() const throw() override
{
return message.c_str();
}
};
private:
// Feature vectors extracted from the reference images; isMatch() compares against these.
std::vector<cv::Mat> referanceFeatures;
std::shared_ptr<cv::FaceRecognizerSF> recognizer;
std::shared_ptr<cv::FaceDetectorYN> detector;
// Minimum cosine-similarity score for a match; 0.363 is the project default
// (presumably the SFace recommended operating point — TODO confirm).
double threshold = 0.363;
public:
// Empty paths select the built-in (INCBIN-embedded) models; referances may be
// empty and added later via addReferances(). Throws LoadException on failure.
FaceRecognizer(std::filesystem::path recognizerPath = "", const std::filesystem::path& detectorPath = "", const std::vector<cv::Mat>& referances = std::vector<cv::Mat>());
// Runs the detector on the whole image; returns one row per detected face.
cv::Mat detectFaces(const cv::Mat& input);
// Returns {referance index, score} of the best match above threshold,
// {-1, 0} if nothing matched, or {-2, 0} when alone is set and multiple faces are found.
std::pair<int, double> isMatch(const cv::Mat& input, bool alone = false);
// Stores feature vectors for the given images; returns true if at least one face was usable.
bool addReferances(const std::vector<cv::Mat>& referances);
// Sets the minimum score isMatch() requires to accept a face.
void setThreshold(double threashold);
double getThreshold();
// Drops all stored reference features.
void clearReferances();
};

View File

@ -6,6 +6,7 @@
#include <opencv2/highgui.hpp>
#include <algorithm>
#include <execution>
#include <string>
#include <vector>
#include <numeric>
@ -15,6 +16,7 @@
#include "utils.h"
#include "intelligentroi.h"
#include "seamcarving.h"
#include "facerecognizer.h"
const Yolo::Detection* pointInDetectionHoriz(int x, const std::vector<Yolo::Detection>& detections, const Yolo::Detection* ignore = nullptr)
{
@ -223,7 +225,7 @@ void drawDebugInfo(cv::Mat &image, const cv::Rect& rect, const std::vector<Yolo:
for(const Yolo::Detection& detection : detections)
{
cv::rectangle(image, detection.box, detection.color, 3);
std::string label = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4);
std::string label = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4) + ' ' + std::to_string(detection.priority);
cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_DUPLEX, 1, 1, 0);
cv::Rect textBox(detection.box.x, detection.box.y - 40, labelSize.width + 10, labelSize.height + 20);
cv::rectangle(image, textBox, detection.color, cv::FILLED);
@ -251,7 +253,8 @@ static void reduceSize(cv::Mat& image, const cv::Size& targetSize)
}
}
void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yolo, const std::filesystem::path& debugOutputPath)
void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yolo, std::mutex& yoloMutex, FaceRecognizer* recognizer,
std::mutex& reconizerMutex, const std::filesystem::path& debugOutputPath)
{
InteligentRoi intRoi(yolo);
cv::Mat image = cv::imread(path);
@ -263,11 +266,28 @@ void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yol
reduceSize(image, config.targetSize);
yoloMutex.lock();
std::vector<Yolo::Detection> detections = yolo.runInference(image);
yoloMutex.unlock();
Log(Log::DEBUG)<<"Got "<<detections.size()<<" detections for "<<path;
for(const Yolo::Detection& detection : detections)
Log(Log::DEBUG)<<detection.class_id<<": "<<detection.className<<" at "<<detection.box<<" with prio "<<detection.priority;
for(Yolo::Detection& detection : detections)
{
bool hasmatch = false;
if(recognizer && detection.className == "person")
{
cv::Mat person = image(detection.box);
reconizerMutex.lock();
std::pair<int, double> match = recognizer->isMatch(person);
reconizerMutex.unlock();
if(match.first >= 0)
{
detection.priority += 10;
hasmatch = true;
}
}
Log(Log::DEBUG)<<detection.class_id<<": "<<detection.className<<" at "<<detection.box<<" with prio "<<detection.priority<<(hasmatch ? " has match" : "");
}
cv::Rect crop;
bool incompleate = intRoi.getCropRectangle(crop, detections, image.size());
@ -276,7 +296,11 @@ void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yol
{
bool ret = seamCarveResize(image, detections, config.targetSize.aspectRatio());
if(ret && image.size().aspectRatio() != config.targetSize.aspectRatio())
{
yoloMutex.lock();
detections = yolo.runInference(image);
yoloMutex.unlock();
}
}
cv::Mat croppedImage;
@ -306,7 +330,7 @@ void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yol
}
cv::Mat resizedImage;
cv::resize(croppedImage, resizedImage, {512, 512}, 0, 0, cv::INTER_CUBIC);
cv::resize(croppedImage, resizedImage, config.targetSize, 0, 0, cv::INTER_CUBIC);
bool ret = cv::imwrite(config.outputDir/path.filename(), resizedImage);
if(!ret)
Log(Log::WARN)<<"could not save image to "<<config.outputDir/path.filename()<<" skipping";
@ -346,7 +370,7 @@ int main(int argc, char* argv[])
return 1;
}
Yolo yolo(config.modelPath, {640, 480}, config.classesPath, false);
Yolo yolo(config.modelPath, {640, 480}, config.classesPath, true);
if(!std::filesystem::exists(config.outputDir))
{
@ -364,8 +388,28 @@ int main(int argc, char* argv[])
std::filesystem::create_directory(debugOutputPath);
}
std::for_each(std::execution::parallel_unsequenced_policy(),
imagePaths.begin(), imagePaths.end(), [&yolo, &debugOutputPath, &config](const std::filesystem::path& path){pipeline(path, config, yolo, debugOutputPath);});
FaceRecognizer* recognizer = nullptr;
std::mutex recognizerMutex;
if(!config.focusPersonImage.empty())
{
cv::Mat personImage = cv::imread(config.focusPersonImage);
if(personImage.empty())
{
Log(Log::ERROR)<<"Could not load image from "<<config.focusPersonImage;
return 1;
}
recognizer = new FaceRecognizer();
recognizer->addReferances({personImage});
recognizer->setThreshold(config.threshold);
}
std::mutex yoloMutex;
auto pipelineLambda = [&yolo, &debugOutputPath, &config, &yoloMutex, &recognizer, &recognizerMutex](const std::filesystem::path& path)
{
pipeline(path, config, yolo, yoloMutex, recognizer, recognizerMutex, debugOutputPath);
};
std::for_each(std::execution::par_unseq, imagePaths.begin(), imagePaths.end(), pipelineLambda);
return 0;
}

View File

@ -20,7 +20,10 @@ static struct argp_option options[] =
{"classes", 'c', "[FILENAME]", 0, "classes text file to use" },
{"out", 'o', "[DIRECTORY]", 0, "directory whre images are to be saved" },
{"debug", 'd', 0, 0, "output debug images" },
{"seam-carving", 's', 0, 0, "model to train"},
{"seam-carving", 's', 0, 0, "use seam carving to change image aspect ratio instead of croping"},
{"size", 'z', "[PIXELS]", 0, "target output size, default: 512"},
{"focus-person", 'f', "[FILENAME]", 0, "a file name to an image of a person that the crop should focus on"},
{"person-threshold", 't', "[NUMBER]", 0, "the threshold at witch to consider a person matched, defaults to 0.363"},
{0}
};
@ -30,14 +33,18 @@ struct Config
std::filesystem::path modelPath;
std::filesystem::path classesPath;
std::filesystem::path outputDir;
std::filesystem::path focusPersonImage;
bool seamCarving = false;
bool debug = false;
double threshold = 0.363;
cv::Size targetSize = cv::Size(512, 512);
};
static error_t parse_opt (int key, char *arg, struct argp_state *state)
{
Config *config = reinterpret_cast<Config*>(state->input);
try
{
switch (key)
{
case 'q':
@ -61,12 +68,30 @@ static error_t parse_opt (int key, char *arg, struct argp_state *state)
case 's':
config->seamCarving = true;
break;
case 'f':
config->focusPersonImage = arg;
break;
case 't':
config->threshold = std::atof(arg);
break;
case 'z':
{
int x = std::stoi(arg);
config->targetSize = cv::Size(x, x);
break;
}
case ARGP_KEY_ARG:
config->imagePaths.push_back(arg);
break;
default:
return ARGP_ERR_UNKNOWN;
}
}
catch(const std::invalid_argument& ex)
{
std::cout<<arg<<" passed for argument -"<<static_cast<char>(key)<<" is not a valid number.\n";
return ARGP_KEY_ERROR;
}
return 0;
}