add face recognition support to the system
This commit is contained in:
@ -5,7 +5,7 @@ find_package(OpenCV REQUIRED)
|
|||||||
|
|
||||||
set(CMAKE_CXX_STANDARD 17)
|
set(CMAKE_CXX_STANDARD 17)
|
||||||
|
|
||||||
set(SRC_FILES main.cpp yolo.cpp tokenize.cpp log.cpp seamcarving.cpp utils.cpp intelligentroi.cpp)
|
set(SRC_FILES main.cpp yolo.cpp tokenize.cpp log.cpp seamcarving.cpp utils.cpp intelligentroi.cpp facerecognizer.cpp)
|
||||||
|
|
||||||
add_executable(${PROJECT_NAME} ${SRC_FILES})
|
add_executable(${PROJECT_NAME} ${SRC_FILES})
|
||||||
target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} -ltbb)
|
target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} -ltbb)
|
||||||
|
136
facerecognizer.cpp
Normal file
136
facerecognizer.cpp
Normal file
@ -0,0 +1,136 @@
|
|||||||
|
#include "facerecognizer.h"
|
||||||
|
#include <filesystem>
|
||||||
|
|
||||||
|
#define INCBIN_PREFIX r
|
||||||
|
#include "incbin.h"
|
||||||
|
|
||||||
|
INCBIN(defaultRecognizer, "../face_recognition_sface_2021dec.onnx");
|
||||||
|
INCBIN(defaultDetector, "../face_detection_yunet_2023mar.onnx");
|
||||||
|
|
||||||
|
#include <opencv2/dnn/dnn.hpp>
|
||||||
|
#include <opencv2/core.hpp>
|
||||||
|
#include <opencv2/highgui.hpp>
|
||||||
|
#include <fstream>
|
||||||
|
|
||||||
|
#include "log.h"
|
||||||
|
|
||||||
|
// Bytes of the built-in face detection model embedded by INCBIN(defaultDetector, ...) above,
// copied into a vector so the constructor can pass them to cv::FaceDetectorYN::create() as an in-memory buffer.
static const std::vector<unsigned char> onnx((unsigned char*)rdefaultDetectorData, ((unsigned char*)rdefaultDetectorData)+rdefaultDetectorSize);
|
||||||
|
|
||||||
|
/**
 * @brief Loads the face detection and face recognition networks and registers the reference faces.
 *
 * @param recognizerPath Path to the recognition network. When empty, the model embedded in the
 *                       binary is written to a temporary file and loaded from there.
 * @param detectorPath   Path to the detection network. When empty, the embedded model is loaded
 *                       directly from memory.
 * @param referances     Images of the faces to match against, forwarded to addReferances().
 * @throws LoadException if a network cannot be loaded or the temporary model file cannot be
 *                       created or written. Exceptions raised by OpenCV while loading propagate unchanged.
 */
FaceRecognizer::FaceRecognizer(std::filesystem::path recognizerPath, const std::filesystem::path& detectorPath, const std::vector<cv::Mat>& referances)
{
	if(detectorPath.empty())
	{
		Log(Log::INFO)<<"Using builtin face detection model";

		detector = cv::FaceDetectorYN::create("onnx", onnx, std::vector<unsigned char>(), {320, 320}, 0.6, 0.3, 5000, cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
		if(!detector)
			throw LoadException("Unable to load detector network from built in file");
	}
	else
	{
		// Convert explicitly: std::filesystem::path only converts implicitly to its native
		// string type, which is not std::string on every platform.
		detector = cv::FaceDetectorYN::create(detectorPath.string(), "", {320, 320}, 0.6, 0.3, 5000, cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
		if(!detector)
			throw LoadException("Unable to load detector network from "+detectorPath.string());
	}

	const bool defaultNetwork = recognizerPath.empty();

	// Best-effort removal for failure paths, so cleanup never masks the original error.
	const auto discardTempFile = [&recognizerPath, defaultNetwork]()
	{
		if(!defaultNetwork)
			return;
		std::error_code ignored;
		std::filesystem::remove(recognizerPath, ignored);
	};

	if(defaultNetwork)
	{
		Log(Log::INFO)<<"Using builtin face recognition model";
		recognizerPath = cv::tempfile("onnx");

		// The model is binary data: open in binary mode so no newline translation corrupts it.
		std::ofstream file(recognizerPath, std::ios::out | std::ios::binary | std::ios::trunc);
		if(!file.is_open())
			throw LoadException("Unable open temporary file at "+recognizerPath.string());

		Log(Log::DEBUG)<<"Using "<<recognizerPath<<" as temporary file for onnx recongnition network";
		file.write(reinterpret_cast<const char*>(rdefaultRecognizerData), rdefaultRecognizerSize);
		file.close();

		// A short or failed write would otherwise surface later as an obscure model parsing error.
		if(!file)
		{
			discardTempFile();
			throw LoadException("Unable to write temporary file at "+recognizerPath.string());
		}
	}

	try
	{
		recognizer = cv::FaceRecognizerSF::create(recognizerPath.string(), "", cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
	}
	catch(...)
	{
		// Do not leave the extracted model behind when loading throws.
		discardTempFile();
		throw;
	}

	// The temporary copy is only needed while the network is being loaded.
	if(defaultNetwork)
		std::filesystem::remove(recognizerPath);

	if(!recognizer)
		throw LoadException("Unable to load recognizer network from "+recognizerPath.string());

	addReferances(referances);
}
|
||||||
|
|
||||||
|
/**
 * @brief Runs the face detector on an image.
 *
 * The detector input size is set to the size of @p input before detection.
 *
 * @param input Image to search for faces.
 * @return The matrix filled in by the detector; other methods of this class treat each row as one detected face.
 */
cv::Mat FaceRecognizer::detectFaces(const cv::Mat& input)
{
	cv::Mat detections;

	detector->setInputSize(input.size());
	detector->detect(input, detections);

	return detections;
}
|
||||||
|
|
||||||
|
/**
 * @brief Extracts face features from the given reference images and stores them for matching.
 *
 * Images without a detectable face are skipped with a warning. When an image contains
 * several faces, only the first detected one is used.
 *
 * @param referances Images that each should contain the face of a person to match against.
 * @return true if features from at least one image were added, false otherwise.
 */
bool FaceRecognizer::addReferances(const std::vector<cv::Mat>& referances)
{
	bool ret = false;
	for(const cv::Mat& image : referances)
	{
		cv::Mat faces = detectFaces(image);

		// Check for "no face" first: an empty result has no columns, so the layout
		// assertion below must only run once at least one face has been found.
		if(faces.empty())
		{
			Log(Log::WARN)<<"A referance image provided dose not contian any face";
			continue;
		}
		assert(faces.cols == 15);

		if(faces.rows > 1)
			Log(Log::WARN)<<"A referance image provided contains more than one face, only the first detected face will be considered";

		cv::Mat cropedImage;
		recognizer->alignCrop(image, faces.row(0), cropedImage);

		cv::Mat features;
		recognizer->feature(cropedImage, features);

		// Store a deep copy so the saved features do not share storage with `features`.
		referanceFeatures.push_back(features.clone());
		ret = true;
	}

	return ret;
}
|
||||||
|
|
||||||
|
void FaceRecognizer::setThreshold(double threasholdIn)
|
||||||
|
{
|
||||||
|
threshold = threasholdIn;
|
||||||
|
}
|
||||||
|
|
||||||
|
double FaceRecognizer::getThreshold()
|
||||||
|
{
|
||||||
|
return threshold;
|
||||||
|
}
|
||||||
|
|
||||||
|
void FaceRecognizer::clearReferances()
|
||||||
|
{
|
||||||
|
referanceFeatures.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * @brief Detects faces in an image and compares each of them against the stored reference features.
 *
 * @param input Image to search for faces.
 * @param alone When true, an image containing more than one detected face is rejected outright.
 * @return A pair {index, score}:
 *         - index >= 0: index of the stored reference with the highest cosine score above the
 *           threshold, together with that score;
 *         - {-1, 0}: no detected face matched any reference;
 *         - {-2, 0}: @p alone was set and more than one face was detected.
 */
std::pair<int, double> FaceRecognizer::isMatch(const cv::Mat& input, bool alone)
{
	cv::Mat faces = detectFaces(input);

	if(alone && faces.rows > 1)
		return {-2, 0};

	std::pair<int, double> bestMatch = {-1, 0};

	for(int i = 0; i < faces.rows; ++i)
	{
		cv::Mat face;
		// Align on the i-th detection so every face in the image is compared, not just the first.
		recognizer->alignCrop(input, faces.row(i), face);

		cv::Mat features;
		recognizer->feature(face, features);
		// Deep copy before matching (presumably the output may alias recognizer-internal storage; confirm).
		features = features.clone();

		for(size_t referanceIndex = 0; referanceIndex < referanceFeatures.size(); ++referanceIndex)
		{
			const double score = recognizer->match(referanceFeatures[referanceIndex], features, cv::FaceRecognizerSF::FR_COSINE);
			if(score > threshold && score > bestMatch.second)
				bestMatch = {static_cast<int>(referanceIndex), score};
		}
	}

	return bestMatch;
}
|
41
facerecognizer.h
Normal file
41
facerecognizer.h
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <exception>
|
||||||
|
#include <opencv2/core/mat.hpp>
|
||||||
|
#include <opencv2/objdetect/face.hpp>
|
||||||
|
#include <opencv2/core.hpp>
|
||||||
|
#include <vector>
|
||||||
|
#include <memory>
|
||||||
|
#include <filesystem>
|
||||||
|
|
||||||
|
class FaceRecognizer
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
|
||||||
|
class LoadException : public std::exception
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
std::string message;
|
||||||
|
public:
|
||||||
|
LoadException(const std::string& msg): std::exception(), message(msg) {}
|
||||||
|
virtual const char* what() const throw() override
|
||||||
|
{
|
||||||
|
return message.c_str();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::vector<cv::Mat> referanceFeatures;
|
||||||
|
std::shared_ptr<cv::FaceRecognizerSF> recognizer;
|
||||||
|
std::shared_ptr<cv::FaceDetectorYN> detector;
|
||||||
|
|
||||||
|
double threshold = 0.363;
|
||||||
|
|
||||||
|
public:
|
||||||
|
FaceRecognizer(std::filesystem::path recognizerPath = "", const std::filesystem::path& detectorPath = "", const std::vector<cv::Mat>& referances = std::vector<cv::Mat>());
|
||||||
|
cv::Mat detectFaces(const cv::Mat& input);
|
||||||
|
std::pair<int, double> isMatch(const cv::Mat& input, bool alone = false);
|
||||||
|
bool addReferances(const std::vector<cv::Mat>& referances);
|
||||||
|
void setThreshold(double threashold);
|
||||||
|
double getThreshold();
|
||||||
|
void clearReferances();
|
||||||
|
};
|
60
main.cpp
60
main.cpp
@ -6,6 +6,7 @@
|
|||||||
#include <opencv2/highgui.hpp>
|
#include <opencv2/highgui.hpp>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <execution>
|
#include <execution>
|
||||||
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <numeric>
|
#include <numeric>
|
||||||
|
|
||||||
@ -15,6 +16,7 @@
|
|||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
#include "intelligentroi.h"
|
#include "intelligentroi.h"
|
||||||
#include "seamcarving.h"
|
#include "seamcarving.h"
|
||||||
|
#include "facerecognizer.h"
|
||||||
|
|
||||||
const Yolo::Detection* pointInDetectionHoriz(int x, const std::vector<Yolo::Detection>& detections, const Yolo::Detection* ignore = nullptr)
|
const Yolo::Detection* pointInDetectionHoriz(int x, const std::vector<Yolo::Detection>& detections, const Yolo::Detection* ignore = nullptr)
|
||||||
{
|
{
|
||||||
@ -223,7 +225,7 @@ void drawDebugInfo(cv::Mat &image, const cv::Rect& rect, const std::vector<Yolo:
|
|||||||
for(const Yolo::Detection& detection : detections)
|
for(const Yolo::Detection& detection : detections)
|
||||||
{
|
{
|
||||||
cv::rectangle(image, detection.box, detection.color, 3);
|
cv::rectangle(image, detection.box, detection.color, 3);
|
||||||
std::string label = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4);
|
std::string label = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4) + ' ' + std::to_string(detection.priority);
|
||||||
cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_DUPLEX, 1, 1, 0);
|
cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_DUPLEX, 1, 1, 0);
|
||||||
cv::Rect textBox(detection.box.x, detection.box.y - 40, labelSize.width + 10, labelSize.height + 20);
|
cv::Rect textBox(detection.box.x, detection.box.y - 40, labelSize.width + 10, labelSize.height + 20);
|
||||||
cv::rectangle(image, textBox, detection.color, cv::FILLED);
|
cv::rectangle(image, textBox, detection.color, cv::FILLED);
|
||||||
@ -251,7 +253,8 @@ static void reduceSize(cv::Mat& image, const cv::Size& targetSize)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yolo, const std::filesystem::path& debugOutputPath)
|
void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yolo, std::mutex& yoloMutex, FaceRecognizer* recognizer,
|
||||||
|
std::mutex& reconizerMutex, const std::filesystem::path& debugOutputPath)
|
||||||
{
|
{
|
||||||
InteligentRoi intRoi(yolo);
|
InteligentRoi intRoi(yolo);
|
||||||
cv::Mat image = cv::imread(path);
|
cv::Mat image = cv::imread(path);
|
||||||
@ -263,11 +266,28 @@ void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yol
|
|||||||
|
|
||||||
reduceSize(image, config.targetSize);
|
reduceSize(image, config.targetSize);
|
||||||
|
|
||||||
|
yoloMutex.lock();
|
||||||
std::vector<Yolo::Detection> detections = yolo.runInference(image);
|
std::vector<Yolo::Detection> detections = yolo.runInference(image);
|
||||||
|
yoloMutex.unlock();
|
||||||
|
|
||||||
Log(Log::DEBUG)<<"Got "<<detections.size()<<" detections for "<<path;
|
Log(Log::DEBUG)<<"Got "<<detections.size()<<" detections for "<<path;
|
||||||
for(const Yolo::Detection& detection : detections)
|
for(Yolo::Detection& detection : detections)
|
||||||
Log(Log::DEBUG)<<detection.class_id<<": "<<detection.className<<" at "<<detection.box<<" with prio "<<detection.priority;
|
{
|
||||||
|
bool hasmatch = false;
|
||||||
|
if(recognizer && detection.className == "person")
|
||||||
|
{
|
||||||
|
cv::Mat person = image(detection.box);
|
||||||
|
reconizerMutex.lock();
|
||||||
|
std::pair<int, double> match = recognizer->isMatch(person);
|
||||||
|
reconizerMutex.unlock();
|
||||||
|
if(match.first >= 0)
|
||||||
|
{
|
||||||
|
detection.priority += 10;
|
||||||
|
hasmatch = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Log(Log::DEBUG)<<detection.class_id<<": "<<detection.className<<" at "<<detection.box<<" with prio "<<detection.priority<<(hasmatch ? " has match" : "");
|
||||||
|
}
|
||||||
|
|
||||||
cv::Rect crop;
|
cv::Rect crop;
|
||||||
bool incompleate = intRoi.getCropRectangle(crop, detections, image.size());
|
bool incompleate = intRoi.getCropRectangle(crop, detections, image.size());
|
||||||
@ -276,7 +296,11 @@ void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yol
|
|||||||
{
|
{
|
||||||
bool ret = seamCarveResize(image, detections, config.targetSize.aspectRatio());
|
bool ret = seamCarveResize(image, detections, config.targetSize.aspectRatio());
|
||||||
if(ret && image.size().aspectRatio() != config.targetSize.aspectRatio())
|
if(ret && image.size().aspectRatio() != config.targetSize.aspectRatio())
|
||||||
|
{
|
||||||
|
yoloMutex.lock();
|
||||||
detections = yolo.runInference(image);
|
detections = yolo.runInference(image);
|
||||||
|
yoloMutex.unlock();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
cv::Mat croppedImage;
|
cv::Mat croppedImage;
|
||||||
@ -306,7 +330,7 @@ void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yol
|
|||||||
}
|
}
|
||||||
|
|
||||||
cv::Mat resizedImage;
|
cv::Mat resizedImage;
|
||||||
cv::resize(croppedImage, resizedImage, {512, 512}, 0, 0, cv::INTER_CUBIC);
|
cv::resize(croppedImage, resizedImage, config.targetSize, 0, 0, cv::INTER_CUBIC);
|
||||||
bool ret = cv::imwrite(config.outputDir/path.filename(), resizedImage);
|
bool ret = cv::imwrite(config.outputDir/path.filename(), resizedImage);
|
||||||
if(!ret)
|
if(!ret)
|
||||||
Log(Log::WARN)<<"could not save image to "<<config.outputDir/path.filename()<<" skipping";
|
Log(Log::WARN)<<"could not save image to "<<config.outputDir/path.filename()<<" skipping";
|
||||||
@ -346,7 +370,7 @@ int main(int argc, char* argv[])
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
Yolo yolo(config.modelPath, {640, 480}, config.classesPath, false);
|
Yolo yolo(config.modelPath, {640, 480}, config.classesPath, true);
|
||||||
|
|
||||||
if(!std::filesystem::exists(config.outputDir))
|
if(!std::filesystem::exists(config.outputDir))
|
||||||
{
|
{
|
||||||
@ -364,8 +388,28 @@ int main(int argc, char* argv[])
|
|||||||
std::filesystem::create_directory(debugOutputPath);
|
std::filesystem::create_directory(debugOutputPath);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::for_each(std::execution::parallel_unsequenced_policy(),
|
FaceRecognizer* recognizer = nullptr;
|
||||||
imagePaths.begin(), imagePaths.end(), [&yolo, &debugOutputPath, &config](const std::filesystem::path& path){pipeline(path, config, yolo, debugOutputPath);});
|
std::mutex recognizerMutex;
|
||||||
|
if(!config.focusPersonImage.empty())
|
||||||
|
{
|
||||||
|
cv::Mat personImage = cv::imread(config.focusPersonImage);
|
||||||
|
if(personImage.empty())
|
||||||
|
{
|
||||||
|
Log(Log::ERROR)<<"Could not load image from "<<config.focusPersonImage;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
recognizer = new FaceRecognizer();
|
||||||
|
recognizer->addReferances({personImage});
|
||||||
|
recognizer->setThreshold(config.threshold);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::mutex yoloMutex;
|
||||||
|
|
||||||
|
auto pipelineLambda = [&yolo, &debugOutputPath, &config, &yoloMutex, &recognizer, &recognizerMutex](const std::filesystem::path& path)
|
||||||
|
{
|
||||||
|
pipeline(path, config, yolo, yoloMutex, recognizer, recognizerMutex, debugOutputPath);
|
||||||
|
};
|
||||||
|
std::for_each(std::execution::par_unseq, imagePaths.begin(), imagePaths.end(), pipelineLambda);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
81
options.h
81
options.h
@ -20,7 +20,10 @@ static struct argp_option options[] =
|
|||||||
{"classes", 'c', "[FILENAME]", 0, "classes text file to use" },
|
{"classes", 'c', "[FILENAME]", 0, "classes text file to use" },
|
||||||
{"out", 'o', "[DIRECTORY]", 0, "directory whre images are to be saved" },
|
{"out", 'o', "[DIRECTORY]", 0, "directory whre images are to be saved" },
|
||||||
{"debug", 'd', 0, 0, "output debug images" },
|
{"debug", 'd', 0, 0, "output debug images" },
|
||||||
{"seam-carving", 's', 0, 0, "model to train"},
|
{"seam-carving", 's', 0, 0, "use seam carving to change image aspect ratio instead of croping"},
|
||||||
|
{"size", 'z', "[PIXELS]", 0, "target output size, default: 512"},
|
||||||
|
{"focus-person", 'f', "[FILENAME]", 0, "a file name to an image of a person that the crop should focus on"},
|
||||||
|
{"person-threshold", 't', "[NUMBER]", 0, "the threshold at witch to consider a person matched, defaults to 0.363"},
|
||||||
{0}
|
{0}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -30,42 +33,64 @@ struct Config
|
|||||||
std::filesystem::path modelPath;
|
std::filesystem::path modelPath;
|
||||||
std::filesystem::path classesPath;
|
std::filesystem::path classesPath;
|
||||||
std::filesystem::path outputDir;
|
std::filesystem::path outputDir;
|
||||||
|
std::filesystem::path focusPersonImage;
|
||||||
bool seamCarving = false;
|
bool seamCarving = false;
|
||||||
bool debug = false;
|
bool debug = false;
|
||||||
|
double threshold = 0.363;
|
||||||
cv::Size targetSize = cv::Size(512, 512);
|
cv::Size targetSize = cv::Size(512, 512);
|
||||||
};
|
};
|
||||||
|
|
||||||
static error_t parse_opt (int key, char *arg, struct argp_state *state)
|
static error_t parse_opt (int key, char *arg, struct argp_state *state)
|
||||||
{
|
{
|
||||||
Config *config = reinterpret_cast<Config*>(state->input);
|
Config *config = reinterpret_cast<Config*>(state->input);
|
||||||
switch (key)
|
try
|
||||||
{
|
{
|
||||||
case 'q':
|
switch (key)
|
||||||
Log::level = Log::ERROR;
|
{
|
||||||
break;
|
case 'q':
|
||||||
case 'v':
|
Log::level = Log::ERROR;
|
||||||
Log::level = Log::DEBUG;
|
break;
|
||||||
break;
|
case 'v':
|
||||||
case 'm':
|
Log::level = Log::DEBUG;
|
||||||
config->modelPath = arg;
|
break;
|
||||||
break;
|
case 'm':
|
||||||
case 'c':
|
config->modelPath = arg;
|
||||||
config->classesPath = arg;
|
break;
|
||||||
break;
|
case 'c':
|
||||||
case 'd':
|
config->classesPath = arg;
|
||||||
config->debug = true;
|
break;
|
||||||
break;
|
case 'd':
|
||||||
case 'o':
|
config->debug = true;
|
||||||
config->outputDir.assign(arg);
|
break;
|
||||||
break;
|
case 'o':
|
||||||
case 's':
|
config->outputDir.assign(arg);
|
||||||
config->seamCarving = true;
|
break;
|
||||||
break;
|
case 's':
|
||||||
case ARGP_KEY_ARG:
|
config->seamCarving = true;
|
||||||
config->imagePaths.push_back(arg);
|
break;
|
||||||
break;
|
case 'f':
|
||||||
default:
|
config->focusPersonImage = arg;
|
||||||
return ARGP_ERR_UNKNOWN;
|
break;
|
||||||
|
case 't':
|
||||||
|
config->threshold = std::atof(arg);
|
||||||
|
break;
|
||||||
|
case 'z':
|
||||||
|
{
|
||||||
|
int x = std::stoi(arg);
|
||||||
|
config->targetSize = cv::Size(x, x);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case ARGP_KEY_ARG:
|
||||||
|
config->imagePaths.push_back(arg);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return ARGP_ERR_UNKNOWN;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch(const std::invalid_argument& ex)
|
||||||
|
{
|
||||||
|
std::cout<<arg<<" passed for argument -"<<static_cast<char>(key)<<" is not a valid number.\n";
|
||||||
|
return ARGP_KEY_ERROR;
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user