Add person dataset assembler, restructure repo
This commit is contained in:
16
SmartCrop/CMakeLists.txt
Normal file
16
SmartCrop/CMakeLists.txt
Normal file
@ -0,0 +1,16 @@
|
||||
cmake_minimum_required(VERSION 3.6)

find_package(OpenCV REQUIRED)

# The sources use <filesystem>, so C++17 is mandatory: do not let CMake
# silently fall back to an older standard when the compiler lacks support.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

set(SRC_FILES main.cpp yolo.cpp tokenize.cpp log.cpp seamcarving.cpp utils.cpp intelligentroi.cpp facerecognizer.cpp)

add_executable(smartcrop ${SRC_FILES})
target_link_libraries(smartcrop ${OpenCV_LIBS} -ltbb)
target_include_directories(smartcrop PRIVATE ${OpenCV_INCLUDE_DIRS})
target_compile_options(smartcrop PRIVATE -s -g -Wall)

# WEIGHT_DIR is not set in this file; it is compiled in as a string macro and
# used by facerecognizer.cpp to locate the .onnx files embedded via INCBIN.
message(STATUS "smartcrop: WEIGHT_DIR=${WEIGHT_DIR}")
target_compile_definitions(smartcrop PUBLIC WEIGHT_DIR="${WEIGHT_DIR}")

install(TARGETS smartcrop RUNTIME DESTINATION bin)
|
136
SmartCrop/facerecognizer.cpp
Normal file
136
SmartCrop/facerecognizer.cpp
Normal file
@ -0,0 +1,136 @@
|
||||
#include "facerecognizer.h"
|
||||
#include <filesystem>
|
||||
|
||||
#define INCBIN_PREFIX r
|
||||
#include "incbin.h"
|
||||
|
||||
INCBIN(defaultRecognizer, WEIGHT_DIR "/face_recognition_sface_2021dec.onnx");
|
||||
INCBIN(defaultDetector, WEIGHT_DIR "/face_detection_yunet_2023mar.onnx");
|
||||
|
||||
#include <opencv2/dnn/dnn.hpp>
|
||||
#include <opencv2/core.hpp>
|
||||
#include <opencv2/highgui.hpp>
|
||||
#include <fstream>
|
||||
|
||||
#include "log.h"
|
||||
|
||||
// Byte-vector copy of the embedded default detector model (see INCBIN above);
// handed to cv::FaceDetectorYN::create() when no detector path is given.
static const std::vector<unsigned char> onnx(rdefaultDetectorData, rdefaultDetectorData + rdefaultDetectorSize);
|
||||
|
||||
/**
 * Loads the face detection and face recognition networks and registers the
 * given reference images.
 *
 * @param recognizerPath path to the recognition network; when empty the model
 *        embedded in the binary is written to a temporary file and loaded from there
 * @param detectorPath path to the detection network; when empty the embedded
 *        model is loaded directly from memory
 * @param referances images passed on to addReferances()
 * @throws LoadException if a network can not be loaded or the temporary file
 *         for the embedded recognition model can not be written
 */
FaceRecognizer::FaceRecognizer(std::filesystem::path recognizerPath, const std::filesystem::path& detectorPath, const std::vector<cv::Mat>& referances)
{
	if(detectorPath.empty())
	{
		Log(Log::INFO)<<"Using builtin face detection model";

		detector = cv::FaceDetectorYN::create("onnx", onnx, std::vector<unsigned char>(), {320, 320}, 0.6, 0.3, 5000, cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
		if(!detector)
			throw LoadException("Unable to load detector network from built in file");
	}
	else
	{
		detector = cv::FaceDetectorYN::create(detectorPath, "", {320, 320}, 0.6, 0.3, 5000, cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
		if(!detector)
			throw LoadException("Unable to load detector network from "+detectorPath.string());
	}

	const bool defaultNetwork = recognizerPath.empty();

	// Best-effort removal of the temporary model file; must never throw because
	// it also runs while another exception is being propagated.
	const auto removeTemporaryModel = [&recognizerPath, defaultNetwork]()
	{
		if(!defaultNetwork)
			return;
		std::error_code ignored;
		std::filesystem::remove(recognizerPath, ignored);
	};

	if(defaultNetwork)
	{
		Log(Log::INFO)<<"Using builtin face recognition model";
		recognizerPath = cv::tempfile("onnx");
		// The model is binary data: text mode would translate line endings on some platforms
		std::ofstream file(recognizerPath, std::ios::binary);
		if(!file.is_open())
			throw LoadException("Unable open temporary file at "+recognizerPath.string());
		Log(Log::DEBUG)<<"Using "<<recognizerPath<<" as temporary file for onnx recongnition network";
		file.write(reinterpret_cast<const char*>(rdefaultRecognizerData), rdefaultRecognizerSize);
		file.close();
		if(!file)
		{
			// A truncated model would only fail later with a less helpful error
			removeTemporaryModel();
			throw LoadException("Unable to write builtin recognition model to temporary file at "+recognizerPath.string());
		}
	}

	try
	{
		recognizer = cv::FaceRecognizerSF::create(recognizerPath.string(), "", cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
	}
	catch(...)
	{
		// Do not leave the temporary file behind when loading throws
		removeTemporaryModel();
		throw;
	}

	removeTemporaryModel();

	if(!recognizer)
		throw LoadException("Unable to load recognizer network from "+recognizerPath.string());

	addReferances(referances);
}
|
||||
|
||||
/**
 * Runs the face detector on an image.
 *
 * The detector input size is set to the size of the image before detection.
 *
 * @param input image to search for faces
 * @return the matrix filled in by the detector, one row per detected face
 */
cv::Mat FaceRecognizer::detectFaces(const cv::Mat& input)
{
	cv::Mat detections;
	detector->setInputSize(input.size());
	detector->detect(input, detections);
	return detections;
}
|
||||
|
||||
/**
 * Extracts face features from the given reference images and stores them for
 * later comparison in isMatch().
 *
 * Images without a detectable face are skipped with a warning. If an image
 * contains several faces only the first detected one is used.
 *
 * @param referances images that each show the face of one reference person
 * @return true if features of at least one reference image were added
 */
bool FaceRecognizer::addReferances(const std::vector<cv::Mat>& referances)
{
	bool ret = false;
	for(const cv::Mat& image : referances)
	{
		cv::Mat faces = detectFaces(image);
		// Check for "no face" first: an empty result need not have the column
		// layout asserted below, so asserting first would abort debug builds.
		if(faces.empty())
		{
			Log(Log::WARN)<<"A referance image provided dose not contian any face";
			continue;
		}
		// alignCrop() below is given one detection row; rows are expected to hold 15 values
		assert(faces.cols == 15);
		if(faces.rows > 1)
			Log(Log::WARN)<<"A referance image provided contains more than one face, only the first detected face will be considered";
		cv::Mat cropedImage;
		recognizer->alignCrop(image, faces.row(0), cropedImage);
		cv::Mat features;
		recognizer->feature(cropedImage, features);
		// Store a deep copy so the stored features do not share a buffer with later results
		referanceFeatures.push_back(features.clone());
		ret = true;
	}

	return ret;
}
|
||||
|
||||
void FaceRecognizer::setThreshold(double threasholdIn)
|
||||
{
|
||||
threshold = threasholdIn;
|
||||
}
|
||||
|
||||
double FaceRecognizer::getThreshold()
|
||||
{
|
||||
return threshold;
|
||||
}
|
||||
|
||||
void FaceRecognizer::clearReferances()
|
||||
{
|
||||
referanceFeatures.clear();
|
||||
}
|
||||
|
||||
/**
 * Checks whether any face in an image matches one of the stored references.
 *
 * Every detected face is compared against every reference using the cosine
 * score; only scores above the threshold are considered.
 *
 * @param input image to search for known faces
 * @param alone if true, an image showing more than one face is rejected
 * @return {-2, 0} if alone is set and more than one face was detected,
 *         {-1, 0} if no face matched any reference, otherwise the index of the
 *         best matching reference together with its score
 */
std::pair<int, double> FaceRecognizer::isMatch(const cv::Mat& input, bool alone)
{
	cv::Mat faces = detectFaces(input);

	if(alone && faces.rows > 1)
		return {-2, 0};

	std::pair<int, double> bestMatch = {-1, 0};

	for(int i = 0; i < faces.rows; ++i)
	{
		cv::Mat face;
		// Align the i-th detection; using row 0 here would test the first face over and over
		recognizer->alignCrop(input, faces.row(i), face);
		cv::Mat features;
		recognizer->feature(face, features);
		features = features.clone();
		for(size_t referanceIndex = 0; referanceIndex < referanceFeatures.size(); ++referanceIndex)
		{
			double score = recognizer->match(referanceFeatures[referanceIndex], features, cv::FaceRecognizerSF::FR_COSINE);
			if(score > threshold && score > bestMatch.second)
				bestMatch = {static_cast<int>(referanceIndex), score};
		}
	}

	return bestMatch;
}
|
41
SmartCrop/facerecognizer.h
Normal file
41
SmartCrop/facerecognizer.h
Normal file
@ -0,0 +1,41 @@
|
||||
#pragma once
|
||||
#include <exception>
|
||||
#include <opencv2/core/mat.hpp>
|
||||
#include <opencv2/objdetect/face.hpp>
|
||||
#include <opencv2/core.hpp>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <filesystem>
|
||||
|
||||
class FaceRecognizer
|
||||
{
|
||||
public:
|
||||
|
||||
class LoadException : public std::exception
|
||||
{
|
||||
private:
|
||||
std::string message;
|
||||
public:
|
||||
LoadException(const std::string& msg): std::exception(), message(msg) {}
|
||||
virtual const char* what() const throw() override
|
||||
{
|
||||
return message.c_str();
|
||||
}
|
||||
};
|
||||
|
||||
private:
|
||||
std::vector<cv::Mat> referanceFeatures;
|
||||
std::shared_ptr<cv::FaceRecognizerSF> recognizer;
|
||||
std::shared_ptr<cv::FaceDetectorYN> detector;
|
||||
|
||||
double threshold = 0.363;
|
||||
|
||||
public:
|
||||
FaceRecognizer(std::filesystem::path recognizerPath = "", const std::filesystem::path& detectorPath = "", const std::vector<cv::Mat>& referances = std::vector<cv::Mat>());
|
||||
cv::Mat detectFaces(const cv::Mat& input);
|
||||
std::pair<int, double> isMatch(const cv::Mat& input, bool alone = false);
|
||||
bool addReferances(const std::vector<cv::Mat>& referances);
|
||||
void setThreshold(double threashold);
|
||||
double getThreshold();
|
||||
void clearReferances();
|
||||
};
|
476
SmartCrop/incbin.h
Normal file
476
SmartCrop/incbin.h
Normal file
@ -0,0 +1,476 @@
|
||||
/**
|
||||
* @file incbin.h
|
||||
* @author Dale Weiler
|
||||
* @brief Utility for including binary files
|
||||
*
|
||||
* Facilities for including binary files into the current translation unit and
|
||||
* making use of them externally in other translation units.
|
||||
*/
|
||||
#ifndef INCBIN_HDR
|
||||
#define INCBIN_HDR
|
||||
#include <limits.h>
|
||||
#if defined(__AVX512BW__) || \
|
||||
defined(__AVX512CD__) || \
|
||||
defined(__AVX512DQ__) || \
|
||||
defined(__AVX512ER__) || \
|
||||
defined(__AVX512PF__) || \
|
||||
defined(__AVX512VL__) || \
|
||||
defined(__AVX512F__)
|
||||
# define INCBIN_ALIGNMENT_INDEX 6
|
||||
#elif defined(__AVX__) || \
|
||||
defined(__AVX2__)
|
||||
# define INCBIN_ALIGNMENT_INDEX 5
|
||||
#elif defined(__SSE__) || \
|
||||
defined(__SSE2__) || \
|
||||
defined(__SSE3__) || \
|
||||
defined(__SSSE3__) || \
|
||||
defined(__SSE4_1__) || \
|
||||
defined(__SSE4_2__) || \
|
||||
defined(__neon__) || \
|
||||
defined(__ARM_NEON) || \
|
||||
defined(__ALTIVEC__)
|
||||
# define INCBIN_ALIGNMENT_INDEX 4
|
||||
#elif ULONG_MAX != 0xffffffffu
|
||||
# define INCBIN_ALIGNMENT_INDEX 3
|
||||
# else
|
||||
# define INCBIN_ALIGNMENT_INDEX 2
|
||||
#endif
|
||||
|
||||
/* Lookup table of (1 << n) where `n' is `INCBIN_ALIGNMENT_INDEX' */
|
||||
#define INCBIN_ALIGN_SHIFT_0 1
|
||||
#define INCBIN_ALIGN_SHIFT_1 2
|
||||
#define INCBIN_ALIGN_SHIFT_2 4
|
||||
#define INCBIN_ALIGN_SHIFT_3 8
|
||||
#define INCBIN_ALIGN_SHIFT_4 16
|
||||
#define INCBIN_ALIGN_SHIFT_5 32
|
||||
#define INCBIN_ALIGN_SHIFT_6 64
|
||||
|
||||
/* Actual alignment value */
|
||||
#define INCBIN_ALIGNMENT \
|
||||
INCBIN_CONCATENATE( \
|
||||
INCBIN_CONCATENATE(INCBIN_ALIGN_SHIFT, _), \
|
||||
INCBIN_ALIGNMENT_INDEX)
|
||||
|
||||
/* Stringize */
|
||||
#define INCBIN_STR(X) \
|
||||
#X
|
||||
#define INCBIN_STRINGIZE(X) \
|
||||
INCBIN_STR(X)
|
||||
/* Concatenate */
|
||||
#define INCBIN_CAT(X, Y) \
|
||||
X ## Y
|
||||
#define INCBIN_CONCATENATE(X, Y) \
|
||||
INCBIN_CAT(X, Y)
|
||||
/* Deferred macro expansion */
|
||||
#define INCBIN_EVAL(X) \
|
||||
X
|
||||
#define INCBIN_INVOKE(N, ...) \
|
||||
INCBIN_EVAL(N(__VA_ARGS__))
|
||||
/* Variable argument count for overloading by arity */
|
||||
#define INCBIN_VA_ARG_COUNTER(_1, _2, _3, N, ...) N
|
||||
#define INCBIN_VA_ARGC(...) INCBIN_VA_ARG_COUNTER(__VA_ARGS__, 3, 2, 1, 0)
|
||||
|
||||
/* Green Hills uses a different directive for including binary data */
|
||||
#if defined(__ghs__)
|
||||
# if (__ghs_asm == 2)
|
||||
# define INCBIN_MACRO ".file"
|
||||
/* Or consider the ".myrawdata" entry in the ld file */
|
||||
# else
|
||||
# define INCBIN_MACRO "\tINCBIN"
|
||||
# endif
|
||||
#else
|
||||
# define INCBIN_MACRO ".incbin"
|
||||
#endif
|
||||
|
||||
#ifndef _MSC_VER
|
||||
# define INCBIN_ALIGN \
|
||||
__attribute__((aligned(INCBIN_ALIGNMENT)))
|
||||
#else
|
||||
# define INCBIN_ALIGN __declspec(align(INCBIN_ALIGNMENT))
|
||||
#endif
|
||||
|
||||
#if defined(__arm__) || /* GNU C and RealView */ \
|
||||
defined(__arm) || /* Diab */ \
|
||||
defined(_ARM) /* ImageCraft */
|
||||
# define INCBIN_ARM
|
||||
#endif
|
||||
|
||||
#ifdef __GNUC__
|
||||
/* Utilize .balign where supported */
|
||||
# define INCBIN_ALIGN_HOST ".balign " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n"
|
||||
# define INCBIN_ALIGN_BYTE ".balign 1\n"
|
||||
#elif defined(INCBIN_ARM)
|
||||
/*
|
||||
* On arm assemblers, the alignment value is calculated as (1 << n) where `n' is
|
||||
* the shift count. This is the value passed to `.align'
|
||||
*/
|
||||
# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT_INDEX) "\n"
|
||||
# define INCBIN_ALIGN_BYTE ".align 0\n"
|
||||
#else
|
||||
/* We assume other inline assemblers treat `.align' as `.balign' */
|
||||
# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n"
|
||||
# define INCBIN_ALIGN_BYTE ".align 1\n"
|
||||
#endif
|
||||
|
||||
/* INCBIN_CONST is used by incbin.c generated files */
|
||||
#if defined(__cplusplus)
|
||||
# define INCBIN_EXTERNAL extern "C"
|
||||
# define INCBIN_CONST extern const
|
||||
#else
|
||||
# define INCBIN_EXTERNAL extern
|
||||
# define INCBIN_CONST const
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Optionally override the linker section into which size and data is
|
||||
* emitted.
|
||||
*
|
||||
* @warning If you use this facility, you might have to deal with
|
||||
* platform-specific linker output section naming on your own.
|
||||
*/
|
||||
#if !defined(INCBIN_OUTPUT_SECTION)
|
||||
# if defined(__APPLE__)
|
||||
# define INCBIN_OUTPUT_SECTION ".const_data"
|
||||
# else
|
||||
# define INCBIN_OUTPUT_SECTION ".rodata"
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Optionally override the linker section into which data is emitted.
|
||||
*
|
||||
* @warning If you use this facility, you might have to deal with
|
||||
* platform-specific linker output section naming on your own.
|
||||
*/
|
||||
#if !defined(INCBIN_OUTPUT_DATA_SECTION)
|
||||
# define INCBIN_OUTPUT_DATA_SECTION INCBIN_OUTPUT_SECTION
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Optionally override the linker section into which size is emitted.
|
||||
*
|
||||
* @warning If you use this facility, you might have to deal with
|
||||
* platform-specific linker output section naming on your own.
|
||||
*
|
||||
* @note This is useful for Harvard architectures where program memory cannot
|
||||
* be directly read from the program without special instructions. With this you
|
||||
* can choose to put the size variable in RAM rather than ROM.
|
||||
*/
|
||||
#if !defined(INCBIN_OUTPUT_SIZE_SECTION)
|
||||
# define INCBIN_OUTPUT_SIZE_SECTION INCBIN_OUTPUT_SECTION
|
||||
#endif
|
||||
|
||||
#if defined(__APPLE__)
|
||||
# include "TargetConditionals.h"
|
||||
# if defined(TARGET_OS_IPHONE) && !defined(INCBIN_SILENCE_BITCODE_WARNING)
|
||||
# warning "incbin is incompatible with bitcode. Using the library will break upload to App Store if you have bitcode enabled. Add `#define INCBIN_SILENCE_BITCODE_WARNING` before including this header to silence this warning."
|
||||
# endif
|
||||
/* The directives are different for Apple branded compilers */
|
||||
# define INCBIN_SECTION INCBIN_OUTPUT_SECTION "\n"
|
||||
# define INCBIN_GLOBAL(NAME) ".globl " INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n"
|
||||
# define INCBIN_INT ".long "
|
||||
# define INCBIN_MANGLE "_"
|
||||
# define INCBIN_BYTE ".byte "
|
||||
# define INCBIN_TYPE(...)
|
||||
#else
|
||||
# define INCBIN_SECTION ".section " INCBIN_OUTPUT_SECTION "\n"
|
||||
# define INCBIN_GLOBAL(NAME) ".global " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n"
|
||||
# if defined(__ghs__)
|
||||
# define INCBIN_INT ".word "
|
||||
# else
|
||||
# define INCBIN_INT ".int "
|
||||
# endif
|
||||
# if defined(__USER_LABEL_PREFIX__)
|
||||
# define INCBIN_MANGLE INCBIN_STRINGIZE(__USER_LABEL_PREFIX__)
|
||||
# else
|
||||
# define INCBIN_MANGLE ""
|
||||
# endif
|
||||
# if defined(INCBIN_ARM)
|
||||
/* On arm assemblers, `@' is used as a line comment token */
|
||||
# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", %object\n"
|
||||
# elif defined(__MINGW32__) || defined(__MINGW64__)
|
||||
/* Mingw doesn't support this directive either */
|
||||
# define INCBIN_TYPE(NAME)
|
||||
# else
|
||||
/* It's safe to use `@' on other architectures */
|
||||
# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", @object\n"
|
||||
# endif
|
||||
# define INCBIN_BYTE ".byte "
|
||||
#endif
|
||||
|
||||
/* List of style types used for symbol names */
|
||||
#define INCBIN_STYLE_CAMEL 0
|
||||
#define INCBIN_STYLE_SNAKE 1
|
||||
|
||||
/**
|
||||
* @brief Specify the prefix to use for symbol names.
|
||||
*
|
||||
* @note By default this is "g".
|
||||
*
|
||||
* @code
|
||||
* #define INCBIN_PREFIX incbin
|
||||
* #include "incbin.h"
|
||||
* INCBIN(Foo, "foo.txt");
|
||||
*
|
||||
* // Now you have the following symbols instead:
|
||||
* // const unsigned char incbinFoo<data>[];
|
||||
* // const unsigned char *const incbinFoo<end>;
|
||||
* // const unsigned int incbinFoo<size>;
|
||||
* @endcode
|
||||
*/
|
||||
#if !defined(INCBIN_PREFIX)
|
||||
# define INCBIN_PREFIX g
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Specify the style used for symbol names.
|
||||
*
|
||||
* Possible options are
|
||||
* - INCBIN_STYLE_CAMEL "CamelCase"
|
||||
* - INCBIN_STYLE_SNAKE "snake_case"
|
||||
*
|
||||
* @note By default this is INCBIN_STYLE_CAMEL
|
||||
*
|
||||
* @code
|
||||
* #define INCBIN_STYLE INCBIN_STYLE_SNAKE
|
||||
* #include "incbin.h"
|
||||
* INCBIN(foo, "foo.txt");
|
||||
*
|
||||
* // Now you have the following symbols:
|
||||
* // const unsigned char <prefix>foo_data[];
|
||||
* // const unsigned char *const <prefix>foo_end;
|
||||
* // const unsigned int <prefix>foo_size;
|
||||
* @endcode
|
||||
*/
|
||||
#if !defined(INCBIN_STYLE)
|
||||
# define INCBIN_STYLE INCBIN_STYLE_CAMEL
|
||||
#endif
|
||||
|
||||
/* Style lookup tables */
|
||||
#define INCBIN_STYLE_0_DATA Data
|
||||
#define INCBIN_STYLE_0_END End
|
||||
#define INCBIN_STYLE_0_SIZE Size
|
||||
#define INCBIN_STYLE_1_DATA _data
|
||||
#define INCBIN_STYLE_1_END _end
|
||||
#define INCBIN_STYLE_1_SIZE _size
|
||||
|
||||
/* Style lookup: returning identifier */
|
||||
#define INCBIN_STYLE_IDENT(TYPE) \
|
||||
INCBIN_CONCATENATE( \
|
||||
INCBIN_STYLE_, \
|
||||
INCBIN_CONCATENATE( \
|
||||
INCBIN_EVAL(INCBIN_STYLE), \
|
||||
INCBIN_CONCATENATE(_, TYPE)))
|
||||
|
||||
/* Style lookup: returning string literal */
|
||||
#define INCBIN_STYLE_STRING(TYPE) \
|
||||
INCBIN_STRINGIZE( \
|
||||
INCBIN_STYLE_IDENT(TYPE)) \
|
||||
|
||||
/* Generate the global labels by indirectly invoking the macro with our style
|
||||
* type and concatenating the name against them. */
|
||||
#define INCBIN_GLOBAL_LABELS(NAME, TYPE) \
|
||||
INCBIN_INVOKE( \
|
||||
INCBIN_GLOBAL, \
|
||||
INCBIN_CONCATENATE( \
|
||||
NAME, \
|
||||
INCBIN_INVOKE( \
|
||||
INCBIN_STYLE_IDENT, \
|
||||
TYPE))) \
|
||||
INCBIN_INVOKE( \
|
||||
INCBIN_TYPE, \
|
||||
INCBIN_CONCATENATE( \
|
||||
NAME, \
|
||||
INCBIN_INVOKE( \
|
||||
INCBIN_STYLE_IDENT, \
|
||||
TYPE)))
|
||||
|
||||
/**
|
||||
* @brief Externally reference binary data included in another translation unit.
|
||||
*
|
||||
* Produces three external symbols that reference the binary data included in
|
||||
* another translation unit.
|
||||
*
|
||||
* The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
|
||||
* "Data", as well as "End" and "Size" after. An example is provided below.
|
||||
*
|
||||
* @param TYPE Optional array type. Omitting this picks a default of `unsigned char`.
|
||||
* @param NAME The name given for the binary data
|
||||
*
|
||||
* @code
|
||||
* INCBIN_EXTERN(Foo);
|
||||
*
|
||||
* // Now you have the following symbols:
|
||||
* // extern const unsigned char <prefix>Foo<data>[];
|
||||
* // extern const unsigned char *const <prefix>Foo<end>;
|
||||
* // extern const unsigned int <prefix>Foo<size>;
|
||||
* @endcode
|
||||
*
|
||||
* You may specify a custom optional data type as well as the first argument.
|
||||
* @code
|
||||
* INCBIN_EXTERN(custom_type, Foo);
|
||||
*
|
||||
* // Now you have the following symbols:
|
||||
* // extern const custom_type <prefix>Foo<data>[];
|
||||
* // extern const custom_type *const <prefix>Foo<end>;
|
||||
* // extern const unsigned int <prefix>Foo<size>;
|
||||
* @endcode
|
||||
*/
|
||||
#define INCBIN_EXTERN(...) \
|
||||
INCBIN_CONCATENATE(INCBIN_EXTERN_, INCBIN_VA_ARGC(__VA_ARGS__))(__VA_ARGS__)
|
||||
#define INCBIN_EXTERN_1(NAME, ...) \
|
||||
INCBIN_EXTERN_2(unsigned char, NAME)
|
||||
#define INCBIN_EXTERN_2(TYPE, NAME) \
|
||||
INCBIN_EXTERNAL const INCBIN_ALIGN TYPE \
|
||||
INCBIN_CONCATENATE( \
|
||||
INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
|
||||
INCBIN_STYLE_IDENT(DATA))[]; \
|
||||
INCBIN_EXTERNAL const INCBIN_ALIGN TYPE *const \
|
||||
INCBIN_CONCATENATE( \
|
||||
INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
|
||||
INCBIN_STYLE_IDENT(END)); \
|
||||
INCBIN_EXTERNAL const unsigned int \
|
||||
INCBIN_CONCATENATE( \
|
||||
INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
|
||||
INCBIN_STYLE_IDENT(SIZE))
|
||||
|
||||
/**
|
||||
* @brief Externally reference textual data included in another translation unit.
|
||||
*
|
||||
* Produces three external symbols that reference the textual data included in
|
||||
* another translation unit.
|
||||
*
|
||||
* The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
|
||||
* "Data", as well as "End" and "Size" after. An example is provided below.
|
||||
*
|
||||
* @param NAME The name given for the textual data
|
||||
*
|
||||
* @code
|
||||
* INCTXT_EXTERN(Foo);
|
||||
*
|
||||
* // Now you have the following symbols:
|
||||
* // extern const char <prefix>Foo<data>[];
|
||||
* // extern const char *const <prefix>Foo<end>;
|
||||
* // extern const unsigned int <prefix>Foo<size>;
|
||||
* @endcode
|
||||
*/
|
||||
#define INCTXT_EXTERN(NAME) \
|
||||
INCBIN_EXTERN_2(char, NAME)
|
||||
|
||||
/**
|
||||
* @brief Include a binary file into the current translation unit.
|
||||
*
|
||||
* Includes a binary file into the current translation unit, producing three symbols
|
||||
* for objects that encode the data and size respectively.
|
||||
*
|
||||
* The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
|
||||
* "Data", as well as "End" and "Size" after. An example is provided below.
|
||||
*
|
||||
* @param TYPE Optional array type. Omitting this picks a default of `unsigned char`.
|
||||
* @param NAME The name to associate with this binary data (as an identifier.)
|
||||
* @param FILENAME The file to include (as a string literal.)
|
||||
*
|
||||
* @code
|
||||
* INCBIN(Icon, "icon.png");
|
||||
*
|
||||
* // Now you have the following symbols:
|
||||
* // const unsigned char <prefix>Icon<data>[];
|
||||
* // const unsigned char *const <prefix>Icon<end>;
|
||||
* // const unsigned int <prefix>Icon<size>;
|
||||
* @endcode
|
||||
*
|
||||
* You may specify a custom optional data type as well as the first argument.
|
||||
* These macros are specialized by arity.
|
||||
* @code
|
||||
* INCBIN(custom_type, Icon, "icon.png");
|
||||
*
|
||||
* // Now you have the following symbols:
|
||||
* // const custom_type <prefix>Icon<data>[];
|
||||
* // const custom_type *const <prefix>Icon<end>;
|
||||
* // const unsigned int <prefix>Icon<size>;
|
||||
* @endcode
|
||||
*
|
||||
* @warning This must be used in global scope
|
||||
* @warning The identifiers may be different if INCBIN_STYLE is not default
|
||||
*
|
||||
* To externally reference the data included by this in another translation unit
|
||||
* please @see INCBIN_EXTERN.
|
||||
*/
|
||||
#ifdef _MSC_VER
|
||||
# define INCBIN(NAME, FILENAME) \
|
||||
INCBIN_EXTERN(NAME)
|
||||
#else
|
||||
# define INCBIN(...) \
|
||||
INCBIN_CONCATENATE(INCBIN_, INCBIN_VA_ARGC(__VA_ARGS__))(__VA_ARGS__)
|
||||
# if defined(__GNUC__)
|
||||
# define INCBIN_1(...) _Pragma("GCC error \"Single argument INCBIN not allowed\"")
|
||||
# elif defined(__clang__)
|
||||
# define INCBIN_1(...) _Pragma("clang error \"Single argument INCBIN not allowed\"")
|
||||
# else
|
||||
# define INCBIN_1(...) /* Cannot do anything here */
|
||||
# endif
|
||||
# define INCBIN_2(NAME, FILENAME) \
|
||||
INCBIN_3(unsigned char, NAME, FILENAME)
|
||||
# define INCBIN_3(TYPE, NAME, FILENAME) INCBIN_COMMON(TYPE, NAME, FILENAME, /* No terminator for binary data */)
|
||||
# define INCBIN_COMMON(TYPE, NAME, FILENAME, TERMINATOR) \
|
||||
__asm__(INCBIN_SECTION \
|
||||
INCBIN_GLOBAL_LABELS(NAME, DATA) \
|
||||
INCBIN_ALIGN_HOST \
|
||||
INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) ":\n" \
|
||||
INCBIN_MACRO " \"" FILENAME "\"\n" \
|
||||
TERMINATOR \
|
||||
INCBIN_GLOBAL_LABELS(NAME, END) \
|
||||
INCBIN_ALIGN_BYTE \
|
||||
INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) ":\n" \
|
||||
INCBIN_BYTE "1\n" \
|
||||
INCBIN_GLOBAL_LABELS(NAME, SIZE) \
|
||||
INCBIN_ALIGN_HOST \
|
||||
INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(SIZE) ":\n" \
|
||||
INCBIN_INT INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) " - " \
|
||||
INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) "\n" \
|
||||
INCBIN_ALIGN_HOST \
|
||||
".text\n" \
|
||||
); \
|
||||
INCBIN_EXTERN(TYPE, NAME)
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Include a textual file into the current translation unit.
|
||||
*
|
||||
* This behaves the same as INCBIN except it produces char compatible arrays
|
||||
* and implicitly adds a null-terminator byte, thus the size of data included
|
||||
* by this is one byte larger than that of INCBIN.
|
||||
*
|
||||
* Includes a textual file into the current translation unit, producing three
|
||||
* symbols for objects that encode the data and size respectively.
|
||||
*
|
||||
* The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
|
||||
* "Data", as well as "End" and "Size" after. An example is provided below.
|
||||
*
|
||||
* @param NAME The name to associate with this binary data (as an identifier.)
|
||||
* @param FILENAME The file to include (as a string literal.)
|
||||
*
|
||||
* @code
|
||||
* INCTXT(Readme, "readme.txt");
|
||||
*
|
||||
* // Now you have the following symbols:
|
||||
* // const char <prefix>Readme<data>[];
|
||||
* // const char *const <prefix>Readme<end>;
|
||||
* // const unsigned int <prefix>Readme<size>;
|
||||
* @endcode
|
||||
*
|
||||
* @warning This must be used in global scope
|
||||
* @warning The identifiers may be different if INCBIN_STYLE is not default
|
||||
*
|
||||
* To externally reference the data included by this in another translation unit
|
||||
* please @see INCBIN_EXTERN.
|
||||
*/
|
||||
#if defined(_MSC_VER)
|
||||
# define INCTXT(NAME, FILENAME) \
|
||||
INCBIN_EXTERN(NAME)
|
||||
#else
|
||||
# define INCTXT(NAME, FILENAME) \
|
||||
INCBIN_COMMON(char, NAME, FILENAME, INCBIN_BYTE "0\n")
|
||||
#endif
|
||||
|
||||
#endif
|
108
SmartCrop/intelligentroi.cpp
Normal file
108
SmartCrop/intelligentroi.cpp
Normal file
@ -0,0 +1,108 @@
|
||||
#include "intelligentroi.h"
|
||||
|
||||
#include <opencv2/imgproc.hpp>
|
||||
|
||||
#include "utils.h"
|
||||
#include "log.h"
|
||||
|
||||
/**
 * Ordering predicate for prioritized points.
 *
 * Points with a higher priority value come first; points of equal priority
 * are ordered by increasing distance to center.
 *
 * @param a first point with its priority
 * @param b second point with its priority
 * @param center point the distances are measured to
 * @return true if a is to be ordered before b
 */
bool InteligentRoi::compPointPrio(const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b, const cv::Point2i& center)
{
	const bool samePriority = (a.second == b.second);
	if(!samePriority)
		return a.second > b.second;

	const double distanceOfA = pointDist(a.first, center);
	const double distanceOfB = pointDist(b.first, center);
	return distanceOfA < distanceOfB;
}
|
||||
|
||||
/**
 * Moves a rectangle, without resizing it, so that the given point ends up on
 * its border if the point is currently outside of it.
 *
 * @param rect rectangle to move, modified in place
 * @param point point the rectangle is moved towards
 */
void InteligentRoi::slideRectToPoint(cv::Rect& rect, const cv::Point2i& point)
{
	// Nothing to do when the point is already covered
	if(pointInRect(point, rect))
		return;

	// Horizontal axis: align the nearer vertical edge with the point
	if(point.x < rect.x)
	{
		rect.x = point.x;
	}
	else if(point.x > rect.x+rect.width)
	{
		rect.x = point.x-rect.width;
	}

	// Vertical axis: align the nearer horizontal edge with the point
	if(point.y < rect.y)
	{
		rect.y = point.y;
	}
	else if(point.y > rect.y+rect.height)
	{
		rect.y = point.y-rect.height;
	}
}
|
||||
|
||||
/**
 * Places a square, whose side is the shorter image side, so that it covers as
 * many of the given points as possible.
 *
 * The square starts centered in the image. As long as the bounding rectangle
 * of the points does not fit into the square, the last point in priority
 * order (see compPointPrio()) is dropped after sliding the square towards it.
 * The square is then slid to every remaining point and finally limited to the
 * image area.
 *
 * @param incompleate set to true if at least one point had to be dropped, false otherwise
 * @param imageSize size of the image the rectangle has to stay inside of
 * @param mustInclude points to cover together with their priority
 * @return the resulting rectangle
 */
cv::Rect InteligentRoi::maxRect(bool& incompleate, const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude)
{
	incompleate = false;

	const int side = std::min(imageSize.height, imageSize.width);
	const cv::Point2i center(imageSize.width/2, imageSize.height/2);
	cv::Rect crop(center.x-side/2, center.y-side/2, side, side);

	std::sort(mustInclude.begin(), mustInclude.end(),
		[&center](const std::pair<cv::Point2i, int>& lhs, const std::pair<cv::Point2i, int>& rhs){return compPointPrio(lhs, rhs, center);});

	for(;;)
	{
		const cv::Rect bounds = rectFromPoints(mustInclude);
		const bool fits = bounds.width-2 <= side && bounds.height-2 <= side;
		if(fits)
			break;

		// Give up on the last point in priority order, but still nudge the crop towards it
		incompleate = true;
		slideRectToPoint(crop, mustInclude.back().first);
		mustInclude.pop_back();

		Log(Log::DEBUG)<<"cant fill";
		for(const std::pair<cv::Point2i, int>& remaining : mustInclude)
			Log(Log::DEBUG)<<remaining.first<<' '<<pointDist(remaining.first, center)<<' '<<remaining.second;
	}

	for(const std::pair<cv::Point2i, int>& kept : mustInclude)
		slideRectToPoint(crop, kept.first);

	// Limit the result to the image: shift at the top/left, shrink at the bottom/right
	crop.x = std::max(crop.x, 0);
	crop.y = std::max(crop.y, 0);
	crop.width = std::min(crop.width, imageSize.width-crop.x);
	crop.height = std::min(crop.height, imageSize.height-crop.y);

	return crop;
}
|
||||
|
||||
/**
 * Looks up and remembers the class id the given network uses for "person".
 *
 * @param yolo network whose class ids the detections passed to getCropRectangle() use
 */
InteligentRoi::InteligentRoi(const Yolo& yolo):
	personId(yolo.getClassForStr("person"))
{
}
|
||||
|
||||
/**
 * Computes a crop rectangle from the bounding boxes of the given detections.
 *
 * The four corners of every detection box are added as points to include,
 * weighted with the priority of the detection. For detections of the person
 * class the upper corners get a higher priority than the lower ones and the
 * middle of the upper edge is added with the highest priority.
 *
 * @param out receives the crop rectangle
 * @param detections detections to take into account
 * @param imageSize size of the image the detections refer to
 * @return true if not all points could be included in the rectangle
 */
bool InteligentRoi::getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize)
{
	std::vector<std::pair<cv::Point2i, int>> corners;

	for(const Yolo::Detection& detection : detections)
	{
		const int priority = detection.priority;
		const auto topLeft = detection.box.tl();
		const auto bottomRight = detection.box.br();
		const auto topRight = topLeft+cv::Point2i(detection.box.width, 0);
		const auto bottomLeft = bottomRight+cv::Point2i(0-detection.box.width, 0);

		if(detection.class_id != personId)
		{
			corners.push_back({topLeft, priority});
			corners.push_back({bottomRight, priority});
			corners.push_back({topRight, priority});
			corners.push_back({bottomLeft, priority});
			continue;
		}

		// Person: favour the upper part of the box
		const auto topCenter = topLeft+cv::Point2i(detection.box.width/2, 0);
		corners.push_back({topCenter, priority+2});
		corners.push_back({topLeft, priority+1});
		corners.push_back({bottomRight, priority});
		corners.push_back({topRight, priority+1});
		corners.push_back({bottomLeft, priority});
	}

	bool incompleate;
	out = maxRect(incompleate, imageSize, corners);
	return incompleate;
}
|
18
SmartCrop/intelligentroi.h
Normal file
18
SmartCrop/intelligentroi.h
Normal file
@ -0,0 +1,18 @@
|
||||
#pragma once
|
||||
|
||||
#include <opencv2/imgproc.hpp>
|
||||
|
||||
#include "yolo.h"
|
||||
|
||||
class InteligentRoi
|
||||
{
|
||||
private:
|
||||
int personId;
|
||||
static bool compPointPrio(const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b, const cv::Point2i& center);
|
||||
static void slideRectToPoint(cv::Rect& rect, const cv::Point2i& point);
|
||||
static cv::Rect maxRect(bool& incompleate, const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude = {});
|
||||
|
||||
public:
|
||||
InteligentRoi(const Yolo& yolo);
|
||||
bool getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize);
|
||||
};
|
63
SmartCrop/log.cpp
Normal file
63
SmartCrop/log.cpp
Normal file
@ -0,0 +1,63 @@
|
||||
/**
|
||||
* Lubricant Detecter
|
||||
* Copyright (C) 2021 Carl Klemm
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* version 3 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
||||
* Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "log.h"
|
||||
|
||||
/**
 * Starts a log message of the given level.
 *
 * If headers are enabled the message begins with the level label in brackets.
 *
 * @param type level of this message
 * @param endlineI if true the destructor ends the message with a newline
 */
Log::Log(Level type, bool endlineI): msglevel(type), endline(endlineI)
{
	if(!headers)
		return;

	*this << "["+getLabel(type)+"] ";
}
|
||||
|
||||
/**
 * Ends the message: if anything was printed and line ending is enabled, a
 * newline is written to the stream the message itself went to.
 */
Log::~Log()
{
	if(opened && endline)
	{
		// ERROR messages are written to std::cerr, so their newline belongs there
		// too; otherwise the streams interleave wrongly when redirected separately.
		if(msglevel == ERROR)
			std::cerr<<'\n';
		else
			std::cout<<'\n';
	}
	opened = false;
}
|
||||
|
||||
|
||||
std::string Log::getLabel(Level level)
|
||||
{
|
||||
std::string label;
|
||||
switch(level)
|
||||
{
|
||||
case DEBUG:
|
||||
label = "DEBUG";
|
||||
break;
|
||||
case INFO:
|
||||
label = "INFO ";
|
||||
break;
|
||||
case WARN:
|
||||
label = "WARN ";
|
||||
break;
|
||||
case ERROR:
|
||||
label = "ERROR";
|
||||
break;
|
||||
}
|
||||
return label;
|
||||
}
|
||||
|
||||
// Global switch: when true, the Log constructor prefixes every message with its "[LABEL] " header.
bool Log::headers = false;
|
||||
// Global threshold: operator<< only prints messages whose level is at least this value.
Log::Level Log::level = WARN;
|
64
SmartCrop/log.h
Normal file
64
SmartCrop/log.h
Normal file
@ -0,0 +1,64 @@
|
||||
/**
|
||||
* eisgenerator
|
||||
* Copyright (C) 2021 Carl Klemm
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* version 3 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
||||
* Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
/**
 * Minimal stream style logger.
 *
 * A temporary Log object is created per message, content is appended with
 * operator<< and the destructor terminates the line. Messages below the global
 * Log::level threshold are discarded. ERROR messages are written to std::cerr,
 * everything else to std::cout.
 */
class Log
{
public:

    /// Message severities, ordered from least to most severe.
    enum Level
    {
        DEBUG,
        INFO,
        WARN,
        ERROR
    };

private:
    bool opened = false;     // set once something was actually printed
    Level msglevel = DEBUG;  // severity of this message
    bool endline = true;     // whether the destructor appends a newline

    std::string getLabel(Level level);

public:

    static bool headers;  // prefix messages with "[LABEL] " when true
    static Level level;   // minimum severity that gets printed

    Log() {}
    Log(Level type, bool endlineI = true);
    ~Log();

    /// Streams @p msg to the stream selected by the message severity, if the severity passes the threshold.
    template<class T> Log &operator<<(const T &msg)
    {
        if(msglevel < level)
            return *this;

        std::ostream& sink = (msglevel == ERROR) ? std::cerr : std::cout;
        sink<<msg;
        opened = true;
        return *this;
    }
};
|
439
SmartCrop/main.cpp
Normal file
439
SmartCrop/main.cpp
Normal file
@ -0,0 +1,439 @@
|
||||
#include <filesystem>
|
||||
#include <iostream>
|
||||
#include <opencv2/core.hpp>
|
||||
#include <opencv2/core/types.hpp>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <opencv2/highgui.hpp>
|
||||
#include <algorithm>
|
||||
#include <execution>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <numeric>
|
||||
|
||||
#include "yolo.h"
|
||||
#include "log.h"
|
||||
#include "options.h"
|
||||
#include "utils.h"
|
||||
#include "intelligentroi.h"
|
||||
#include "seamcarving.h"
|
||||
#include "facerecognizer.h"
|
||||
|
||||
/**
 * Finds the detection whose horizontal extent contains the column @p x.
 *
 * @param x          column to test
 * @param detections candidates to search
 * @param ignore     optional detection (element of @p detections) that must not be returned
 * @return the matching detection that reaches furthest to the right, or nullptr if no
 *         detection spans @p x. Both box edges count as inside.
 */
const Yolo::Detection* pointInDetectionHoriz(int x, const std::vector<Yolo::Detection>& detections, const Yolo::Detection* ignore = nullptr)
{
    const Yolo::Detection* best = nullptr;
    for(const Yolo::Detection& candidate : detections)
    {
        const bool skipped = ignore && ignore == &candidate;
        const bool spansX = candidate.box.x <= x && candidate.box.x+candidate.box.width >= x;
        if(skipped || !spansX)
            continue;

        if(!best || candidate.box.br().x > best->box.br().x)
            best = &candidate;
    }
    return best;
}
|
||||
|
||||
/**
 * Advances @p x to the end of the horizontal region that starts at @p x.
 *
 * @param x          in: start column of the region, out: end column of the region
 * @param detections detections that define the frozen (must not be stretched) areas
 * @param imgSizeX   image width, used as the endpoint when no detection lies to the right
 * @return true if the region is covered by detections (frozen), false if it is free space
 *
 * If @p x is outside of every detection, the region ends where the nearest detection
 * to the right begins (or at the image width). If @p x is inside a detection, the region
 * ends at the right edge of that detection, extended recursively through overlapping
 * detections.
 */
bool findRegionEndpointHoriz(int& x, const std::vector<Yolo::Detection>& detections, int imgSizeX)
{
    const Yolo::Detection* inDetection = pointInDetectionHoriz(x, detections);

    Log(Log::DEBUG, false)<<__func__<<" point "<<x;

    if(!inDetection)
    {
        // Pick the detection that starts nearest to the right of x. The previous
        // comparison selected the farthest one, which swallowed every detection in
        // between into the unfrozen region.
        const Yolo::Detection* closest = nullptr;
        for(const Yolo::Detection& detection : detections)
        {
            if(detection.box.x <= x)
                continue;
            if(closest == nullptr || detection.box.x < closest->box.x)
                closest = &detection;
        }

        x = closest ? closest->box.x : imgSizeX;

        Log(Log::DEBUG)<<" is not in any box and will be moved to "<<x<<" where the closest box ("<<(closest ? closest->className : "null")<<") is";
        return false;
    }

    x = inDetection->box.br().x;
    Log(Log::DEBUG, false)<<" is in a box and will be moved to its end "<<x<<" where ";

    // Check whether another detection overlaps the end of the current one and reaches further.
    const Yolo::Detection* candidateDetection = pointInDetectionHoriz(x, detections, inDetection);
    if(candidateDetection && candidateDetection->box.br().x > x)
    {
        Log(Log::DEBUG)<<"it is again in a box";
        return findRegionEndpointHoriz(x, detections, imgSizeX);
    }

    Log(Log::DEBUG)<<"it is not in a box";
    return true;
}
|
||||
|
||||
/**
 * Cuts @p image into full height vertical strips along the detection boundaries.
 *
 * @param image      image to slice, the returned strips are views into this image
 * @param detections detections that define the frozen areas
 * @return the strips from left to right, each paired with a flag that is true when the
 *         strip is covered by detections (frozen) and false when it may be resized
 */
std::vector<std::pair<cv::Mat, bool>> cutImageIntoHorzRegions(cv::Mat& image, const std::vector<Yolo::Detection>& detections)
{
    std::vector<std::pair<cv::Mat, bool>> out;

    // Routed through the logger so the message respects the configured log level.
    Log(Log::DEBUG)<<__func__<<' '<<image.cols<<'x'<<image.rows;

    for(int x = 0; x < image.cols; ++x)
    {
        const int start = x;
        const bool frozen = findRegionEndpointHoriz(x, detections, image.cols);

        // A detection box may reach beyond the image, never let the strip do the same.
        x = std::min(x, image.cols);

        // The endpoint column belongs to this strip unless it is the image border itself.
        int width = x-start;
        if(x < image.cols)
            ++width;

        const cv::Rect rect(start, 0, width, image.rows);
        Log(Log::DEBUG)<<__func__<<" region\t"<<rect;
        cv::Mat slice = image(rect);
        out.push_back({slice, frozen});
    }

    return out;
}
|
||||
|
||||
/**
 * Concatenates vertical strips side by side into one image.
 *
 * @param slices strips in left to right order, must not be empty; the type and height
 *               of the first strip determine those of the result
 * @return a newly allocated image whose width is the sum of all strip widths
 */
cv::Mat assembleFromSlicesHoriz(const std::vector<std::pair<cv::Mat, bool>>& slices)
{
    assert(!slices.empty());

    int cols = 0;
    for(const std::pair<cv::Mat, bool>& slice : slices)
        cols += slice.first.cols;

    // cv::Mat takes (rows, cols, type). The arguments used to be swapped, which only
    // worked by accident for square results.
    const int rows = slices[0].first.rows;
    cv::Mat image(rows, cols, slices[0].first.type());
    Log(Log::DEBUG)<<__func__<<' '<<image.size()<<' '<<cols<<' '<<rows;

    int col = 0;
    for(const std::pair<cv::Mat, bool>& slice : slices)
    {
        const cv::Rect rect(col, 0, slice.first.cols, slice.first.rows);
        Log(Log::DEBUG)<<__func__<<' '<<rect;
        slice.first.copyTo(image(rect));
        // Strips do not overlap: advance by the full strip width so no column is
        // overwritten and none is left uninitialised at the right border.
        col += slice.first.cols;
    }

    return image;
}
|
||||
|
||||
/// Mirrors @p rect along the main diagonal: x/y and width/height trade places.
void transposeRect(cv::Rect& rect)
{
    rect = cv::Rect(rect.y, rect.x, rect.height, rect.width);
}
|
||||
|
||||
/**
 * Grows @p image to @p targetAspectRatio by inserting seams into the areas that are
 * not covered by important detections.
 *
 * @param image             image to resize in place
 * @param detections        detections of the image; entries with priority < 3 are ignored
 * @param targetAspectRatio desired width/height ratio
 * @return true if the image was resized, false if there is not enough unfrozen space or
 *         seam carving failed (the image content is left as it was in that case)
 *
 * Images that are too wide are transposed so that the same column based code can add
 * rows; the image is transposed back before returning.
 */
bool seamCarveResize(cv::Mat& image, std::vector<Yolo::Detection> detections, double targetAspectRatio = 1.0)
{
    detections.erase(std::remove_if(detections.begin(), detections.end(), [](const Yolo::Detection& detection){return detection.priority < 3;}), detections.end());

    const double aspectRatio = image.cols/static_cast<double>(image.rows);

    Log(Log::DEBUG)<<"Image size "<<image.size()<<" aspect ratio "<<aspectRatio<<" target aspect ratio "<<targetAspectRatio;

    // Too wide: rows have to be added, which is done on the transposed image.
    const bool vertical = aspectRatio > targetAspectRatio;

    int requiredLines = 0;
    if(!vertical)
        requiredLines = image.rows*targetAspectRatio - image.cols;
    else
        requiredLines = image.cols/targetAspectRatio - image.rows;

    Log(Log::DEBUG)<<__func__<<' '<<requiredLines<<" lines are required in "<<(vertical ? "vertical" : "horizontal")<<" direction";

    if(vertical)
    {
        cv::transpose(image, image);
        for(Yolo::Detection& detection : detections)
            transposeRect(detection.box);
    }

    // Undoes the transposition above, used on every exit path.
    const auto restoreOrientation = [&image, vertical]()
    {
        if(vertical)
            cv::transpose(image, image);
    };

    std::vector<std::pair<cv::Mat, bool>> slices = cutImageIntoHorzRegions(image, detections);
    Log(Log::DEBUG)<<"Image has "<<slices.size()<<" slices:";
    int totalResizableSize = 0;
    for(const std::pair<cv::Mat, bool>& slice : slices)
    {
        Log(Log::DEBUG)<<"a "<<(slice.second ? "frozen" : "unfrozen")<<" slice of size "<<slice.first.cols;
        if(!slice.second)
            totalResizableSize += slice.first.cols;
    }

    if(totalResizableSize < requiredLines+1)
    {
        Log(Log::WARN)<<"Unable to seam carve as there are only "<<totalResizableSize<<" unfrozen cols";
        restoreOrientation();
        return false;
    }

    // Distribute the seams over the unfrozen slices proportionally to their width.
    std::vector<int> seamsForSlice(slices.size(), 0);
    for(size_t i = 0; i < slices.size(); ++i)
    {
        if(!slices[i].second)
            seamsForSlice[i] = (static_cast<double>(slices[i].first.cols)/totalResizableSize)*requiredLines;
    }

    // Whatever was lost to truncation is added to the last unfrozen slice.
    const int residual = requiredLines - std::accumulate(seamsForSlice.begin(), seamsForSlice.end(), 0);
    for(size_t i = slices.size(); i-- > 0;)
    {
        if(!slices[i].second)
        {
            seamsForSlice[i] += residual;
            break;
        }
    }

    for(size_t i = 0; i < slices.size(); ++i)
    {
        if(seamsForSlice[i] == 0)
            continue;

        if(!SeamCarving::strechImage(slices[i].first, seamsForSlice[i], true))
        {
            restoreOrientation();
            return false;
        }
    }

    image = assembleFromSlicesHoriz(slices);
    restoreOrientation();

    return true;
}
|
||||
|
||||
/**
 * Draws every detection (box plus a filled label with class name, confidence and
 * priority) and the chosen crop rectangle (red, 8px) onto @p image.
 */
void drawDebugInfo(cv::Mat &image, const cv::Rect& rect, const std::vector<Yolo::Detection>& detections)
{
    for(const Yolo::Detection& detection : detections)
    {
        cv::rectangle(image, detection.box, detection.color, 3);

        // "<class> <confidence, 4 characters> <priority>"
        const std::string confidenceText = std::to_string(detection.confidence).substr(0, 4);
        const std::string priorityText = std::to_string(detection.priority);
        const std::string label = detection.className + ' ' + confidenceText + ' ' + priorityText;

        const cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_DUPLEX, 1, 1, 0);
        const cv::Rect textBox(detection.box.x, detection.box.y - 40, labelSize.width + 10, labelSize.height + 20);
        cv::rectangle(image, textBox, detection.color, cv::FILLED);

        const cv::Point textOrigin(detection.box.x + 5, detection.box.y - 10);
        cv::putText(image, label, textOrigin, cv::FONT_HERSHEY_DUPLEX, 1, cv::Scalar(0, 0, 0), 1, 0);
    }

    cv::rectangle(image, rect, cv::Scalar(0, 0, 255), 8);
}
|
||||
|
||||
/**
 * Shrinks @p image in place so that its longer side is at most twice the longer side
 * of @p targetSize, keeping the aspect ratio. Smaller images are left untouched.
 */
static void reduceSize(cv::Mat& image, const cv::Size& targetSize)
{
    const int longTargetSize = std::max(targetSize.width, targetSize.height)*2;
    if(std::max(image.cols, image.rows) <= longTargetSize)
        return;

    cv::Size reduced;
    if(image.cols > image.rows)
    {
        const double ratio = static_cast<double>(longTargetSize)/image.cols;
        reduced = cv::Size(longTargetSize, static_cast<int>(image.rows*ratio));
    }
    else
    {
        const double ratio = static_cast<double>(longTargetSize)/image.rows;
        reduced = cv::Size(static_cast<int>(image.cols*ratio), longTargetSize);
    }

    cv::resize(image, image, reduced, 0, 0, cv::INTER_CUBIC);
}
|
||||
|
||||
void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yolo, FaceRecognizer* recognizer,
|
||||
std::mutex& reconizerMutex, const std::filesystem::path& debugOutputPath)
|
||||
{
|
||||
InteligentRoi intRoi(yolo);
|
||||
cv::Mat image = cv::imread(path);
|
||||
if(!image.data)
|
||||
{
|
||||
Log(Log::WARN)<<"could not load image "<<path<<" skipping";
|
||||
return;
|
||||
}
|
||||
|
||||
reduceSize(image, config.targetSize);
|
||||
|
||||
std::vector<Yolo::Detection> detections = yolo.runInference(image);
|
||||
|
||||
Log(Log::DEBUG)<<"Got "<<detections.size()<<" detections for "<<path;
|
||||
for(Yolo::Detection& detection : detections)
|
||||
{
|
||||
bool hasmatch = false;
|
||||
if(recognizer && detection.className == "person")
|
||||
{
|
||||
cv::Mat person = image(detection.box);
|
||||
reconizerMutex.lock();
|
||||
std::pair<int, double> match = recognizer->isMatch(person);
|
||||
reconizerMutex.unlock();
|
||||
if(match.first >= 0)
|
||||
{
|
||||
detection.priority += 10;
|
||||
hasmatch = true;
|
||||
}
|
||||
}
|
||||
Log(Log::DEBUG)<<detection.class_id<<": "<<detection.className<<" at "<<detection.box<<" with prio "<<detection.priority<<(hasmatch ? " has match" : "");
|
||||
}
|
||||
|
||||
cv::Rect crop;
|
||||
bool incompleate = intRoi.getCropRectangle(crop, detections, image.size());
|
||||
|
||||
if(config.seamCarving && incompleate)
|
||||
{
|
||||
bool ret = seamCarveResize(image, detections, config.targetSize.aspectRatio());
|
||||
if(ret && image.size().aspectRatio() != config.targetSize.aspectRatio())
|
||||
{
|
||||
detections = yolo.runInference(image);
|
||||
}
|
||||
}
|
||||
|
||||
cv::Mat croppedImage;
|
||||
|
||||
if(image.size().aspectRatio() != config.targetSize.aspectRatio() && incompleate)
|
||||
{
|
||||
intRoi.getCropRectangle(crop, detections, image.size());
|
||||
|
||||
if(config.debug)
|
||||
{
|
||||
cv::Mat debugImage = image.clone();
|
||||
drawDebugInfo(debugImage, crop, detections);
|
||||
bool ret = cv::imwrite(debugOutputPath/path.filename(), debugImage);
|
||||
if(!ret)
|
||||
Log(Log::WARN)<<"could not save debug image to "<<debugOutputPath/path.filename()<<" skipping";
|
||||
}
|
||||
|
||||
croppedImage = image(crop);
|
||||
}
|
||||
else if(!incompleate)
|
||||
{
|
||||
croppedImage = image(crop);
|
||||
}
|
||||
else
|
||||
{
|
||||
croppedImage = image;
|
||||
}
|
||||
|
||||
cv::Mat resizedImage;
|
||||
cv::resize(croppedImage, resizedImage, config.targetSize, 0, 0, cv::INTER_CUBIC);
|
||||
bool ret = cv::imwrite(config.outputDir/path.filename(), resizedImage);
|
||||
if(!ret)
|
||||
Log(Log::WARN)<<"could not save image to "<<config.outputDir/path.filename()<<" skipping";
|
||||
}
|
||||
|
||||
void threadFn(const std::vector<std::filesystem::path>& images, const Config& config, FaceRecognizer* recognizer,
|
||||
std::mutex& reconizerMutex, const std::filesystem::path& debugOutputPath)
|
||||
{
|
||||
Yolo yolo(config.modelPath, {640, 480}, config.classesPath, false);
|
||||
for(std::filesystem::path path : images)
|
||||
pipeline(path, config, yolo, recognizer, reconizerMutex, debugOutputPath);
|
||||
}
|
||||
|
||||
/**
 * Splits @p vec into consecutive chunks of (almost) equal size.
 *
 * @param vec   elements to distribute
 * @param parts requested number of chunks; 0 is treated as 1
 * @return min(parts, vec.size()) non-empty chunks in original order. When the size is
 *         not evenly divisible, the leading chunks receive one extra element. An empty
 *         input yields an empty result.
 */
template<typename T>
std::vector<std::vector<T>> splitVector(const std::vector<T>& vec, size_t parts)
{
    std::vector<std::vector<T>> out;
    if(vec.empty())
        return out;

    // Never create empty chunks and never divide by zero.
    const size_t count = std::min(std::max<size_t>(parts, 1), vec.size());
    const size_t length = vec.size()/count;
    const size_t remain = vec.size() % count;

    out.reserve(count);
    auto first = vec.begin();
    for(size_t i = 0; i < count; ++i)
    {
        const size_t chunk = length + (i < remain ? 1 : 0);
        out.emplace_back(first, first + chunk);
        first += chunk;
    }

    return out;
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
Log::level = Log::INFO;
|
||||
|
||||
Config config;
|
||||
argp_parse(&argp, argc, argv, 0, 0, &config);
|
||||
|
||||
if(config.outputDir.empty())
|
||||
{
|
||||
Log(Log::ERROR)<<"a output path \"-o\" is required";
|
||||
return 1;
|
||||
}
|
||||
|
||||
if(config.imagePaths.empty())
|
||||
{
|
||||
Log(Log::ERROR)<<"at least one input image or directory is required";
|
||||
return 1;
|
||||
}
|
||||
|
||||
std::vector<std::filesystem::path> imagePaths;
|
||||
|
||||
for(const std::filesystem::path& path : config.imagePaths)
|
||||
getImageFiles(path, imagePaths);
|
||||
|
||||
Log(Log::DEBUG)<<"Images:";
|
||||
for(const::std::filesystem::path& path: imagePaths)
|
||||
Log(Log::DEBUG)<<path;
|
||||
|
||||
if(imagePaths.empty())
|
||||
{
|
||||
Log(Log::ERROR)<<"no image was found\n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
if(!std::filesystem::exists(config.outputDir))
|
||||
{
|
||||
if(!std::filesystem::create_directory(config.outputDir))
|
||||
{
|
||||
Log(Log::ERROR)<<"could not create directory at "<<config.outputDir;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
std::filesystem::path debugOutputPath(config.outputDir/"debug");
|
||||
if(config.debug)
|
||||
{
|
||||
if(!std::filesystem::exists(debugOutputPath))
|
||||
std::filesystem::create_directory(debugOutputPath);
|
||||
}
|
||||
|
||||
FaceRecognizer* recognizer = nullptr;
|
||||
std::mutex recognizerMutex;
|
||||
if(!config.focusPersonImage.empty())
|
||||
{
|
||||
cv::Mat personImage = cv::imread(config.focusPersonImage);
|
||||
if(personImage.empty())
|
||||
{
|
||||
Log(Log::ERROR)<<"Could not load image from "<<config.focusPersonImage;
|
||||
return 1;
|
||||
}
|
||||
recognizer = new FaceRecognizer();
|
||||
recognizer->addReferances({personImage});
|
||||
recognizer->setThreshold(config.threshold);
|
||||
}
|
||||
|
||||
std::vector<std::thread> threads;
|
||||
std::vector<std::vector<std::filesystem::path>> imagePathParts = splitVector(imagePaths, std::thread::hardware_concurrency());
|
||||
|
||||
for(size_t i = 0; i < std::thread::hardware_concurrency(); ++i)
|
||||
threads.push_back(std::thread(threadFn, imagePathParts[i], std::ref(config), recognizer, std::ref(recognizerMutex), std::ref(debugOutputPath)));
|
||||
|
||||
for(std::thread& thread : threads)
|
||||
thread.join();
|
||||
|
||||
return 0;
|
||||
}
|
98
SmartCrop/options.h
Normal file
98
SmartCrop/options.h
Normal file
@ -0,0 +1,98 @@
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <argp.h>
|
||||
#include <iostream>
|
||||
#include <filesystem>
|
||||
#include <opencv2/core/types.hpp>
|
||||
#include "log.h"
|
||||
|
||||
// Program identification picked up by argp for --version and the bug report hint of --help.
const char *argp_program_version = "AIImagePreprocesses";
const char *argp_program_bug_address = "<carl@uvos.xyz>";
// Texts shown by --help / --usage.
static char doc[] = "Application that transforms images into formats, sizes and aspect ratios required for ai training";
static char args_doc[] = "FILE(S)";

// Command line options: {long name, short key, argument name, flags, help text}.
// The short keys are the ones handled in parse_opt().
static struct argp_option options[] =
{
    {"verbose",          'v', 0,             0, "Show debug messages" },
    {"quiet",            'q', 0,             0, "only output data" },
    {"model",            'm', "[FILENAME]",  0, "YoloV8 model to use for detection" },
    {"classes",          'c', "[FILENAME]",  0, "classes text file to use" },
    {"out",              'o', "[DIRECTORY]", 0, "directory where images are to be saved" },
    {"debug",            'd', 0,             0, "output debug images" },
    {"seam-carving",     's', 0,             0, "use seam carving to change image aspect ratio instead of cropping"},
    {"size",             'z', "[PIXELS]",    0, "target output size, default: 512"},
    {"focus-person",     'f', "[FILENAME]",  0, "a file name to an image of a person that the crop should focus on"},
    {"person-threshold", 't', "[NUMBER]",    0, "the threshold at which to consider a person matched, defaults to 0.363"},
    {0}
};
|
||||
|
||||
struct Config
|
||||
{
|
||||
std::vector<std::filesystem::path> imagePaths;
|
||||
std::filesystem::path modelPath;
|
||||
std::filesystem::path classesPath;
|
||||
std::filesystem::path outputDir;
|
||||
std::filesystem::path focusPersonImage;
|
||||
bool seamCarving = false;
|
||||
bool debug = false;
|
||||
double threshold = 0.363;
|
||||
cv::Size targetSize = cv::Size(512, 512);
|
||||
};
|
||||
|
||||
static error_t parse_opt (int key, char *arg, struct argp_state *state)
|
||||
{
|
||||
Config *config = reinterpret_cast<Config*>(state->input);
|
||||
try
|
||||
{
|
||||
switch (key)
|
||||
{
|
||||
case 'q':
|
||||
Log::level = Log::ERROR;
|
||||
break;
|
||||
case 'v':
|
||||
Log::level = Log::DEBUG;
|
||||
break;
|
||||
case 'm':
|
||||
config->modelPath = arg;
|
||||
break;
|
||||
case 'c':
|
||||
config->classesPath = arg;
|
||||
break;
|
||||
case 'd':
|
||||
config->debug = true;
|
||||
break;
|
||||
case 'o':
|
||||
config->outputDir.assign(arg);
|
||||
break;
|
||||
case 's':
|
||||
config->seamCarving = true;
|
||||
break;
|
||||
case 'f':
|
||||
config->focusPersonImage = arg;
|
||||
break;
|
||||
case 't':
|
||||
config->threshold = std::atof(arg);
|
||||
break;
|
||||
case 'z':
|
||||
{
|
||||
int x = std::stoi(arg);
|
||||
config->targetSize = cv::Size(x, x);
|
||||
break;
|
||||
}
|
||||
case ARGP_KEY_ARG:
|
||||
config->imagePaths.push_back(arg);
|
||||
break;
|
||||
default:
|
||||
return ARGP_ERR_UNKNOWN;
|
||||
}
|
||||
}
|
||||
catch(const std::invalid_argument& ex)
|
||||
{
|
||||
std::cout<<arg<<" passed for argument -"<<static_cast<char>(key)<<" is not a valid number.\n";
|
||||
return ARGP_KEY_ERROR;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Parser description handed to argp_parse(): option table, option callback and the usage/help texts.
static struct argp argp = {options, parse_opt, args_doc, doc};
|
16
SmartCrop/readfile.h
Normal file
16
SmartCrop/readfile.h
Normal file
@ -0,0 +1,16 @@
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <stdexcept>
|
||||
#include <sstream>
|
||||
|
||||
/**
 * Reads the whole file at @p path into a string.
 *
 * @param path file to read
 * @return the complete file content
 * @throws std::runtime_error if the file can not be opened
 */
inline std::string readFile(const std::filesystem::path& path)
{
    std::ifstream input(path);
    if(!input.is_open())
    {
        const std::string message = std::string("could not open file ") + path.string();
        throw std::runtime_error(message);
    }

    std::stringstream content;
    content<<input.rdbuf();
    return content.str();
}
|
356
SmartCrop/seamcarving.cpp
Normal file
356
SmartCrop/seamcarving.cpp
Normal file
@ -0,0 +1,356 @@
|
||||
#include "seamcarving.h"
|
||||
|
||||
#include <opencv2/imgcodecs.hpp>
|
||||
#include <opencv2/highgui/highgui.hpp>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <iostream>
|
||||
#include <filesystem>
|
||||
#include <cfloat>
|
||||
#include <vector>
|
||||
#include "log.h"
|
||||
|
||||
/**
 * Changes the width of @p image by @p seams columns using seam carving.
 *
 * @param image     image to modify in place
 * @param seams     number of vertical seams to process
 * @param grow      true: insert the found seams (image gets wider),
 *                  false: remove them (image gets narrower)
 * @param seamsVect optional output, every processed seam is appended to it
 * @return false if no seam could be computed or the working copy ran out of pixels
 *         (@p image is unchanged then), true otherwise
 *
 * The seams are always found by successively removing them from a working copy; for
 * growing, the recorded seams are afterwards inserted into a copy of the original.
 */
bool SeamCarving::strechImage(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect)
{
    cv::Mat carved = image.clone();
    assert(!carved.empty());
    std::vector<std::vector<int>> foundSeams;

    for(int processed = 0; processed < seams; ++processed)
    {
        // Per pixel energy, then the cumulative cost of the cheapest vertical path to each pixel.
        const cv::Mat energy = computeGradientMagnitude(carved);
        const cv::Mat cumulative = computePathIntensityMat(energy);
        if(cumulative.rows == 0 && cumulative.cols == 0)
            return false;

        const std::vector<int> seam = getLeastImportantPath(cumulative);
        foundSeams.push_back(seam);
        if(seamsVect)
            seamsVect->push_back(seam);

        carved = removeLeastImportantPath(carved, seam);
        if(carved.rows == 0 || carved.cols == 0)
            return false;
    }

    if(!grow)
    {
        image = carved;
        return true;
    }

    cv::Mat grown = image.clone();
    for(const std::vector<int>& seam : foundSeams)
        grown = addLeastImportantPath(grown, seam);
    image = grown;
    return true;
}
|
||||
|
||||
/**
 * Row wise variant of strechImage(): transposes @p image, processes it column wise
 * and transposes it back. Parameters and return value are those of strechImage().
 */
bool SeamCarving::strechImageVert(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect)
{
    cv::transpose(image, image);
    const bool success = strechImage(image, seams, grow, seamsVect);
    cv::transpose(image, image);
    return success;
}
|
||||
|
||||
/**
 * Like strechImage(), but additionally produces a visualisation of the seams.
 *
 * @param image      image to modify in place
 * @param seamsImage receives a copy of the unmodified input; on success every
 *                   processed seam is drawn onto it
 * @param seams      number of seams to process
 * @param grow       true to insert the seams, false to remove them
 * @return the result of strechImage()
 */
bool SeamCarving::strechImageWithSeamsImage(cv::Mat& image, cv::Mat& seamsImage, int seams, bool grow)
{
    seamsImage = image.clone();

    std::vector<std::vector<int>> recordedSeams;
    if(!SeamCarving::strechImage(image, seams, grow, &recordedSeams))
        return false;

    for(const std::vector<int>& seam : recordedSeams)
        seamsImage = drawSeam(seamsImage, seam);
    return true;
}
|
||||
|
||||
/**
 * Computes an energy image as the sum of the absolute responses of a 3x3 Sobel
 * style kernel and its transpose, applied to the grayscale version of @p img.
 *
 * @param img colour image
 * @return energy image of the same size
 */
cv::Mat SeamCarving::GetEnergyImg(const cv::Mat &img)
{
    // 3x3 derivative kernel. A missing comma used to merge the last two coefficients
    // into "-2 - 1", leaving only 8 values for a 3x3 matrix (out of bounds read).
    float pd[] = { 1,  2,  1,
                   0,  0,  0,
                  -1, -2, -1};
    cv::Mat xFilter(3, 3, CV_32FC1, pd);
    cv::Mat yFilter = xFilter.t();

    cv::Mat grayImg;
    cv::cvtColor(img, grayImg, cv::COLOR_RGBA2GRAY);

    // NOTE(review): ddepth 0 is CV_8U, so negative filter responses presumably saturate
    // to 0 before cv::abs() sees them - confirm whether a signed depth was intended.
    cv::Mat dxImg;
    cv::Mat dyImg;
    cv::filter2D(grayImg, dxImg, 0, xFilter);
    cv::filter2D(grayImg, dyImg, 0, yFilter);

    cv::Mat absDxImg = cv::abs(dxImg);
    cv::Mat absDyImg = cv::abs(dyImg);

    cv::Mat energyImg;
    cv::add(absDxImg, absDyImg, energyImg);
    return energyImg;
}
|
||||
|
||||
/**
 * Computes the per pixel gradient magnitude sqrt(dx^2 + dy^2) of the grayscale
 * version of @p frame using Sobel derivatives.
 *
 * @param frame colour image
 * @return CV_32FC1 image of the same size
 */
cv::Mat SeamCarving::computeGradientMagnitude(const cv::Mat &frame)
{
    cv::Mat gray;
    cv::cvtColor(frame, gray, cv::COLOR_RGBA2GRAY);

    cv::Mat magnitude = cv::Mat::zeros(gray.size(), CV_32FC1);

    // {dx order, dy order}: first the horizontal, then the vertical derivative.
    const int orders[2][2] = {{1, 0}, {0, 1}};
    for(const auto& order : orders)
    {
        cv::Mat derivative;
        cv::Sobel(gray, derivative, CV_16SC1, order[0], order[1]);

        cv::Mat derivativeFloat;
        derivative.convertTo(derivativeFloat, CV_32FC1);
        cv::accumulateSquare(derivativeFloat, magnitude);
    }

    cv::sqrt(magnitude, magnitude);
    return magnitude;
}
|
||||
|
||||
/**
 * Bounds guard for path cost lookups.
 *
 * @param currIndex value that was looked up at index @p start
 * @param start     index the value belongs to
 * @param end       number of valid indices
 * @return @p currIndex if @p start lies in [0, end), FLT_MAX otherwise
 */
float SeamCarving::intensity(float currIndex, int start, int end)
{
    const bool inRange = start >= 0 && start < end;
    return inRange ? currIndex : FLT_MAX;
}
|
||||
|
||||
/**
 * Builds the cumulative energy map used to find vertical seams.
 *
 * @param rawEnergyMap per pixel energy, accessed as float (CV_32FC1)
 * @return CV_32FC1 map of the same size in which every pixel holds its own energy plus
 *         the smallest cumulative value of the (up to three) pixels directly above it;
 *         an empty cv::Mat if the input is empty
 */
cv::Mat SeamCarving::computePathIntensityMat(const cv::Mat &rawEnergyMap)
{
    if(rawEnergyMap.total() == 0)
        return cv::Mat();

    cv::Mat pathIntensityMap(rawEnergyMap.size(), CV_32FC1);

    // The first row has nothing above it, so it is the raw energy.
    rawEnergyMap.row(0).copyTo(pathIntensityMap.row(0));

    const int cols = pathIntensityMap.cols;
    for(int row = 1; row < pathIntensityMap.rows; row++)
    {
        const float* energy = rawEnergyMap.ptr<float>(row);
        const float* above = pathIntensityMap.ptr<float>(row-1);
        float* current = pathIntensityMap.ptr<float>(row);

        for(int col = 0; col < cols; col++)
        {
            // Neighbours are only read after the bounds check; the previous version
            // read column -1 and column `cols` before checking the index.
            float minIntensity = above[col];
            if(col > 0)
                minIntensity = std::min(minIntensity, above[col-1]);
            if(col+1 < cols)
                minIntensity = std::min(minIntensity, above[col+1]);

            current[col] = energy[col] + minIntensity;
        }
    }
    return pathIntensityMap;
}
|
||||
|
||||
/**
 * Traces the cheapest vertical seam through a cumulative energy map.
 *
 * @param importanceMap cumulative energy map, accessed as float (CV_32FC1)
 * @return one column index per row (index = row), or an empty vector for an empty map
 *
 * The seam starts at the minimum of the bottom row (first one on ties) and moves
 * upwards, stepping to the left or right neighbour only when that neighbour is
 * strictly cheaper than both alternatives.
 */
std::vector<int> SeamCarving::getLeastImportantPath(const cv::Mat &importanceMap)
{
    if(importanceMap.total() == 0)
        return std::vector<int>();

    const int cols = importanceMap.cols;
    const int lastRow = importanceMap.rows - 1;

    // Seam start: column of the absolute minimum of the bottom row.
    const float* bottom = importanceMap.ptr<float>(lastRow);
    int minCol = 0;
    for(int col = 1; col < cols; col++)
    {
        if(bottom[col] < bottom[minCol])
            minCol = col;
    }

    std::vector<int> leastEnergySeam(importanceMap.rows);
    leastEnergySeam[lastRow] = minCol;
    for(int row = lastRow - 1; row >= 0; row--)
    {
        const float* current = importanceMap.ptr<float>(row);

        // Out of range neighbours count as infinitely expensive and are never read.
        const float p1 = minCol > 0 ? current[minCol-1] : FLT_MAX;
        const float p2 = current[minCol];
        const float p3 = minCol+1 < cols ? current[minCol+1] : FLT_MAX;

        if(p1 < p2 && p1 < p3)
            minCol -= 1;
        else if(p3 < p1 && p3 < p2)
            minCol += 1;

        leastEnergySeam[row] = minCol;
    }

    return leastEnergySeam;
}
|
||||
|
||||
/**
 * Creates a copy of @p original that is one column narrower, with the pixel
 * seam[row] removed from every row.
 *
 * @param original source image
 * @param seam     column to remove for each row
 * @return the narrowed image
 */
cv::Mat SeamCarving::removeLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam)
{
    cv::Mat narrowed(cv::Size(original.cols-1, original.rows), original.type());

    size_t row = 0;
    for(const int seamCol : seam)
    {
        removePixel(original, narrowed, row, seamCol);
        ++row;
    }
    return narrowed;
}
|
||||
|
||||
/**
 * Copies one row from @p original to @p outputMat while dropping the pixel at
 * column @p minCol, and blends the dropped pixel into its two neighbours.
 *
 * @param original  source image
 * @param outputMat destination image, one column narrower than @p original
 * @param row       row to process
 * @param minCol    column of the pixel to drop
 *
 * Rows are addressed as row * channels * width bytes from the data pointer, i.e. the
 * code assumes tightly packed rows and one byte per channel.
 * NOTE(review): both matrices are presumably continuous 8 bit images - confirm at the callers.
 */
void SeamCarving::removePixel(const cv::Mat &original, cv::Mat &outputMat, int row, int minCol)
{
    const int width = original.cols;
    const int channels = original.channels();

    const unsigned char* srcRow = original.data + static_cast<size_t>(row) * channels * width;
    unsigned char* dstRow = outputMat.data + static_cast<size_t>(row) * channels * (width - 1);

    // Everything left of the seam keeps its position ...
    const size_t bytesBefore = static_cast<size_t>(minCol) * channels;
    memcpy(dstRow, srcRow, bytesBefore);

    // ... everything right of it moves one pixel to the left.
    const size_t bytesAfter = static_cast<size_t>(width - 1 - minCol) * channels;
    memcpy(dstRow + bytesBefore, srcRow + bytesBefore + channels, bytesAfter);

    // Blend the removed pixel into its neighbours. Looping over the actual channel
    // count avoids reading past the pixel for images with fewer than three channels
    // and also covers a fourth channel.
    const unsigned char* removed = srcRow + bytesBefore;

    if(minCol + 1 < width)
    {
        const unsigned char* right = removed + channels;
        unsigned char* target = dstRow + bytesBefore;
        for(int c = 0; c < channels; ++c)
            target[c] = static_cast<unsigned char>((removed[c] + right[c]) / 2);
    }

    if(minCol - 1 >= 0)
    {
        const unsigned char* left = removed - channels;
        unsigned char* target = dstRow + bytesBefore - channels;
        for(int c = 0; c < channels; ++c)
            target[c] = static_cast<unsigned char>((removed[c] + left[c]) / 2);
    }
}
|
||||
|
||||
/**
 * Creates a copy of @p original that is one column wider, with an additional pixel
 * inserted at seam[row] in every row.
 *
 * @param original source image
 * @param seam     insertion column for each row
 * @return the widened image
 */
cv::Mat SeamCarving::addLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam)
{
    cv::Mat widened(cv::Size(original.cols+1, original.rows), original.type());

    size_t row = 0;
    for(const int seamCol : seam)
    {
        addPixel(original, widened, row, seamCol);
        ++row;
    }
    return widened;
}
|
||||
|
||||
// Copies one row of `original` into `outputMat` (which must be one column
// wider) while inserting an extra pixel directly to the right of `minCol`.
//
// original:  source image; byte offsets are computed as column * channels,
//            so an 8-bit depth is assumed.
// outputMat: destination with original.cols + 1 columns and the same type.
// row:       row to process.
// minCol:    seam column in `original`; expected to satisfy 0 <= minCol < cols.
//
// Layout of the written row:
//   - columns [0, minCol] are copied unchanged,
//   - column minCol + 1 is the inserted pixel (a copy of the right neighbour,
//     or of the seam pixel itself when the seam sits in the last column),
//   - the remaining source columns are shifted right by one.
// Afterwards the seam pixel and its left neighbour are replaced in the output
// by the average of the seam pixel with its right / left source neighbour.
void SeamCarving::addPixel(const cv::Mat &original, cv::Mat &outputMat, int row, int minCol)
{
	const int width = original.cols;
	const int channels = original.channels();

	// Row pointers honour the matrix step, so non-continuous matrices
	// (for example regions of interest) are addressed correctly.
	const unsigned char *srcRow = original.ptr<unsigned char>(row);
	unsigned char *dstRow = outputMat.ptr<unsigned char>(row);

	// Everything up to and including the seam pixel keeps its position.
	const int firstNum = (minCol + 1) * channels;
	memcpy(dstRow, srcRow, firstNum);

	// Inserted pixel: never read past the end of the source row.
	const int fillCol = (minCol + 1 < width) ? minCol + 1 : minCol;
	memcpy(dstRow + firstNum, srcRow + fillCol * channels, channels);

	// Remaining source pixels move one column to the right.
	const int secondNum = (width * channels) - firstNum;
	if(secondNum > 0)
		memcpy(dstRow + firstNum + channels, srcRow + firstNum, secondNum);

	const int leftPixel = minCol - 1;
	const int rightPixel = minCol + 1;
	const unsigned char *seamPixel = srcRow + minCol * channels;

	if(rightPixel < width)
	{
		const unsigned char *rightSrc = srcRow + rightPixel * channels;
		unsigned char *target = dstRow + minCol * channels;
		for(int channel = 0; channel < channels; ++channel)
			target[channel] = (unsigned char)((seamPixel[channel] + rightSrc[channel]) / 2);
	}

	if(leftPixel >= 0)
	{
		const unsigned char *leftSrc = srcRow + leftPixel * channels;
		unsigned char *target = dstRow + leftPixel * channels;
		for(int channel = 0; channel < channels; ++channel)
			target[channel] = (unsigned char)((seamPixel[channel] + leftSrc[channel]) / 2);
	}
}
|
||||
|
||||
// Returns a copy of `frame` with the seam marked in green (0, 255, 0).
//
// frame: image to annotate; pixels are accessed as cv::Vec3b, so a
//        3-channel 8-bit image is expected.
// seam:  seam[row] is the column to mark in that row. Rows without a seam
//        entry are left untouched.
cv::Mat SeamCarving::drawSeam(const cv::Mat &frame, const std::vector<int> &seam)
{
	cv::Mat retMat = frame.clone();
	if(frame.cols <= 0)
		return retMat;

	// Never index past the end of the seam when it is shorter than the frame.
	const int seamRows = static_cast<int>(seam.size());
	const int rows = frame.rows < seamRows ? frame.rows : seamRows;

	// One write per row is enough; the marked pixel does not depend on any column loop.
	const cv::Vec3b green(0, 255, 0);
	for(int row = 0; row < rows; row++)
		retMat.at<cv::Vec3b>(row, seam[row]) = green;

	return retMat;
}
|
24
SmartCrop/seamcarving.h
Normal file
24
SmartCrop/seamcarving.h
Normal file
@ -0,0 +1,24 @@
|
||||
#pragma once
|
||||
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <vector>
|
||||
|
||||
class SeamCarving
|
||||
{
|
||||
private:
|
||||
static cv::Mat GetEnergyImg(const cv::Mat &img);
|
||||
static cv::Mat computeGradientMagnitude(const cv::Mat &frame);
|
||||
static float intensity(float currIndex, int start, int end);
|
||||
static cv::Mat computePathIntensityMat(const cv::Mat &rawEnergyMap);
|
||||
static std::vector<int> getLeastImportantPath(const cv::Mat &importanceMap);
|
||||
static cv::Mat removeLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam);
|
||||
static void removePixel(const cv::Mat &original, cv::Mat &outputMap, int row, int minCol);
|
||||
static cv::Mat addLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam);
|
||||
static void addPixel(const cv::Mat &original, cv::Mat &outputMat, int row, int minCol);
|
||||
static cv::Mat drawSeam(const cv::Mat &frame, const std::vector<int> &seam);
|
||||
|
||||
public:
|
||||
static bool strechImage(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect = nullptr);
|
||||
static bool strechImageVert(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect = nullptr);
|
||||
static bool strechImageWithSeamsImage(cv::Mat& image, cv::Mat& seamsImage, int seams, bool grow);
|
||||
};
|
26
SmartCrop/tokenize.cpp
Normal file
26
SmartCrop/tokenize.cpp
Normal file
@ -0,0 +1,26 @@
|
||||
#include "tokenize.h"
|
||||
|
||||
|
||||
// Splits `str` at every `delim` that is neither inside a bracketed section
// nor directly preceded by `escapeChar`.
//
// str:          text to split.
// delim:        separator character.
// ignoreBraket: every occurrence toggles the "inside bracket" state; while
//               inside, delimiters are kept as ordinary characters. The
//               bracket characters themselves stay in the token.
// escapeChar:   a delimiter immediately following this character does not
//               split; the escape character stays in the token.
// returns:      the tokens in order. Empty tokens are kept. If the input ends
//               while still inside a bracket, the unfinished last token is
//               not returned.
std::vector<std::string> tokenizeBinaryIgnore(const std::string& str, const char delim, const char ignoreBraket, const char escapeChar)
{
	std::vector<std::string> result;
	std::string current;
	bool insideBracket = false;
	bool previousWasEscape = false; // false for the very first character

	for(const char ch : str)
	{
		const bool splitsHere = ch == delim && !insideBracket && !previousWasEscape;
		if(splitsHere)
		{
			result.push_back(current);
			current.clear();
		}
		else
		{
			current.push_back(ch);
		}

		if(ch == ignoreBraket)
			insideBracket = !insideBracket;
		previousWasEscape = (ch == escapeChar);
	}

	if(insideBracket)
		return result;

	result.push_back(current);
	return result;
}
|
7
SmartCrop/tokenize.h
Normal file
7
SmartCrop/tokenize.h
Normal file
@ -0,0 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// Splits `str` at `delim`, except where the delimiter lies between two
// `ignoreBraket` characters or directly follows `escapeChar`.
// Both optional characters default to '\0'.
std::vector<std::string> tokenizeBinaryIgnore(const std::string& str,
	const char delim,
	const char ignoreBraket = '\0',
	const char escapeChar = '\0');
|
60
SmartCrop/utils.cpp
Normal file
60
SmartCrop/utils.cpp
Normal file
@ -0,0 +1,60 @@
|
||||
#include "utils.h"

#include <cctype>
#include <filesystem>
#include <string>
#include <vector>
#include <opencv2/imgproc.hpp>
|
||||
|
||||
// Returns true when `path` refers to an existing regular file whose extension
// is .png, .jpg or .jpeg. The extension is compared case-insensitively, so
// names such as "IMG_0001.JPG" are accepted as well.
bool isImagePath(const std::filesystem::path& path)
{
	if(!std::filesystem::is_regular_file(path))
		return false;

	// Normalise once instead of querying the extension for every comparison.
	std::string extension = path.extension().string();
	for(char& ch : extension)
		ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));

	return extension == ".png" || extension == ".jpg" || extension == ".jpeg";
}
|
||||
|
||||
void getImageFiles(const std::filesystem::path& path, std::vector<std::filesystem::path>& paths)
|
||||
{
|
||||
if(isImagePath(path))
|
||||
{
|
||||
paths.push_back(path);
|
||||
}
|
||||
else if(std::filesystem::is_directory(path))
|
||||
{
|
||||
for(const std::filesystem::directory_entry& dirent : std::filesystem::directory_iterator(path))
|
||||
{
|
||||
if(std::filesystem::is_directory(dirent.path()))
|
||||
getImageFiles(dirent.path(), paths);
|
||||
else if(isImagePath(dirent.path()))
|
||||
paths.push_back(dirent.path());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Computes the axis-aligned bounding rectangle of a set of points.
//
// points: (point, int) pairs; only the point part is used.
// returns: rectangle from the smallest to the largest x / y coordinate, with
//          width = maxX - minX and height = maxY - minY. An empty input
//          yields a default-constructed (all zero) rectangle.
cv::Rect rectFromPoints(const std::vector<std::pair<cv::Point, int>>& points)
{
	// Without any point there are no extremes to subtract; the sentinel
	// based computation would overflow here.
	if(points.empty())
		return cv::Rect();

	int left = points.front().first.x;
	int right = left;
	int top = points.front().first.y;
	int bottom = top;

	for(const std::pair<cv::Point, int>& point : points)
	{
		const cv::Point& position = point.first;

		if(position.x < left)
			left = position.x;
		if(position.x > right)
			right = position.x;

		if(position.y < top)
			top = position.y;
		if(position.y > bottom)
			bottom = position.y;
	}

	return cv::Rect(left, top, right-left, bottom-top);
}
|
||||
|
||||
// Returns the norm (as computed by cv::norm) of the difference between the
// two integer points, i.e. their distance.
double pointDist(const cv::Point2i& pointA, const cv::Point2i& pointB)
{
	const cv::Vec2i from(pointA.x, pointA.y);
	const cv::Vec2i to(pointB.x, pointB.y);
	const cv::Vec2i delta = from - to;
	return cv::norm(delta);
}
|
||||
|
||||
// Tests whether `point` lies within `rect`. All four edges count as inside:
// x may equal rect.x + rect.width and y may equal rect.y + rect.height.
bool pointInRect(const cv::Point2i& point, const cv::Rect& rect)
{
	const bool insideHorizontally = point.x >= rect.x && point.x <= rect.x+rect.width;
	const bool insideVertically = point.y >= rect.y && point.y <= rect.y+rect.height;
	return insideHorizontally && insideVertically;
}
|
15
SmartCrop/utils.h
Normal file
15
SmartCrop/utils.h
Normal file
@ -0,0 +1,15 @@
|
||||
#pragma once
|
||||
|
||||
#include <filesystem>
|
||||
#include <vector>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
|
||||
bool isImagePath(const std::filesystem::path& path);
|
||||
|
||||
void getImageFiles(const std::filesystem::path& path, std::vector<std::filesystem::path>& paths);
|
||||
|
||||
cv::Rect rectFromPoints(const std::vector<std::pair<cv::Point, int>>& points);
|
||||
|
||||
double pointDist(const cv::Point2i& pointA, const cv::Point2i& pointB);
|
||||
|
||||
bool pointInRect(const cv::Point2i& point, const cv::Rect& rect);
|
258
SmartCrop/yolo.cpp
Normal file
258
SmartCrop/yolo.cpp
Normal file
@ -0,0 +1,258 @@
|
||||
#include <opencv2/dnn/dnn.hpp>
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <stdexcept>
|
||||
|
||||
#include "yolo.h"
|
||||
#include "readfile.h"
|
||||
#include "tokenize.h"
|
||||
#include "log.h"
|
||||
|
||||
#define INCBIN_PREFIX r
|
||||
#include "incbin.h"
|
||||
|
||||
// Built-in class list taken from WEIGHT_DIR/classes.txt via the incbin INCTXT
// macro; the constructor reads it as rdefaultClassesData when no classes file
// is supplied.
INCTXT(defaultClasses, WEIGHT_DIR "/classes.txt");
|
||||
// Built-in ONNX model taken from WEIGHT_DIR/yolov8x.onnx via the incbin INCBIN
// macro; the constructor reads it through rdefaultModelData / rdefaultModelSize
// when no model path is supplied.
INCBIN(defaultModel, WEIGHT_DIR "/yolov8x.onnx");
|
||||
|
||||
// Sets up the detector.
//
// onnxModelPath:      ONNX model file; when empty the model compiled into the
//                     binary is used.
// modelInputShape:    input size handed to the network.
// classesTxtFilePath: class list file; when empty the built-in list is used.
// runWithOCl:         true selects the default backend with the OpenCL
//                     target, false the OpenCV backend on the CPU.
Yolo::Yolo(const std::filesystem::path &onnxModelPath, const cv::Size &modelInputShape,
	const std::filesystem::path& classesTxtFilePath, bool runWithOCl)
{
	modelPath = onnxModelPath;
	modelShape = modelInputShape;

	// Class names: external file if given, otherwise the embedded list.
	if(!classesTxtFilePath.empty())
	{
		const std::string externalClasses = readFile(classesTxtFilePath);
		loadClasses(externalClasses);
	}
	else
	{
		Log(Log::INFO)<<"Using builtin classes";
		loadClasses(rdefaultClassesData);
	}

	// Network weights: embedded model unless a path was supplied.
	if(modelPath.empty())
	{
		Log(Log::INFO)<<"Using builtin yolo model";
		net = cv::dnn::readNetFromONNX((const char*)rdefaultModelData, rdefaultModelSize);
	}
	else
	{
		net = cv::dnn::readNetFromONNX(modelPath);
	}

	net.setPreferableBackend(runWithOCl ? cv::dnn::DNN_BACKEND_DEFAULT : cv::dnn::DNN_BACKEND_OPENCV);
	net.setPreferableTarget(runWithOCl ? cv::dnn::DNN_TARGET_OPENCL : cv::dnn::DNN_TARGET_CPU);
}
|
||||
|
||||
// Runs the network on `input` and returns the detections that survive
// non-maximum suppression.
//
// input:   image to analyse. When letterboxing is enabled and the model input
//          is square, the image is first placed on a square canvas.
// returns: one Detection per kept box with class id, class name, priority,
//          confidence, a random display colour (components in 100..255) and
//          the box clamped to the size of `input`. Empty when the network
//          produced no output.
std::vector<Yolo::Detection> Yolo::runInference(const cv::Mat &input)
{
	cv::Mat modelInput = input;
	if (letterBoxForSquare && modelShape.width == modelShape.height)
		modelInput = formatToSquare(modelInput);

	cv::Mat blob;
	cv::dnn::blobFromImage(modelInput, blob, 1.0/255.0, modelShape, cv::Scalar(), true, false);
	net.setInput(blob);

	std::vector<cv::Mat> outputs;
	net.forward(outputs, net.getUnconnectedOutLayersNames());
	if(outputs.empty())
		return {};

	int rows = outputs[0].size[1];
	int dimensions = outputs[0].size[2];

	bool yolov8 = false;
	// yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
	// yolov8 has an output of shape (batchSize, 84, 8400) (Num classes + box[x,y,w,h])
	if (dimensions > rows) // Check if the shape[2] is more than shape[1] (yolov8)
	{
		yolov8 = true;
		rows = outputs[0].size[2];
		dimensions = outputs[0].size[1];

		// Bring the yolov8 layout into one candidate per row, like yolov5.
		outputs[0] = outputs[0].reshape(1, dimensions);
		cv::transpose(outputs[0], outputs[0]);
	}
	float *data = (float *)outputs[0].data;

	// Scale from network input coordinates back to the (letterboxed) image.
	const float x_factor = modelInput.cols / modelShape.width;
	const float y_factor = modelInput.rows / modelShape.height;

	// Class scores start after the box (yolov8) or after box + confidence (yolov5).
	const int scoreOffset = yolov8 ? 4 : 5;

	std::vector<int> class_ids;
	std::vector<float> confidences;
	std::vector<cv::Rect> boxes;

	for (int i = 0; i < rows; ++i, data += dimensions)
	{
		// yolov5 only: drop candidates with a low objectness confidence.
		if (!yolov8 && !(data[4] >= modelConfidenceThreshold))
			continue;

		cv::Mat scores(1, classes.size(), CV_32FC1, data + scoreOffset);
		cv::Point class_id;
		double maxClassScore;
		cv::minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);

		if (!(maxClassScore > modelScoreThreshold))
			continue;

		// yolov8 reports the best class score, yolov5 its objectness confidence.
		confidences.push_back(yolov8 ? static_cast<float>(maxClassScore) : data[4]);
		class_ids.push_back(class_id.x);

		// Box is given as centre x/y plus width/height.
		const float x = data[0];
		const float y = data[1];
		const float w = data[2];
		const float h = data[3];

		const int left = int((x - 0.5 * w) * x_factor);
		const int top = int((y - 0.5 * h) * y_factor);
		const int width = int(w * x_factor);
		const int height = int(h * y_factor);

		boxes.push_back(cv::Rect(left, top, width, height));
	}

	std::vector<int> nms_result;
	cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nms_result);

	// One generator for the whole call instead of re-seeding per detection.
	std::random_device rd;
	std::mt19937 gen(rd());
	std::uniform_int_distribution<int> dis(100, 255);

	std::vector<Yolo::Detection> detections;
	detections.reserve(nms_result.size());
	for(const int idx : nms_result)
	{
		Yolo::Detection result;
		result.class_id = class_ids[idx];
		result.confidence = confidences[idx];

		const int first = dis(gen);
		const int second = dis(gen);
		const int third = dis(gen);
		result.color = cv::Scalar(first, second, third);

		result.className = classes[result.class_id].first;
		result.priority = classes[result.class_id].second;
		clampBox(boxes[idx], input.size());
		result.box = boxes[idx];
		detections.push_back(result);
	}

	return detections;
}
|
||||
|
||||
|
||||
// Restricts `box` to the image area (0, 0, size.width, size.height).
//
// A box that partly overlaps the image is cut at the image borders. A box
// with no overlap at all becomes an empty rectangle instead of ending up with
// a negative width or height.
void Yolo::clampBox(cv::Rect& box, const cv::Size& size)
{
	// Rectangle intersection performs the clamping on all four sides.
	box &= cv::Rect(cv::Point(0, 0), size);
}
|
||||
|
||||
void Yolo::loadClasses(const std::string& classesStr)
|
||||
{
|
||||
std::vector<std::string> candidateClasses = tokenizeBinaryIgnore(classesStr, '\n', '"', '\\');
|
||||
classes.clear();
|
||||
for(std::string& instance : candidateClasses)
|
||||
{
|
||||
if(instance.size() < 2)
|
||||
continue;
|
||||
|
||||
std::vector<std::string> tokens = tokenizeBinaryIgnore(instance, ',', '"', '\\');
|
||||
|
||||
if(*tokens[0].begin() == '"')
|
||||
instance.erase(tokens[0].begin());
|
||||
if(tokens[0].back() == '"')
|
||||
tokens[0].pop_back();
|
||||
int priority = -1;
|
||||
if(tokens.size() > 1)
|
||||
{
|
||||
try
|
||||
{
|
||||
priority = std::stoi(tokens[1]);
|
||||
}
|
||||
catch(const std::invalid_argument& err)
|
||||
{
|
||||
Log(Log::WARN)<<"unable to get priority for class "<<tokens[0]<<' '<<err.what();
|
||||
}
|
||||
}
|
||||
classes.push_back({tokens[0], priority});
|
||||
}
|
||||
}
|
||||
|
||||
// Places `source` in the top-left corner of a zero-filled square canvas whose
// side equals the larger of the source's width and height.
//
// The canvas uses the source's own type, so the pixels are copied for any
// channel count or depth rather than only for 8-bit 3-channel images.
cv::Mat Yolo::formatToSquare(const cv::Mat &source)
{
	const int side = std::max(source.cols, source.rows);
	cv::Mat result = cv::Mat::zeros(side, side, source.type());
	source.copyTo(result(cv::Rect(0, 0, source.cols, source.rows)));
	return result;
}
|
||||
|
||||
int Yolo::getClassForStr(const std::string& str) const
|
||||
{
|
||||
for(size_t i = 0; i < classes.size(); ++i)
|
||||
{
|
||||
if(classes[i].first == str)
|
||||
return i;
|
||||
}
|
||||
return -1;
|
||||
}
|
46
SmartCrop/yolo.h
Normal file
46
SmartCrop/yolo.h
Normal file
@ -0,0 +1,46 @@
|
||||
#pragma once
|
||||
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <random>
|
||||
#include <filesystem>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <opencv2/opencv.hpp>
|
||||
#include <opencv2/dnn.hpp>
|
||||
|
||||
class Yolo
|
||||
{
|
||||
public:
|
||||
struct Detection
|
||||
{
|
||||
int class_id = 0;
|
||||
std::string className;
|
||||
float confidence = 0.0;
|
||||
int priority = -1;
|
||||
cv::Scalar color;
|
||||
cv::Rect box;
|
||||
};
|
||||
|
||||
private:
|
||||
static constexpr float modelConfidenceThreshold = 0.25;
|
||||
static constexpr float modelScoreThreshold = 0.45;
|
||||
static constexpr float modelNMSThreshold = 0.50;
|
||||
|
||||
std::string modelPath;
|
||||
std::vector<std::pair<std::string, int>> classes;
|
||||
cv::Size2f modelShape;
|
||||
bool letterBoxForSquare = true;
|
||||
cv::dnn::Net net;
|
||||
|
||||
void loadClasses(const std::string& classes);
|
||||
void loadOnnxNetwork(const std::filesystem::path& path);
|
||||
cv::Mat formatToSquare(const cv::Mat &source);
|
||||
static void clampBox(cv::Rect& box, const cv::Size& size);
|
||||
|
||||
public:
|
||||
Yolo(const std::filesystem::path &onnxModelPath = "", const cv::Size& modelInputShape = {640, 480},
|
||||
const std::filesystem::path& classesTxtFilePath = "", bool runWithOCl = true);
|
||||
std::vector<Detection> runInference(const cv::Mat &input);
|
||||
int getClassForStr(const std::string& str) const;
|
||||
};
|
Reference in New Issue
Block a user