initial commit

2024-06-14 08:54:09 +02:00
commit cd1e2756bc
39 changed files with 4163 additions and 0 deletions

16
SmartCrop/CMakeLists.txt Normal file

@@ -0,0 +1,16 @@
cmake_minimum_required(VERSION 3.6)
find_package(OpenCV REQUIRED)
set(CMAKE_CXX_STANDARD 17)
set(SRC_FILES main.cpp yolo.cpp tokenize.cpp log.cpp seamcarving.cpp utils.cpp intelligentroi.cpp facerecognizer.cpp)
add_executable(smartcrop ${SRC_FILES})
target_link_libraries(smartcrop ${OpenCV_LIBS} -ltbb)
target_include_directories(smartcrop PRIVATE ${OpenCV_INCLUDE_DIRS})
target_compile_options(smartcrop PRIVATE -s -g -Wall)
message(WARNING ${WEIGHT_DIR})
target_compile_definitions(smartcrop PUBLIC WEIGHT_DIR="${WEIGHT_DIR}")
install(TARGETS smartcrop RUNTIME DESTINATION bin)
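
Note that WEIGHT_DIR is not defined in this file: it has to be supplied at configure time, since it is baked into the binary through the WEIGHT_DIR compile definition and is used by facerecognizer.cpp below to locate the bundled .onnx models. A configure invocation could therefore look like the following (the path is only an example):
$ cmake -DWEIGHT_DIR=/usr/share/smartcrop ..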

50
SmartCrop/README.md Normal file

@@ -0,0 +1,50 @@
# SmartCrop
SmartCrop is an application that performs content aware cropping, using [seam carving](https://en.wikipedia.org/wiki/Seam_carving) and resizing to bring a directory of images to the desired size and aspect ratio for training. SmartCrop is configurable to prioritize specific items or specific persons in the images provided.
## Requirements
* [cmake](https://cmake.org/) 3.6 or later
* [opencv](https://opencv.org/) 4.8 or later
* A C++17 capable compiler and standard library such as gcc or llvm/clang
* git is required to get the source
## Building
The steps to build this application are:
$ git clone https://uvos.xyz/git/uvos/SDImagePreprocess.git
$ cd SDImagePreprocess
$ mkdir build
$ cd build
$ cmake ..
$ make
The binary can then be found in build/SmartCrop and can optionally be installed with:
$ sudo make install
## Basic usage
To process all images in the directory ~/images and output the images into ~/processedImages:
$ smartcrop --out processedImages ~/images/*
To also focus on the person shown in the image ~/person.jpg:
$ smartcrop --out processedImages --focus-person ~/person.jpg ~/images/*
To additionally enable seam carving:
$ smartcrop --out processedImages --focus-person ~/person.jpg --seam-carving ~/images/*
See smartcrop --help for more options.
## Example
#### Content detected in image:
![Content found in image](images/IMGP3692.jpg)
#### Cropped image based on content:
![Cropped image](images/IMGP3692C.jpg)

163
SmartCrop/facerecognizer.cpp Normal file

@@ -0,0 +1,163 @@
//
// SmartCrop - A tool for content aware cropping of images
// Copyright (C) 2024 Carl Philipp Klemm
//
// This file is part of SmartCrop.
//
// SmartCrop is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// SmartCrop is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
//
#include "facerecognizer.h"
#include <filesystem>
#define INCBIN_PREFIX r
#include "incbin.h"
INCBIN(defaultRecognizer, WEIGHT_DIR "/face_recognition_sface_2021dec.onnx");
INCBIN(defaultDetector, WEIGHT_DIR "/face_detection_yunet_2023mar.onnx");
#include <opencv2/dnn/dnn.hpp>
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <fstream>
#include "log.h"
static const std::vector<unsigned char> onnx((unsigned char*)rdefaultDetectorData, ((unsigned char*)rdefaultDetectorData)+rdefaultDetectorSize);
FaceRecognizer::FaceRecognizer(std::filesystem::path recognizerPath, const std::filesystem::path& detectorPath, const std::vector<cv::Mat>& referances)
{
if(detectorPath.empty())
{
Log(Log::INFO)<<"Using builtin face detection model";
detector = cv::FaceDetectorYN::create("onnx", onnx, std::vector<unsigned char>(), {320, 320}, 0.6, 0.3, 5000, cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
if(!detector)
throw LoadException("Unable to load detector network from built in file");
}
else
{
detector = cv::FaceDetectorYN::create(detectorPath, "", {320, 320}, 0.6, 0.3, 5000, cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
if(!detector)
throw LoadException("Unable to load detector network from "+detectorPath.string());
}
bool defaultNetwork = recognizerPath.empty();
if(defaultNetwork)
{
Log(Log::INFO)<<"Using builtin face recognition model";
recognizerPath = cv::tempfile("onnx");
std::ofstream file(recognizerPath);
if(!file.is_open())
throw LoadException("Unable open temporary file at "+recognizerPath.string());
Log(Log::DEBUG)<<"Using "<<recognizerPath<<" as temporary file for onnx recongnition network";
file.write(reinterpret_cast<const char*>(rdefaultRecognizerData), rdefaultRecognizerSize);
file.close();
}
recognizer = cv::FaceRecognizerSF::create(recognizerPath.string(), "", cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
if(defaultNetwork)
std::filesystem::remove(recognizerPath);
if(!recognizer)
throw LoadException("Unable to load recognizer network from "+recognizerPath.string());
addReferances(referances);
}
cv::Mat FaceRecognizer::detectFaces(const cv::Mat& input)
{
detector->setInputSize(input.size());
cv::Mat faces;
detector->detect(input, faces);
return faces;
}
bool FaceRecognizer::addReferances(const std::vector<cv::Mat>& referances)
{
bool ret = false;
for(const cv::Mat& image : referances)
{
cv::Mat faces = detectFaces(image);
if(faces.empty())
{
Log(Log::WARN)<<"A reference image provided does not contain any face";
continue;
}
assert(faces.cols == 15);
if(faces.rows > 1)
Log(Log::WARN)<<"A reference image provided contains more than one face; only the first detected face will be considered";
cv::Mat cropedImage;
recognizer->alignCrop(image, faces.row(0), cropedImage);
cv::Mat features;
recognizer->feature(cropedImage, features);
referanceFeatures.push_back(features.clone());
ret = true;
}
return ret;
}
void FaceRecognizer::setThreshold(double threasholdIn)
{
threshold = threasholdIn;
}
double FaceRecognizer::getThreshold()
{
return threshold;
}
void FaceRecognizer::clearReferances()
{
referanceFeatures.clear();
}
FaceRecognizer::Detection FaceRecognizer::isMatch(const cv::Mat& input, bool alone)
{
cv::Mat faces = detectFaces(input);
Detection bestMatch;
bestMatch.confidence = 0;
bestMatch.person = -1;
if(alone && faces.rows > 1)
{
bestMatch.person = -2;
return bestMatch;
}
for(int i = 0; i < faces.rows; ++i)
{
cv::Mat face;
recognizer->alignCrop(input, faces.row(i), face);
cv::Mat features;
recognizer->feature(face, features);
features = features.clone();
for(size_t referanceIndex = 0; referanceIndex < referanceFeatures.size(); ++referanceIndex)
{
double score = recognizer->match(referanceFeatures[referanceIndex], features, cv::FaceRecognizerSF::FR_COSINE);
if(score > threshold && score > bestMatch.confidence)
{
bestMatch.confidence = score;
bestMatch.person = referanceIndex;
bestMatch.rect = cv::Rect(faces.at<int>(i, 0), faces.at<int>(i, 1), faces.at<int>(i, 2), faces.at<int>(i, 3));
}
}
}
return bestMatch;
}

67
SmartCrop/facerecognizer.h Normal file

@@ -0,0 +1,67 @@
/*
* SmartCrop - A tool for content aware cropping of images
* Copyright (C) 2024 Carl Philipp Klemm
*
* This file is part of SmartCrop.
*
* SmartCrop is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SmartCrop is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <exception>
#include <opencv2/core/mat.hpp>
#include <opencv2/objdetect/face.hpp>
#include <opencv2/core.hpp>
#include <vector>
#include <memory>
#include <filesystem>
class FaceRecognizer
{
public:
struct Detection
{
int person;
float confidence;
cv::Rect rect;
};
class LoadException : public std::exception
{
private:
std::string message;
public:
LoadException(const std::string& msg): std::exception(), message(msg) {}
virtual const char* what() const throw() override
{
return message.c_str();
}
};
private:
std::vector<cv::Mat> referanceFeatures;
std::shared_ptr<cv::FaceRecognizerSF> recognizer;
std::shared_ptr<cv::FaceDetectorYN> detector;
double threshold = 0.363;
public:
FaceRecognizer(std::filesystem::path recognizerPath = "", const std::filesystem::path& detectorPath = "", const std::vector<cv::Mat>& referances = std::vector<cv::Mat>());
cv::Mat detectFaces(const cv::Mat& input);
Detection isMatch(const cv::Mat& input, bool alone = false);
bool addReferances(const std::vector<cv::Mat>& referances);
void setThreshold(double threashold);
double getThreshold();
void clearReferances();
};
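
A minimal usage sketch of the FaceRecognizer class declared above; the image paths are placeholders and LoadException handling is omitted:
#include <opencv2/imgcodecs.hpp>
#include "facerecognizer.h"

int main()
{
	// Image of the person of interest (placeholder path).
	cv::Mat reference = cv::imread("person.jpg");
	// Default construction uses the built-in detector and recognizer weights.
	FaceRecognizer recognizer;
	recognizer.addReferances({reference});
	recognizer.setThreshold(0.363);

	cv::Mat candidate = cv::imread("candidate.jpg");
	// person >= 0 indexes the matched reference, -1 means no match was found.
	FaceRecognizer::Detection match = recognizer.isMatch(candidate);
	return match.person >= 0 ? 0 : 1;
}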

Two binary image files, referenced from the README above, are added but not shown: SmartCrop/images/IMGP3692.jpg (251 KiB) and SmartCrop/images/IMGP3692C.jpg (97 KiB).
495
SmartCrop/incbin.h Normal file

@@ -0,0 +1,495 @@
/*
* SmartCrop - A tool for content aware cropping of images
* Copyright (C) 2024 Carl Philipp Klemm
*
* This file is part of SmartCrop.
*
* SmartCrop is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SmartCrop is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file incbin.h
* @author Dale Weiler
* @brief Utility for including binary files
*
* Facilities for including binary files into the current translation unit and
* making use from them externally in other translation units.
*/
#ifndef INCBIN_HDR
#define INCBIN_HDR
#include <limits.h>
#if defined(__AVX512BW__) || \
defined(__AVX512CD__) || \
defined(__AVX512DQ__) || \
defined(__AVX512ER__) || \
defined(__AVX512PF__) || \
defined(__AVX512VL__) || \
defined(__AVX512F__)
# define INCBIN_ALIGNMENT_INDEX 6
#elif defined(__AVX__) || \
defined(__AVX2__)
# define INCBIN_ALIGNMENT_INDEX 5
#elif defined(__SSE__) || \
defined(__SSE2__) || \
defined(__SSE3__) || \
defined(__SSSE3__) || \
defined(__SSE4_1__) || \
defined(__SSE4_2__) || \
defined(__neon__) || \
defined(__ARM_NEON) || \
defined(__ALTIVEC__)
# define INCBIN_ALIGNMENT_INDEX 4
#elif ULONG_MAX != 0xffffffffu
# define INCBIN_ALIGNMENT_INDEX 3
# else
# define INCBIN_ALIGNMENT_INDEX 2
#endif
/* Lookup table of (1 << n) where `n' is `INCBIN_ALIGNMENT_INDEX' */
#define INCBIN_ALIGN_SHIFT_0 1
#define INCBIN_ALIGN_SHIFT_1 2
#define INCBIN_ALIGN_SHIFT_2 4
#define INCBIN_ALIGN_SHIFT_3 8
#define INCBIN_ALIGN_SHIFT_4 16
#define INCBIN_ALIGN_SHIFT_5 32
#define INCBIN_ALIGN_SHIFT_6 64
/* Actual alignment value */
#define INCBIN_ALIGNMENT \
INCBIN_CONCATENATE( \
INCBIN_CONCATENATE(INCBIN_ALIGN_SHIFT, _), \
INCBIN_ALIGNMENT_INDEX)
/* Stringize */
#define INCBIN_STR(X) \
#X
#define INCBIN_STRINGIZE(X) \
INCBIN_STR(X)
/* Concatenate */
#define INCBIN_CAT(X, Y) \
X ## Y
#define INCBIN_CONCATENATE(X, Y) \
INCBIN_CAT(X, Y)
/* Deferred macro expansion */
#define INCBIN_EVAL(X) \
X
#define INCBIN_INVOKE(N, ...) \
INCBIN_EVAL(N(__VA_ARGS__))
/* Variable argument count for overloading by arity */
#define INCBIN_VA_ARG_COUNTER(_1, _2, _3, N, ...) N
#define INCBIN_VA_ARGC(...) INCBIN_VA_ARG_COUNTER(__VA_ARGS__, 3, 2, 1, 0)
/* Green Hills uses a different directive for including binary data */
#if defined(__ghs__)
# if (__ghs_asm == 2)
# define INCBIN_MACRO ".file"
/* Or consider the ".myrawdata" entry in the ld file */
# else
# define INCBIN_MACRO "\tINCBIN"
# endif
#else
# define INCBIN_MACRO ".incbin"
#endif
#ifndef _MSC_VER
# define INCBIN_ALIGN \
__attribute__((aligned(INCBIN_ALIGNMENT)))
#else
# define INCBIN_ALIGN __declspec(align(INCBIN_ALIGNMENT))
#endif
#if defined(__arm__) || /* GNU C and RealView */ \
defined(__arm) || /* Diab */ \
defined(_ARM) /* ImageCraft */
# define INCBIN_ARM
#endif
#ifdef __GNUC__
/* Utilize .balign where supported */
# define INCBIN_ALIGN_HOST ".balign " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n"
# define INCBIN_ALIGN_BYTE ".balign 1\n"
#elif defined(INCBIN_ARM)
/*
* On arm assemblers, the alignment value is calculated as (1 << n) where `n' is
* the shift count. This is the value passed to `.align'
*/
# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT_INDEX) "\n"
# define INCBIN_ALIGN_BYTE ".align 0\n"
#else
/* We assume other inline assemblers treat `.align' as `.balign' */
# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n"
# define INCBIN_ALIGN_BYTE ".align 1\n"
#endif
/* INCBIN_CONST is used by incbin.c generated files */
#if defined(__cplusplus)
# define INCBIN_EXTERNAL extern "C"
# define INCBIN_CONST extern const
#else
# define INCBIN_EXTERNAL extern
# define INCBIN_CONST const
#endif
/**
* @brief Optionally override the linker section into which size and data is
* emitted.
*
* @warning If you use this facility, you might have to deal with
* platform-specific linker output section naming on your own.
*/
#if !defined(INCBIN_OUTPUT_SECTION)
# if defined(__APPLE__)
# define INCBIN_OUTPUT_SECTION ".const_data"
# else
# define INCBIN_OUTPUT_SECTION ".rodata"
# endif
#endif
/**
* @brief Optionally override the linker section into which data is emitted.
*
* @warning If you use this facility, you might have to deal with
* platform-specific linker output section naming on your own.
*/
#if !defined(INCBIN_OUTPUT_DATA_SECTION)
# define INCBIN_OUTPUT_DATA_SECTION INCBIN_OUTPUT_SECTION
#endif
/**
* @brief Optionally override the linker section into which size is emitted.
*
* @warning If you use this facility, you might have to deal with
* platform-specific linker output section naming on your own.
*
* @note This is useful for Harvard architectures where program memory cannot
* be directly read from the program without special instructions. With this you
* can choose to put the size variable in RAM rather than ROM.
*/
#if !defined(INCBIN_OUTPUT_SIZE_SECTION)
# define INCBIN_OUTPUT_SIZE_SECTION INCBIN_OUTPUT_SECTION
#endif
#if defined(__APPLE__)
# include "TargetConditionals.h"
# if defined(TARGET_OS_IPHONE) && !defined(INCBIN_SILENCE_BITCODE_WARNING)
# warning "incbin is incompatible with bitcode. Using the library will break upload to App Store if you have bitcode enabled. Add `#define INCBIN_SILENCE_BITCODE_WARNING` before including this header to silence this warning."
# endif
/* The directives are different for Apple branded compilers */
# define INCBIN_SECTION INCBIN_OUTPUT_SECTION "\n"
# define INCBIN_GLOBAL(NAME) ".globl " INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n"
# define INCBIN_INT ".long "
# define INCBIN_MANGLE "_"
# define INCBIN_BYTE ".byte "
# define INCBIN_TYPE(...)
#else
# define INCBIN_SECTION ".section " INCBIN_OUTPUT_SECTION "\n"
# define INCBIN_GLOBAL(NAME) ".global " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n"
# if defined(__ghs__)
# define INCBIN_INT ".word "
# else
# define INCBIN_INT ".int "
# endif
# if defined(__USER_LABEL_PREFIX__)
# define INCBIN_MANGLE INCBIN_STRINGIZE(__USER_LABEL_PREFIX__)
# else
# define INCBIN_MANGLE ""
# endif
# if defined(INCBIN_ARM)
/* On arm assemblers, `@' is used as a line comment token */
# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", %object\n"
# elif defined(__MINGW32__) || defined(__MINGW64__)
/* Mingw doesn't support this directive either */
# define INCBIN_TYPE(NAME)
# else
/* It's safe to use `@' on other architectures */
# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", @object\n"
# endif
# define INCBIN_BYTE ".byte "
#endif
/* List of style types used for symbol names */
#define INCBIN_STYLE_CAMEL 0
#define INCBIN_STYLE_SNAKE 1
/**
* @brief Specify the prefix to use for symbol names.
*
* @note By default this is "g".
*
* @code
* #define INCBIN_PREFIX incbin
* #include "incbin.h"
* INCBIN(Foo, "foo.txt");
*
* // Now you have the following symbols instead:
* // const unsigned char incbinFoo<data>[];
* // const unsigned char *const incbinFoo<end>;
* // const unsigned int incbinFoo<size>;
* @endcode
*/
#if !defined(INCBIN_PREFIX)
# define INCBIN_PREFIX g
#endif
/**
* @brief Specify the style used for symbol names.
*
* Possible options are
* - INCBIN_STYLE_CAMEL "CamelCase"
* - INCBIN_STYLE_SNAKE "snake_case"
*
* @note By default this is INCBIN_STYLE_CAMEL
*
* @code
* #define INCBIN_STYLE INCBIN_STYLE_SNAKE
* #include "incbin.h"
* INCBIN(foo, "foo.txt");
*
* // Now you have the following symbols:
* // const unsigned char <prefix>foo_data[];
* // const unsigned char *const <prefix>foo_end;
* // const unsigned int <prefix>foo_size;
* @endcode
*/
#if !defined(INCBIN_STYLE)
# define INCBIN_STYLE INCBIN_STYLE_CAMEL
#endif
/* Style lookup tables */
#define INCBIN_STYLE_0_DATA Data
#define INCBIN_STYLE_0_END End
#define INCBIN_STYLE_0_SIZE Size
#define INCBIN_STYLE_1_DATA _data
#define INCBIN_STYLE_1_END _end
#define INCBIN_STYLE_1_SIZE _size
/* Style lookup: returning identifier */
#define INCBIN_STYLE_IDENT(TYPE) \
INCBIN_CONCATENATE( \
INCBIN_STYLE_, \
INCBIN_CONCATENATE( \
INCBIN_EVAL(INCBIN_STYLE), \
INCBIN_CONCATENATE(_, TYPE)))
/* Style lookup: returning string literal */
#define INCBIN_STYLE_STRING(TYPE) \
INCBIN_STRINGIZE( \
INCBIN_STYLE_IDENT(TYPE)) \
/* Generate the global labels by indirectly invoking the macro with our style
* type and concatenating the name against them. */
#define INCBIN_GLOBAL_LABELS(NAME, TYPE) \
INCBIN_INVOKE( \
INCBIN_GLOBAL, \
INCBIN_CONCATENATE( \
NAME, \
INCBIN_INVOKE( \
INCBIN_STYLE_IDENT, \
TYPE))) \
INCBIN_INVOKE( \
INCBIN_TYPE, \
INCBIN_CONCATENATE( \
NAME, \
INCBIN_INVOKE( \
INCBIN_STYLE_IDENT, \
TYPE)))
/**
* @brief Externally reference binary data included in another translation unit.
*
* Produces three external symbols that reference the binary data included in
* another translation unit.
*
* The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
* "Data", as well as "End" and "Size" after. An example is provided below.
*
* @param TYPE Optional array type. Omitting this picks a default of `unsigned char`.
* @param NAME The name given for the binary data
*
* @code
* INCBIN_EXTERN(Foo);
*
* // Now you have the following symbols:
* // extern const unsigned char <prefix>Foo<data>[];
* // extern const unsigned char *const <prefix>Foo<end>;
* // extern const unsigned int <prefix>Foo<size>;
* @endcode
*
* You may specify a custom optional data type as well as the first argument.
* @code
* INCBIN_EXTERN(custom_type, Foo);
*
* // Now you have the following symbols:
* // extern const custom_type <prefix>Foo<data>[];
* // extern const custom_type *const <prefix>Foo<end>;
* // extern const unsigned int <prefix>Foo<size>;
* @endcode
*/
#define INCBIN_EXTERN(...) \
INCBIN_CONCATENATE(INCBIN_EXTERN_, INCBIN_VA_ARGC(__VA_ARGS__))(__VA_ARGS__)
#define INCBIN_EXTERN_1(NAME, ...) \
INCBIN_EXTERN_2(unsigned char, NAME)
#define INCBIN_EXTERN_2(TYPE, NAME) \
INCBIN_EXTERNAL const INCBIN_ALIGN TYPE \
INCBIN_CONCATENATE( \
INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
INCBIN_STYLE_IDENT(DATA))[]; \
INCBIN_EXTERNAL const INCBIN_ALIGN TYPE *const \
INCBIN_CONCATENATE( \
INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
INCBIN_STYLE_IDENT(END)); \
INCBIN_EXTERNAL const unsigned int \
INCBIN_CONCATENATE( \
INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
INCBIN_STYLE_IDENT(SIZE))
/**
* @brief Externally reference textual data included in another translation unit.
*
* Produces three external symbols that reference the textual data included in
* another translation unit.
*
* The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
* "Data", as well as "End" and "Size" after. An example is provided below.
*
* @param NAME The name given for the textual data
*
* @code
* INCBIN_EXTERN(Foo);
*
* // Now you have the following symbols:
* // extern const char <prefix>Foo<data>[];
* // extern const char *const <prefix>Foo<end>;
* // extern const unsigned int <prefix>Foo<size>;
* @endcode
*/
#define INCTXT_EXTERN(NAME) \
INCBIN_EXTERN_2(char, NAME)
/**
* @brief Include a binary file into the current translation unit.
*
* Includes a binary file into the current translation unit, producing three symbols
* for objects that encode the data and size respectively.
*
* The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
* "Data", as well as "End" and "Size" after. An example is provided below.
*
* @param TYPE Optional array type. Omitting this picks a default of `unsigned char`.
* @param NAME The name to associate with this binary data (as an identifier.)
* @param FILENAME The file to include (as a string literal.)
*
* @code
* INCBIN(Icon, "icon.png");
*
* // Now you have the following symbols:
* // const unsigned char <prefix>Icon<data>[];
* // const unsigned char *const <prefix>Icon<end>;
* // const unsigned int <prefix>Icon<size>;
* @endcode
*
* You may specify a custom optional data type as well as the first argument.
* These macros are specialized by arity.
* @code
* INCBIN(custom_type, Icon, "icon.png");
*
* // Now you have the following symbols:
* // const custom_type <prefix>Icon<data>[];
* // const custom_type *const <prefix>Icon<end>;
* // const unsigned int <prefix>Icon<size>;
* @endcode
*
* @warning This must be used in global scope
* @warning The identifiers may be different if INCBIN_STYLE is not default
*
* To externally reference the data included by this in another translation unit
* please @see INCBIN_EXTERN.
*/
#ifdef _MSC_VER
# define INCBIN(NAME, FILENAME) \
INCBIN_EXTERN(NAME)
#else
# define INCBIN(...) \
INCBIN_CONCATENATE(INCBIN_, INCBIN_VA_ARGC(__VA_ARGS__))(__VA_ARGS__)
# if defined(__GNUC__)
# define INCBIN_1(...) _Pragma("GCC error \"Single argument INCBIN not allowed\"")
# elif defined(__clang__)
# define INCBIN_1(...) _Pragma("clang error \"Single argument INCBIN not allowed\"")
# else
# define INCBIN_1(...) /* Cannot do anything here */
# endif
# define INCBIN_2(NAME, FILENAME) \
INCBIN_3(unsigned char, NAME, FILENAME)
# define INCBIN_3(TYPE, NAME, FILENAME) INCBIN_COMMON(TYPE, NAME, FILENAME, /* No terminator for binary data */)
# define INCBIN_COMMON(TYPE, NAME, FILENAME, TERMINATOR) \
__asm__(INCBIN_SECTION \
INCBIN_GLOBAL_LABELS(NAME, DATA) \
INCBIN_ALIGN_HOST \
INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) ":\n" \
INCBIN_MACRO " \"" FILENAME "\"\n" \
TERMINATOR \
INCBIN_GLOBAL_LABELS(NAME, END) \
INCBIN_ALIGN_BYTE \
INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) ":\n" \
INCBIN_BYTE "1\n" \
INCBIN_GLOBAL_LABELS(NAME, SIZE) \
INCBIN_ALIGN_HOST \
INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(SIZE) ":\n" \
INCBIN_INT INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) " - " \
INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) "\n" \
INCBIN_ALIGN_HOST \
".text\n" \
); \
INCBIN_EXTERN(TYPE, NAME)
#endif
/**
* @brief Include a textual file into the current translation unit.
*
* This behaves the same as INCBIN except it produces char compatible arrays
* and implicitly adds a null-terminator byte, thus the size of data included
* by this is one byte larger than that of INCBIN.
*
* Includes a textual file into the current translation unit, producing three
* symbols for objects that encode the data and size respectively.
*
* The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
* "Data", as well as "End" and "Size" after. An example is provided below.
*
* @param NAME The name to associate with this binary data (as an identifier.)
* @param FILENAME The file to include (as a string literal.)
*
* @code
* INCTXT(Readme, "readme.txt");
*
* // Now you have the following symbols:
* // const char <prefix>Readme<data>[];
* // const char *const <prefix>Readme<end>;
* // const unsigned int <prefix>Readme<size>;
* @endcode
*
* @warning This must be used in global scope
* @warning The identifiers may be different if INCBIN_STYLE is not default
*
* To externally reference the data included by this in another translation unit
* please @see INCBIN_EXTERN.
*/
#if defined(_MSC_VER)
# define INCTXT(NAME, FILENAME) \
INCBIN_EXTERN(NAME)
#else
# define INCTXT(NAME, FILENAME) \
INCBIN_COMMON(char, NAME, FILENAME, INCBIN_BYTE "0\n")
#endif
#endif
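
For reference, this is how the symbols generated by the macros above are consumed in facerecognizer.cpp: with INCBIN_PREFIX set to r and the default CamelCase style, INCBIN(defaultDetector, ...) emits rdefaultDetectorData, rdefaultDetectorEnd and rdefaultDetectorSize. A shortened sketch (the file path here is illustrative; the real one is prefixed with WEIGHT_DIR):
#define INCBIN_PREFIX r
#include "incbin.h"
#include <vector>

// Emits rdefaultDetectorData[], rdefaultDetectorEnd and rdefaultDetectorSize.
INCBIN(defaultDetector, "face_detection_yunet_2023mar.onnx");

// Wrap the embedded bytes in a vector, as facerecognizer.cpp does.
static const std::vector<unsigned char> onnx(rdefaultDetectorData, rdefaultDetectorData + rdefaultDetectorSize);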

128
SmartCrop/intelligentroi.cpp Normal file

@@ -0,0 +1,128 @@
//
// SmartCrop - A tool for content aware cropping of images
// Copyright (C) 2024 Carl Philipp Klemm
//
// This file is part of SmartCrop.
//
// SmartCrop is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// SmartCrop is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
//
#include "intelligentroi.h"
#include <opencv2/imgproc.hpp>
#include "utils.h"
#include "log.h"
bool InteligentRoi::compPointPrio(const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b, const cv::Point2i& center)
{
if(a.second != b.second)
return a.second > b.second;
double distA = pointDist(a.first, center);
double distB = pointDist(b.first, center);
return distA < distB;
}
void InteligentRoi::slideRectToPoint(cv::Rect& rect, const cv::Point2i& point)
{
if(!pointInRect(point, rect))
{
if(point.x < rect.x)
rect.x = point.x;
else if(point.x > rect.x+rect.width)
rect.x = point.x-rect.width;
if(point.y < rect.y)
rect.y = point.y;
else if(point.y > rect.y+rect.height)
rect.y = point.y-rect.height;
}
}
cv::Rect InteligentRoi::maxRect(bool& incompleate, const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude)
{
incompleate = false;
int diameter = std::min(imageSize.height, imageSize.width);
cv::Point2i point(imageSize.width/2, imageSize.height/2);
cv::Rect candiate(point.x-diameter/2, point.y-diameter/2, diameter, diameter);
std::sort(mustInclude.begin(), mustInclude.end(),
[&point](const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b){return compPointPrio(a, b, point);});
while(true)
{
cv::Rect includeRect = rectFromPoints(mustInclude);
if(includeRect.width-2 > diameter || includeRect.height-2 > diameter)
{
incompleate = true;
slideRectToPoint(candiate, mustInclude.back().first);
mustInclude.pop_back();
Log(Log::DEBUG)<<"cant fill";
for(const std::pair<cv::Point2i, int>& mipoint : mustInclude)
Log(Log::DEBUG)<<mipoint.first<<' '<<pointDist(mipoint.first, point)<<' '<<mipoint.second;
}
else
{
break;
}
}
for(const std::pair<cv::Point2i, int>& includePoint : mustInclude)
slideRectToPoint(candiate, includePoint.first);
if(candiate.x < 0)
candiate.x = 0;
if(candiate.y < 0)
candiate.y = 0;
if(candiate.x+candiate.width > imageSize.width)
candiate.width = imageSize.width-candiate.x;
if(candiate.y+candiate.height > imageSize.height)
candiate.height = imageSize.height-candiate.y;
return candiate;
}
InteligentRoi::InteligentRoi(const Yolo& yolo)
{
personId = yolo.getClassForStr("person");
}
bool InteligentRoi::getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize)
{
std::vector<std::pair<cv::Point2i, int>> corners;
for(size_t i = 0; i < detections.size(); ++i)
{
int priority = detections[i].priority;
if(detections[i].class_id == personId)
{
corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width/2, 0), priority+2});
corners.push_back({detections[i].box.tl(), priority+1});
corners.push_back({detections[i].box.br(), priority});
corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width, 0), priority+1});
corners.push_back({detections[i].box.br()+cv::Point2i(0-detections[i].box.width, 0), priority});
}
else
{
corners.push_back({detections[i].box.tl(), priority});
corners.push_back({detections[i].box.br(), priority});
corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width, 0), priority});
corners.push_back({detections[i].box.br()+cv::Point2i(0-detections[i].box.width, 0), priority});
}
}
bool incompleate;
out = maxRect(incompleate, imageSize, corners);
return incompleate;
}

37
SmartCrop/intelligentroi.h Normal file

@@ -0,0 +1,37 @@
/*
* SmartCrop - A tool for content aware cropping of images
* Copyright (C) 2024 Carl Philipp Klemm
*
* This file is part of SmartCrop.
*
* SmartCrop is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SmartCrop is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <opencv2/imgproc.hpp>
#include "yolo.h"
class InteligentRoi
{
private:
int personId;
static bool compPointPrio(const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b, const cv::Point2i& center);
static void slideRectToPoint(cv::Rect& rect, const cv::Point2i& point);
static cv::Rect maxRect(bool& incompleate, const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude = {});
public:
InteligentRoi(const Yolo& yolo);
bool getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize);
};
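
A sketch of the crop flow from main.cpp using the class above; the Yolo constructor arguments mirror the call in main.cpp and all file paths are placeholders:
#include <opencv2/imgcodecs.hpp>
#include "yolo.h"
#include "intelligentroi.h"

int main()
{
	// Model and class-list paths are placeholders.
	Yolo yolo("yolov8.onnx", {640, 480}, "classes.txt", false);
	InteligentRoi intRoi(yolo);

	cv::Mat image = cv::imread("input.jpg");
	std::vector<Yolo::Detection> detections = yolo.runInference(image);

	cv::Rect crop;
	// Returns true when not every prioritized point fits into the square crop.
	bool incompleate = intRoi.getCropRectangle(crop, detections, image.size());
	cv::imwrite("cropped.jpg", image(crop));
	return incompleate ? 1 : 0;
}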

63
SmartCrop/log.cpp Normal file

@@ -0,0 +1,63 @@
/**
* Lubricant Detecter
* Copyright (C) 2021 Carl Klemm
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 3 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#include "log.h"
Log::Log(Level type, bool endlineI): endline(endlineI)
{
msglevel = type;
if(headers)
{
operator << ("["+getLabel(type)+"] ");
}
}
Log::~Log()
{
if(opened && endline)
{
std::cout<<'\n';
}
opened = false;
}
std::string Log::getLabel(Level level)
{
std::string label;
switch(level)
{
case DEBUG:
label = "DEBUG";
break;
case INFO:
label = "INFO ";
break;
case WARN:
label = "WARN ";
break;
case ERROR:
label = "ERROR";
break;
}
return label;
}
bool Log::headers = false;
Log::Level Log::level = WARN;

64
SmartCrop/log.h Normal file

@@ -0,0 +1,64 @@
/**
* eisgenerator
* Copyright (C) 2021 Carl Klemm
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 3 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#pragma once
#include <iostream>
#include <string>
class Log
{
public:
enum Level
{
DEBUG,
INFO,
WARN,
ERROR
};
private:
bool opened = false;
Level msglevel = DEBUG;
bool endline = true;
std::string getLabel(Level level);
public:
static bool headers;
static Level level;
Log() {}
Log(Level type, bool endlineI = true);
~Log();
template<class T> Log &operator<<(const T &msg)
{
if(msglevel >= level)
{
if(msglevel == ERROR)
std::cerr<<msg;
else
std::cout<<msg;
opened = true;
}
return *this;
}
};
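
A short usage sketch of the Log class above (the static members are defined in log.cpp):
#include "log.h"

int main()
{
	Log::level = Log::DEBUG; // print everything at or above DEBUG
	Log::headers = true;     // prefix every message with its level label

	Log(Log::INFO)<<"processing "<<42<<" images";
	Log(Log::ERROR)<<"only ERROR messages are written to stderr";
	return 0;
}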

460
SmartCrop/main.cpp Normal file

@@ -0,0 +1,460 @@
//
// SmartCrop - A tool for content aware cropping of images
// Copyright (C) 2024 Carl Philipp Klemm
//
// This file is part of SmartCrop.
//
// SmartCrop is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// SmartCrop is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
//
#include <filesystem>
#include <iostream>
#include <opencv2/core.hpp>
#include <opencv2/core/types.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <algorithm>
#include <execution>
#include <string>
#include <vector>
#include <numeric>
#include "yolo.h"
#include "log.h"
#include "options.h"
#include "utils.h"
#include "intelligentroi.h"
#include "seamcarving.h"
#include "facerecognizer.h"
const Yolo::Detection* pointInDetectionHoriz(int x, const std::vector<Yolo::Detection>& detections, const Yolo::Detection* ignore = nullptr)
{
const Yolo::Detection* inDetection = nullptr;
for(const Yolo::Detection& detection : detections)
{
if(ignore && ignore == &detection)
continue;
if(detection.box.x <= x && detection.box.x+detection.box.width >= x)
{
if(!inDetection || detection.box.br().x > inDetection->box.br().x)
inDetection = &detection;
}
}
return inDetection;
}
bool findRegionEndpointHoriz(int& x, const std::vector<Yolo::Detection>& detections, int imgSizeX)
{
const Yolo::Detection* inDetection = pointInDetectionHoriz(x, detections);
Log(Log::DEBUG, false)<<__func__<<" point "<<x;
if(!inDetection)
{
const Yolo::Detection* closest = nullptr;
for(const Yolo::Detection& detection : detections)
{
if(detection.box.x > x)
{
if(closest == nullptr || detection.box.x-x < closest->box.x-x)
closest = &detection;
}
}
if(closest)
x = closest->box.x;
else
x = imgSizeX;
Log(Log::DEBUG)<<" is not in any box and will be moved to "<<x<<" where the closest box ("<<(closest ? closest->className : "null")<<") is";
return false;
}
else
{
x = inDetection->box.br().x;
Log(Log::DEBUG, false)<<" is in a box and will be moved to its end "<<x<<" where ";
const Yolo::Detection* candidateDetection = pointInDetectionHoriz(x, detections, inDetection);
if(candidateDetection && candidateDetection->box.br().x > x)
{
Log(Log::DEBUG)<<"it is again in a box";
return findRegionEndpointHoriz(x, detections, imgSizeX);
}
else
{
Log(Log::DEBUG)<<"it is not in a box";
return true;
}
}
}
std::vector<std::pair<cv::Mat, bool>> cutImageIntoHorzRegions(cv::Mat& image, const std::vector<Yolo::Detection>& detections)
{
std::vector<std::pair<cv::Mat, bool>> out;
std::cout<<__func__<<' '<<image.cols<<'x'<<image.rows<<std::endl;
for(int x = 0; x < image.cols; ++x)
{
int start = x;
bool frozen = findRegionEndpointHoriz(x, detections, image.cols);
int width = x-start;
if(x < image.cols)
++width;
cv::Rect rect(start, 0, width, image.rows);
Log(Log::DEBUG)<<__func__<<" region\t"<<rect;
cv::Mat slice = image(rect);
out.push_back({slice, frozen});
}
return out;
}
cv::Mat assembleFromSlicesHoriz(const std::vector<std::pair<cv::Mat, bool>>& slices)
{
assert(!slices.empty());
int cols = 0;
for(const std::pair<cv::Mat, bool>& slice : slices)
cols += slice.first.cols;
cv::Mat image(cols, slices[0].first.rows, slices[0].first.type());
Log(Log::DEBUG)<<__func__<<' '<<image.size()<<' '<<cols<<' '<<slices[0].first.rows;
int col = 0;
for(const std::pair<cv::Mat, bool>& slice : slices)
{
cv::Rect rect(col, 0, slice.first.cols, slice.first.rows);
Log(Log::DEBUG)<<__func__<<' '<<rect;
slice.first.copyTo(image(rect));
col += slice.first.cols-1;
}
return image;
}
void transposeRect(cv::Rect& rect)
{
int x = rect.x;
rect.x = rect.y;
rect.y = x;
int width = rect.width;
rect.width = rect.height;
rect.height = width;
}
bool seamCarveResize(cv::Mat& image, std::vector<Yolo::Detection> detections, double targetAspectRatio = 1.0)
{
detections.erase(std::remove_if(detections.begin(), detections.end(), [](const Yolo::Detection& detection){return detection.priority < 3;}), detections.end());
double aspectRatio = image.cols/static_cast<double>(image.rows);
Log(Log::DEBUG)<<"Image size "<<image.size()<<" aspect ratio "<<aspectRatio<<" target aspect ratio "<<targetAspectRatio;
bool vertical = false;
if(aspectRatio > targetAspectRatio)
vertical = true;
int requiredLines = 0;
if(!vertical)
requiredLines = image.rows*targetAspectRatio - image.cols;
else
requiredLines = image.cols/targetAspectRatio - image.rows;
Log(Log::DEBUG)<<__func__<<' '<<requiredLines<<" lines are required in "<<(vertical ? "vertical" : "horizontal")<<" direction";
if(vertical)
{
cv::transpose(image, image);
for(Yolo::Detection& detection : detections)
transposeRect(detection.box);
}
std::vector<std::pair<cv::Mat, bool>> slices = cutImageIntoHorzRegions(image, detections);
Log(Log::DEBUG)<<"Image has "<<slices.size()<<" slices:";
int totalResizableSize = 0;
for(const std::pair<cv::Mat, bool>& slice : slices)
{
Log(Log::DEBUG)<<"a "<<(slice.second ? "frozen" : "unfrozen")<<" slice of size "<<slice.first.cols;
if(!slice.second)
totalResizableSize += slice.first.cols;
}
if(totalResizableSize < requiredLines+1)
{
Log(Log::WARN)<<"Unable to seam carve as there are only "<<totalResizableSize<<" unfrozen cols";
if(vertical)
cv::transpose(image, image);
return false;
}
std::vector<int> seamsForSlice(slices.size(), 0);
for(size_t i = 0; i < slices.size(); ++i)
{
if(!slices[i].second)
seamsForSlice[i] = (static_cast<double>(slices[i].first.cols)/totalResizableSize)*requiredLines;
}
int residual = requiredLines - std::accumulate(seamsForSlice.begin(), seamsForSlice.end(), decltype(seamsForSlice)::value_type(0));
for(ssize_t i = slices.size()-1; i >= 0; --i)
{
if(!slices[i].second)
{
seamsForSlice[i] += residual;
break;
}
}
for(size_t i = 0; i < slices.size(); ++i)
{
if(seamsForSlice[i] != 0)
{
bool ret = SeamCarving::strechImage(slices[i].first, seamsForSlice[i], true);
if(!ret)
{
if(vertical)
transpose(image, image);
return false;
}
}
}
image = assembleFromSlicesHoriz(slices);
if(vertical)
cv::transpose(image, image);
return true;
}
void drawDebugInfo(cv::Mat &image, const cv::Rect& rect, const std::vector<Yolo::Detection>& detections)
{
for(const Yolo::Detection& detection : detections)
{
cv::rectangle(image, detection.box, detection.color, 3);
std::string label = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4) + ' ' + std::to_string(detection.priority);
cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_DUPLEX, 1, 1, 0);
cv::Rect textBox(detection.box.x, detection.box.y - 40, labelSize.width + 10, labelSize.height + 20);
cv::rectangle(image, textBox, detection.color, cv::FILLED);
cv::putText(image, label, cv::Point(detection.box.x + 5, detection.box.y - 10), cv::FONT_HERSHEY_DUPLEX, 1, cv::Scalar(0, 0, 0), 1, 0);
}
cv::rectangle(image, rect, cv::Scalar(0, 0, 255), 8);
}
static void reduceSize(cv::Mat& image, const cv::Size& targetSize)
{
int longTargetSize = std::max(targetSize.width, targetSize.height)*2;
if(std::max(image.cols, image.rows) > longTargetSize)
{
if(image.cols > image.rows)
{
double ratio = static_cast<double>(longTargetSize)/image.cols;
cv::resize(image, image, {longTargetSize, static_cast<int>(image.rows*ratio)}, 0, 0, ratio < 1 ? cv::INTER_AREA : cv::INTER_CUBIC);
}
else
{
double ratio = static_cast<double>(longTargetSize)/image.rows;
cv::resize(image, image, {static_cast<int>(image.cols*ratio), longTargetSize}, 0, 0, ratio < 1 ? cv::INTER_AREA : cv::INTER_CUBIC);
}
}
}
void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yolo, FaceRecognizer* recognizer,
std::mutex& reconizerMutex, const std::filesystem::path& debugOutputPath)
{
InteligentRoi intRoi(yolo);
cv::Mat image = cv::imread(path);
if(!image.data)
{
Log(Log::WARN)<<"could not load image "<<path<<" skipping";
return;
}
reduceSize(image, config.targetSize);
std::vector<Yolo::Detection> detections = yolo.runInference(image);
Log(Log::DEBUG)<<"Got "<<detections.size()<<" detections for "<<path;
std::vector<Yolo::Detection> faceDetections;
for(Yolo::Detection& detection : detections)
{
bool hasmatch = false;
if(recognizer && detection.className == "person")
{
cv::Mat person = image(detection.box);
reconizerMutex.lock();
FaceRecognizer::Detection match = recognizer->isMatch(person);
reconizerMutex.unlock();
if(match.person >= 0)
{
detection.priority += 10;
hasmatch = true;
// collected separately and appended after the loop, since pushing into detections while iterating over it would invalidate the range-for iterators
faceDetections.push_back({0, "Face", match.confidence, 20, {255, 0, 0}, match.rect});
}
}
Log(Log::DEBUG)<<detection.class_id<<": "<<detection.className<<" at "<<detection.box<<" with prio "<<detection.priority<<(hasmatch ? " has match" : "");
}
detections.insert(detections.end(), faceDetections.begin(), faceDetections.end());
cv::Rect crop;
bool incompleate = intRoi.getCropRectangle(crop, detections, image.size());
if(config.seamCarving && incompleate)
{
bool ret = seamCarveResize(image, detections, config.targetSize.aspectRatio());
if(ret && image.size().aspectRatio() != config.targetSize.aspectRatio())
{
detections = yolo.runInference(image);
}
}
cv::Mat croppedImage;
if(image.size().aspectRatio() != config.targetSize.aspectRatio() && incompleate)
{
intRoi.getCropRectangle(crop, detections, image.size());
if(config.debug)
{
cv::Mat debugImage = image.clone();
drawDebugInfo(debugImage, crop, detections);
bool ret = cv::imwrite(debugOutputPath/path.filename(), debugImage);
if(!ret)
Log(Log::WARN)<<"could not save debug image to "<<debugOutputPath/path.filename()<<" skipping";
}
croppedImage = image(crop);
}
else if(!incompleate)
{
croppedImage = image(crop);
}
else
{
croppedImage = image;
}
cv::Mat resizedImage;
cv::resize(croppedImage, resizedImage, config.targetSize, 0, 0, cv::INTER_CUBIC);
bool ret = cv::imwrite(config.outputDir/path.filename(), resizedImage);
if(!ret)
Log(Log::WARN)<<"could not save image to "<<config.outputDir/path.filename()<<" skipping";
}
void threadFn(const std::vector<std::filesystem::path>& images, const Config& config, FaceRecognizer* recognizer,
std::mutex& reconizerMutex, const std::filesystem::path& debugOutputPath)
{
Yolo yolo(config.modelPath, {640, 480}, config.classesPath, false);
for(std::filesystem::path path : images)
pipeline(path, config, yolo, recognizer, reconizerMutex, debugOutputPath);
}
template<typename T>
std::vector<std::vector<T>> splitVector(const std::vector<T>& vec, size_t parts)
{
std::vector<std::vector<T>> out;
size_t length = vec.size()/parts;
size_t remain = vec.size() % parts;
size_t begin = 0;
size_t end = 0;
for (size_t i = 0; i < std::min(parts, vec.size()); ++i)
{
end += (remain > 0) ? (length + !!(remain--)) : length;
out.push_back(std::vector<T>(vec.begin() + begin, vec.begin() + end));
begin = end;
}
return out;
}
int main(int argc, char* argv[])
{
Log::level = Log::INFO;
Config config;
argp_parse(&argp, argc, argv, 0, 0, &config);
if(config.outputDir.empty())
{
Log(Log::ERROR)<<"a output path \"-o\" is required";
return 1;
}
if(config.imagePaths.empty())
{
Log(Log::ERROR)<<"at least one input image or directory is required";
return 1;
}
std::vector<std::filesystem::path> imagePaths;
for(const std::filesystem::path& path : config.imagePaths)
getImageFiles(path, imagePaths);
Log(Log::DEBUG)<<"Images:";
for(const std::filesystem::path& path: imagePaths)
Log(Log::DEBUG)<<path;
if(imagePaths.empty())
{
Log(Log::ERROR)<<"no image was found\n";
return 1;
}
if(!std::filesystem::exists(config.outputDir))
{
if(!std::filesystem::create_directory(config.outputDir))
{
Log(Log::ERROR)<<"could not create directory at "<<config.outputDir;
return 1;
}
}
std::filesystem::path debugOutputPath(config.outputDir/"debug");
if(config.debug)
{
if(!std::filesystem::exists(debugOutputPath))
std::filesystem::create_directory(debugOutputPath);
}
FaceRecognizer* recognizer = nullptr;
std::mutex recognizerMutex;
if(!config.focusPersonImage.empty())
{
cv::Mat personImage = cv::imread(config.focusPersonImage);
if(personImage.empty())
{
Log(Log::ERROR)<<"Could not load image from "<<config.focusPersonImage;
return 1;
}
recognizer = new FaceRecognizer();
recognizer->addReferances({personImage});
recognizer->setThreshold(config.threshold);
}
std::vector<std::thread> threads;
std::vector<std::vector<std::filesystem::path>> imagePathParts = splitVector(imagePaths, std::thread::hardware_concurrency());
for(size_t i = 0; i < imagePathParts.size(); ++i)
threads.push_back(std::thread(threadFn, imagePathParts[i], std::ref(config), recognizer, std::ref(recognizerMutex), std::ref(debugOutputPath)));
for(std::thread& thread : threads)
thread.join();
return 0;
}
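
As a worked example of the requiredLines computation in seamCarveResize above (the input size is assumed): a 640x480 image with a square target has an aspect ratio of about 1.33 > 1.0, so the image is transposed and 640/1.0 - 480 = 160 seams have to be inserted. The same arithmetic as a self-contained sketch:
#include <iostream>

int main()
{
	int cols = 640, rows = 480;     // example input size
	double targetAspectRatio = 1.0; // square output, as with --size 512

	double aspectRatio = cols/static_cast<double>(rows); // ~1.333
	bool vertical = aspectRatio > targetAspectRatio;     // true here

	int requiredLines = 0;
	if(!vertical)
		requiredLines = rows*targetAspectRatio - cols;
	else
		requiredLines = cols/targetAspectRatio - rows;

	std::cout<<requiredLines<<'\n'; // prints 160
	return 0;
}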

117
SmartCrop/options.h Normal file

@@ -0,0 +1,117 @@
/*
* SmartCrop - A tool for content aware cropping of images
* Copyright (C) 2024 Carl Philipp Klemm
*
* This file is part of SmartCrop.
*
* SmartCrop is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SmartCrop is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <string>
#include <vector>
#include <argp.h>
#include <iostream>
#include <filesystem>
#include <opencv2/core/types.hpp>
#include "log.h"
const char *argp_program_version = "AIImagePreprocesses";
const char *argp_program_bug_address = "<carl@uvos.xyz>";
static char doc[] = "Application that transforms images into the formats, sizes and aspect ratios required for AI training";
static char args_doc[] = "FILE(S)";
static struct argp_option options[] =
{
{"verbose", 'v', 0, 0, "Show debug messages" },
{"quiet", 'q', 0, 0, "only output data" },
{"model", 'm', "[FILENAME]", 0, "YoloV8 model to use for detection" },
{"classes", 'c', "[FILENAME]", 0, "classes text file to use" },
{"out", 'o', "[DIRECTORY]", 0, "directory whre images are to be saved" },
{"debug", 'd', 0, 0, "output debug images" },
{"seam-carving", 's', 0, 0, "use seam carving to change image aspect ratio instead of croping"},
{"size", 'z', "[PIXELS]", 0, "target output size, default: 512"},
{"focus-person", 'f', "[FILENAME]", 0, "a file name to an image of a person that the crop should focus on"},
{"person-threshold", 't', "[NUMBER]", 0, "the threshold at witch to consider a person matched, defaults to 0.363"},
{0}
};
struct Config
{
std::vector<std::filesystem::path> imagePaths;
std::filesystem::path modelPath;
std::filesystem::path classesPath;
std::filesystem::path outputDir;
std::filesystem::path focusPersonImage;
bool seamCarving = false;
bool debug = false;
double threshold = 0.363;
cv::Size targetSize = cv::Size(512, 512);
};
static error_t parse_opt (int key, char *arg, struct argp_state *state)
{
Config *config = reinterpret_cast<Config*>(state->input);
try
{
switch (key)
{
case 'q':
Log::level = Log::ERROR;
break;
case 'v':
Log::level = Log::DEBUG;
break;
case 'm':
config->modelPath = arg;
break;
case 'c':
config->classesPath = arg;
break;
case 'd':
config->debug = true;
break;
case 'o':
config->outputDir.assign(arg);
break;
case 's':
config->seamCarving = true;
break;
case 'f':
config->focusPersonImage = arg;
break;
case 't':
config->threshold = std::stod(arg);
break;
case 'z':
{
int x = std::stoi(arg);
config->targetSize = cv::Size(x, x);
break;
}
case ARGP_KEY_ARG:
config->imagePaths.push_back(arg);
break;
default:
return ARGP_ERR_UNKNOWN;
}
}
catch(const std::invalid_argument& ex)
{
std::cout<<arg<<" passed for argument -"<<static_cast<char>(key)<<" is not a valid number.\n";
return ARGP_KEY_ERROR;
}
return 0;
}
static struct argp argp = {options, parse_opt, args_doc, doc};
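
For reference, the options above map onto an invocation along these lines (all file names are placeholders):
$ smartcrop -v -m yolov8.onnx -c classes.txt -o processedImages -z 512 -f person.jpg -t 0.363 ~/images/*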

35
SmartCrop/readfile.h Normal file

@@ -0,0 +1,35 @@
/*
* SmartCrop - A tool for content aware cropping of images
* Copyright (C) 2024 Carl Philipp Klemm
*
* This file is part of SmartCrop.
*
* SmartCrop is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SmartCrop is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <string>
#include <filesystem>
#include <fstream>
#include <stdexcept>
#include <sstream>
inline std::string readFile(const std::filesystem::path& path)
{
std::ifstream file(path);
if(!file.is_open())
throw std::runtime_error(std::string("could not open file ") + path.string());
std::stringstream ss;
ss<<file.rdbuf();
return ss.str();
}

376
SmartCrop/seamcarving.cpp Normal file

@@ -0,0 +1,376 @@
//
// SmartCrop - A tool for content aware cropping of images
// Copyright (C) 2024 Carl Philipp Klemm
//
// This file is part of SmartCrop.
//
// SmartCrop is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// SmartCrop is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
//
#include "seamcarving.h"
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <iostream>
#include <filesystem>
#include <cfloat>
#include <vector>
#include "log.h"
bool SeamCarving::strechImage(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect)
{
cv::Mat newFrame = image.clone();
assert(!newFrame.empty());
std::vector<std::vector<int>> vecSeams;
for(int i = 0; i < seams; i++)
{
//Gradient Magnitude for intensity of image.
cv::Mat gradientMagnitude = computeGradientMagnitude(newFrame);
//Use DP to create the real energy map that is used for path calculation.
// Strictly using vertical paths for testing simplicity.
cv::Mat pathIntensityMat = computePathIntensityMat(gradientMagnitude);
if(pathIntensityMat.rows == 0 && pathIntensityMat.cols == 0)
return false;
std::vector<int> seam = getLeastImportantPath(pathIntensityMat);
vecSeams.push_back(seam);
if(seamsVect)
seamsVect->push_back(seam);
newFrame = removeLeastImportantPath(newFrame, seam);
if(newFrame.rows == 0 || newFrame.cols == 0)
return false;
}
if (grow)
{
cv::Mat growMat = image.clone();
for(size_t i = 0; i < vecSeams.size(); i++)
{
growMat = addLeastImportantPath(growMat,vecSeams[i]);
}
image = growMat;
}
else
{
image = newFrame;
}
return true;
}
bool SeamCarving::strechImageVert(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect)
{
cv::transpose(image, image);
bool ret = strechImage(image, seams, grow, seamsVect);
cv::transpose(image, image);
return ret;
}
bool SeamCarving::strechImageWithSeamsImage(cv::Mat& image, cv::Mat& seamsImage, int seams, bool grow)
{
std::vector<std::vector<int>> seamsVect;
seamsImage = image.clone();
bool ret = SeamCarving::strechImage(image, seams, grow, &seamsVect);
if(!ret)
return false;
for(size_t i = 0; i < seamsVect.size(); ++i)
seamsImage = drawSeam(seamsImage, seamsVect[i]);
return true;
}
cv::Mat SeamCarving::GetEnergyImg(const cv::Mat &img)
{
// find the partial derivative of the x-axis and y-axis separately
// sum up the partial derivatives
float pd[] = {1, 2, 1, 0, 0, 0, -1, -2, -1};
cv::Mat xFilter(3, 3, CV_32FC1, pd);
cv::Mat yFilter = xFilter.t();
cv::Mat grayImg;
cv::cvtColor(img, grayImg, cv::COLOR_RGBA2GRAY);
cv::Mat dxImg;
cv::Mat dyImg;
cv::filter2D(grayImg, dxImg, 0, xFilter);
cv::filter2D(grayImg, dyImg, 0, yFilter);
//cv::Mat zeroMat = cv::Mat::zeros(dxImg.rows, dxImg.cols, dxImg.type());
//cv::Mat absDxImg;
//cv::Mat absDyImg;
//cv::absdiff(dxImg, zeroMat, absDxImg);
//cv::absdiff(dyImg, zeroMat, absDyImg);
cv::Mat absDxImg = cv::abs(dxImg);
cv::Mat absDyImg = cv::abs(dyImg);
cv::Mat energyImg;
cv::add(absDxImg, absDyImg, energyImg);
return energyImg;
}
cv::Mat SeamCarving::computeGradientMagnitude(const cv::Mat &frame)
{
cv::Mat grayScale;
cv::cvtColor(frame, grayScale, cv::COLOR_RGBA2GRAY);
cv::Mat drv = cv::Mat(grayScale.size(), CV_16SC1);
cv::Mat drv32f = cv::Mat(grayScale.size(), CV_32FC1);
cv::Mat mag = cv::Mat::zeros(grayScale.size(), CV_32FC1);
Sobel(grayScale, drv, CV_16SC1, 1, 0);
drv.convertTo(drv32f, CV_32FC1);
cv::accumulateSquare(drv32f, mag);
Sobel(grayScale, drv, CV_16SC1, 0, 1);
drv.convertTo(drv32f, CV_32FC1);
cv::accumulateSquare(drv32f, mag);
cv::sqrt(mag, mag);
return mag;
}
float SeamCarving::intensity(float currIndex, int start, int end)
{
if(start < 0 || start >= end)
{
return FLT_MAX;
}
else
{
return currIndex;
}
}
cv::Mat SeamCarving::computePathIntensityMat(const cv::Mat &rawEnergyMap)
{
cv::Mat pathIntensityMap = cv::Mat(rawEnergyMap.size(), CV_32FC1);
if(rawEnergyMap.total() == 0 || pathIntensityMap.total() == 0)
{
return cv::Mat();
}
//First row of intensity paths is the same as the energy map
rawEnergyMap.row(0).copyTo(pathIntensityMap.row(0));
float max = 0;
//The rest of them use the DP calculation using the minimum of the 3 pixels above them + their own intensity.
for(int row = 1; row < pathIntensityMap.rows; row++)
{
for(int col = 0; col < pathIntensityMap.cols; col++)
{
//The initial intensity of the pixel is its raw intensity
float pixelIntensity = rawEnergyMap.at<float>(row, col);
//The minimum intensity from the current path of the 3 pixels above it is added to its intensity.
float p1 = intensity(pathIntensityMap.at<float>(row-1, col-1), col - 1, pathIntensityMap.cols);
float p2 = intensity(pathIntensityMap.at<float>(row-1, col), col, pathIntensityMap.cols);
float p3 = intensity(pathIntensityMap.at<float>(row-1, col+1), col + 1, pathIntensityMap.cols);
float minIntensity = std::min(p1, p2);
minIntensity = std::min(minIntensity, p3);
pixelIntensity += minIntensity;
max = std::max(max, pixelIntensity);
pathIntensityMap.at<float>(row, col) = pixelIntensity;
}
}
return pathIntensityMap;
}
std::vector<int> SeamCarving::getLeastImportantPath(const cv::Mat &importanceMap)
{
if(importanceMap.total() == 0)
{
return std::vector<int>();
}
//Find the beginning of the least important path. Trying an averaging approach because absolute min wasn't very reliable.
float minImportance = importanceMap.at<float>(importanceMap.rows - 1, 0);
int minCol = 0;
for (int col = 1; col < importanceMap.cols; col++)
{
float currPixel =importanceMap.at<float>(importanceMap.rows - 1, col);
if(currPixel < minImportance)
{
minCol = col;
minImportance = currPixel;
}
}
std::vector<int> leastEnergySeam(importanceMap.rows);
leastEnergySeam[importanceMap.rows-1] = minCol;
for(int row = importanceMap.rows - 2; row >= 0; row--)
{
float p1 = intensity(importanceMap.at<float>(row, minCol-1), minCol - 1, importanceMap.cols);
float p2 = intensity(importanceMap.at<float>(row, minCol), minCol, importanceMap.cols);
float p3 = intensity(importanceMap.at<float>(row, minCol+1), minCol + 1, importanceMap.cols);
//Adjust the min column for path following
if(p1 < p2 && p1 < p3)
{
minCol -= 1;
}
else if(p3 < p1 && p3 < p2)
{
minCol += 1;
}
leastEnergySeam[row] = minCol;
}
return leastEnergySeam;
}
cv::Mat SeamCarving::removeLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam)
{
cv::Size orgSize = original.size();
// new mat needs to shrink by one column
cv::Size size = cv::Size(orgSize.width-1, orgSize.height);
cv::Mat newMat = cv::Mat(size, original.type());
for(size_t row = 0; row < seam.size(); row++)
{
removePixel(original, newMat, row, seam[row]);
}
return newMat;
}
void SeamCarving::removePixel(const cv::Mat &original, cv::Mat &outputMat, int row, int minCol)
{
int width = original.cols;
int channels = original.channels();
int originRowStart = row * channels * width;
int newRowStart = row * channels * (width - 1);
int firstNum = minCol * channels;
unsigned char *rawOrig = original.data;
unsigned char *rawOutput = outputMat.data;
//std::cout << "originRowStart: " << originRowStart << std::endl;
//std::cout << "newRowStart: " << newRowStart << std::endl;
//std::cout << "firstNum: " << firstNum << std::endl;
memcpy(rawOutput + newRowStart, rawOrig + originRowStart, firstNum);
int originRowMid = originRowStart + (minCol + 1) * channels;
int newRowMid = newRowStart + minCol * channels;
int secondNum = (width - 1) * channels - firstNum;
//std::cout << "originRowMid: " << originRowMid << std::endl;
//std::cout << "newRowMid: " << newRowMid << std::endl;
//std::cout << "secondNum: " << secondNum << std::endl;
memcpy(rawOutput + newRowMid, rawOrig + originRowMid, secondNum);
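//Blend the pixels adjacent to the removed seam with the removed pixel's original value to soften the discontinuity (assumes a 3-channel 8-bit image).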
int leftPixel = minCol - 1;
int rightPixel = minCol + 1;
int byte1 = rawOrig[originRowStart + minCol * channels];
int byte2 = rawOrig[originRowStart + minCol * channels + 1];
int byte3 = rawOrig[originRowStart + minCol * channels + 2];
if (rightPixel < width)
{
int byte1R = rawOrig[originRowStart + rightPixel * channels];
int byte2R = rawOrig[originRowStart + rightPixel * channels + 1];
int byte3R = rawOrig[originRowStart + rightPixel * channels + 2];
rawOutput[newRowStart + minCol * channels] = (unsigned char)((byte1 + byte1R) / 2);
rawOutput[newRowStart + minCol * channels + 1] = (unsigned char)((byte2 + byte2R) / 2);
rawOutput[newRowStart + minCol * channels + 2] = (unsigned char)((byte3 + byte3R) / 2);
}
if(leftPixel >= 0)
{
int byte1L = rawOrig[originRowStart + leftPixel*channels];
int byte2L = rawOrig[originRowStart + leftPixel*channels+1];
int byte3L = rawOrig[originRowStart + leftPixel*channels+2];
rawOutput[newRowStart + leftPixel*channels] = (unsigned char) ((byte1 + byte1L)/2);
rawOutput[newRowStart + leftPixel*channels+1] = (unsigned char) ((byte2 + byte2L)/2);
rawOutput[newRowStart + leftPixel*channels+2] = (unsigned char) ((byte3 + byte3L)/2);
}
}
cv::Mat SeamCarving::addLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam)
{
cv::Size orgSize = original.size();
// new mat needs to grow by one column
cv::Size size = cv::Size(orgSize.width+1, orgSize.height);
cv::Mat newMat = cv::Mat(size, original.type());
for(size_t row = 0; row < seam.size(); row++)
{
//std::cout << "row: " << row << ", col: " << seam[row] << std::endl;
addPixel(original, newMat, row, seam[row]);
}
return newMat;
}
void SeamCarving::addPixel(const cv::Mat &original, cv::Mat &outputMat, int row, int minCol)
{
int width = original.cols;
int channels = original.channels();
int originRowStart = row * channels * width;
int newRowStart = row * channels * (width + 1);
int firstNum = (minCol + 1) * channels;
unsigned char *rawOrig = original.data;
unsigned char *rawOutput = outputMat.data;
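//Copy pixels [0, minCol] unchanged, duplicate the pixel to the right of the seam as the newly inserted pixel, then copy the remainder shifted right by one; the blending below softens the duplication (assumes a 3-channel 8-bit image).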
memcpy(rawOutput + newRowStart, rawOrig + originRowStart, firstNum);
memcpy(rawOutput + newRowStart + firstNum, rawOrig + originRowStart + firstNum, channels);
int originRowMid = originRowStart + ((minCol + 1) * channels);
int newRowMid = newRowStart + ((minCol + 2) * channels);
int secondNum = (width * channels) - firstNum;
memcpy(rawOutput + newRowMid, rawOrig + originRowMid, secondNum);
int leftPixel = minCol - 1;
int rightPixel = minCol + 1;
int byte1 = rawOrig[originRowStart + minCol * channels];
int byte2 = rawOrig[originRowStart + minCol * channels + 1];
int byte3 = rawOrig[originRowStart + minCol * channels + 2];
if (rightPixel < width)
{
int byte1R = rawOrig[originRowStart + rightPixel * channels];
int byte2R = rawOrig[originRowStart + rightPixel * channels + 1];
int byte3R = rawOrig[originRowStart + rightPixel * channels + 2];
rawOutput[newRowStart + minCol * channels] = (unsigned char)((byte1 + byte1R) / 2);
rawOutput[newRowStart + minCol * channels + 1] = (unsigned char)((byte2 + byte2R) / 2);
rawOutput[newRowStart + minCol * channels + 2] = (unsigned char)((byte3 + byte3R) / 2);
}
if(leftPixel >= 0)
{
int byte1L = rawOrig[originRowStart + leftPixel*channels];
int byte2L = rawOrig[originRowStart + leftPixel*channels+1];
int byte3L = rawOrig[originRowStart + leftPixel*channels+2];
rawOutput[newRowStart + leftPixel*channels] = (unsigned char) ((byte1 + byte1L)/2);
rawOutput[newRowStart + leftPixel*channels+1] = (unsigned char) ((byte2 + byte2L)/2);
rawOutput[newRowStart + leftPixel*channels+2] = (unsigned char) ((byte3 + byte3L)/2);
}
}
cv::Mat SeamCarving::drawSeam(const cv::Mat &frame, const std::vector<int> &seam)
{
cv::Mat retMat = frame.clone();
for(int row = 0; row < frame.rows; row++)
{
retMat.at<cv::Vec3b>(row, seam[row])[0] = 0;
retMat.at<cv::Vec3b>(row, seam[row])[1] = 255;
retMat.at<cv::Vec3b>(row, seam[row])[2] = 0;
}
return retMat;
}

43
SmartCrop/seamcarving.h Normal file
View File

@ -0,0 +1,43 @@
/* * SmartCrop - A tool for content aware cropping of images
* Copyright (C) 2024 Carl Philipp Klemm
*
* This file is part of SmartCrop.
*
* SmartCrop is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SmartCrop is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <opencv2/core/core.hpp>
#include <vector>
class SeamCarving
{
private:
static cv::Mat GetEnergyImg(const cv::Mat &img);
static cv::Mat computeGradientMagnitude(const cv::Mat &frame);
static float intensity(float currIndex, int start, int end);
static cv::Mat computePathIntensityMat(const cv::Mat &rawEnergyMap);
static std::vector<int> getLeastImportantPath(const cv::Mat &importanceMap);
static cv::Mat removeLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam);
static void removePixel(const cv::Mat &original, cv::Mat &outputMap, int row, int minCol);
static cv::Mat addLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam);
static void addPixel(const cv::Mat &original, cv::Mat &outputMat, int row, int minCol);
static cv::Mat drawSeam(const cv::Mat &frame, const std::vector<int> &seam);
public:
static bool strechImage(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect = nullptr);
static bool strechImageVert(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect = nullptr);
static bool strechImageWithSeamsImage(cv::Mat& image, cv::Mat& seamsImage, int seams, bool grow);
};
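/*
 * Minimal usage sketch (illustrative, not part of the build); it assumes an 8-bit
 * BGR image and that grow=false removes seams while grow=true inserts them:
 *
 *   cv::Mat img = cv::imread("input.jpg");
 *   if(!img.empty())
 *       SeamCarving::strechImage(img, 50, false); // narrow the image by 50 seams
 */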

46
SmartCrop/tokenize.cpp Normal file
View File

@ -0,0 +1,46 @@
//
// SmartCrop - A tool for content aware cropping of images
// Copyright (C) 2024 Carl Philipp Klemm
//
// This file is part of SmartCrop.
//
// SmartCrop is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// SmartCrop is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
//
#include "tokenize.h"
std::vector<std::string> tokenizeBinaryIgnore(const std::string& str, const char delim, const char ignoreBraket, const char escapeChar)
{
std::vector<std::string> tokens;
std::string token;
bool inBaracket = false;
for(size_t i = 0; i < str.size(); ++i)
{
if(str[i] == delim && !inBaracket && (i == 0 || str[i-1] != escapeChar))
{
tokens.push_back(token);
token.clear();
}
else
{
token.push_back(str[i]);
}
if(ignoreBraket == str[i])
inBaracket = !inBaracket;
}
if(!inBaracket)
tokens.push_back(token);
return tokens;
}
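// Illustrative example: tokenizeBinaryIgnore("\"person, adult\",10", ',', '"', '\\')
// yields {"\"person, adult\"", "10"}; the comma inside the quotes is not split on.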

26
SmartCrop/tokenize.h Normal file
View File

@ -0,0 +1,26 @@
/* * SmartCrop - A tool for content aware cropping of images
* Copyright (C) 2024 Carl Philipp Klemm
*
* This file is part of SmartCrop.
*
* SmartCrop is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SmartCrop is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <string>
#include <vector>
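// Splits str on delim; text between two ignoreBraket characters is kept together and a
// delimiter directly preceded by escapeChar is treated as a literal character.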
std::vector<std::string> tokenizeBinaryIgnore(const std::string& str, const char delim, const char ignoreBraket = '\0',
const char escapeChar = '\0');

80
SmartCrop/utils.cpp Normal file
View File

@ -0,0 +1,80 @@
//
// SmartCrop - A tool for content aware cropping of images
// Copyright (C) 2024 Carl Philipp Klemm
//
// This file is part of SmartCrop.
//
// SmartCrop is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// SmartCrop is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
//
#include "utils.h"
#include <filesystem>
#include <vector>
#include <opencv2/imgproc.hpp>
bool isImagePath(const std::filesystem::path& path)
{
return std::filesystem::is_regular_file(path) && (path.extension() == ".png" || path.extension() == ".jpg" || path.extension() == ".jpeg");
}
void getImageFiles(const std::filesystem::path& path, std::vector<std::filesystem::path>& paths)
{
if(isImagePath(path))
{
paths.push_back(path);
}
else if(std::filesystem::is_directory(path))
{
for(const std::filesystem::directory_entry& dirent : std::filesystem::directory_iterator(path))
{
if(std::filesystem::is_directory(dirent.path()))
getImageFiles(dirent.path(), paths);
else if(isImagePath(dirent.path()))
paths.push_back(dirent.path());
}
}
}
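// Axis-aligned bounding box of the given points; the int paired with each point is not used here.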
cv::Rect rectFromPoints(const std::vector<std::pair<cv::Point, int>>& points)
{
int left = std::numeric_limits<int>::max();
int right = std::numeric_limits<int>::min();
int top = std::numeric_limits<int>::max();
int bottom = std::numeric_limits<int>::min();
for(const std::pair<cv::Point, int>& point : points)
{
left = point.first.x < left ? point.first.x : left;
right = point.first.x > right ? point.first.x : right;
top = point.first.y < top ? point.first.y : top;
bottom = point.first.y > bottom ? point.first.y : bottom;
}
return cv::Rect(left, top, right-left, bottom-top);
}
double pointDist(const cv::Point2i& pointA, const cv::Point2i& pointB)
{
cv::Vec2i a(pointA.x, pointA.y);
cv::Vec2i b(pointB.x, pointB.y);
return cv::norm(a-b);
}
bool pointInRect(const cv::Point2i& point, const cv::Rect& rect)
{
return point.x >= rect.x && point.x <= rect.x+rect.width &&
point.y >= rect.y && point.y <= rect.y+rect.height;
}

34
SmartCrop/utils.h Normal file
View File

@ -0,0 +1,34 @@
/* * SmartCrop - A tool for content aware cropping of images
* Copyright (C) 2024 Carl Philipp Klemm
*
* This file is part of SmartCrop.
*
* SmartCrop is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SmartCrop is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <filesystem>
#include <vector>
#include <opencv2/imgproc.hpp>
bool isImagePath(const std::filesystem::path& path);
void getImageFiles(const std::filesystem::path& path, std::vector<std::filesystem::path>& paths);
cv::Rect rectFromPoints(const std::vector<std::pair<cv::Point, int>>& points);
double pointDist(const cv::Point2i& pointA, const cv::Point2i& pointB);
bool pointInRect(const cv::Point2i& point, const cv::Rect& rect);

278
SmartCrop/yolo.cpp Normal file
View File

@ -0,0 +1,278 @@
//
// SmartCrop - A tool for content aware cropping of images
// Copyright (C) 2024 Carl Philipp Klemm
//
// This file is part of SmartCrop.
//
// SmartCrop is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// SmartCrop is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
//
#include <opencv2/dnn/dnn.hpp>
#include <algorithm>
#include <string>
#include <stdexcept>
#include "yolo.h"
#include "readfile.h"
#include "tokenize.h"
#include "log.h"
#define INCBIN_PREFIX r
#include "incbin.h"
INCTXT(defaultClasses, WEIGHT_DIR "/classes.txt");
INCBIN(defaultModel, WEIGHT_DIR "/yolov8x.onnx");
Yolo::Yolo(const std::filesystem::path &onnxModelPath, const cv::Size &modelInputShape,
const std::filesystem::path& classesTxtFilePath, bool runWithOCl)
{
modelPath = onnxModelPath;
modelShape = modelInputShape;
if(classesTxtFilePath.empty())
{
Log(Log::INFO)<<"Using builtin classes";
loadClasses(rdefaultClassesData);
}
else
{
std::string classesStr = readFile(classesTxtFilePath);
loadClasses(classesStr);
}
if(!modelPath.empty())
{
net = cv::dnn::readNetFromONNX(modelPath);
}
else
{
Log(Log::INFO)<<"Using builtin yolo model";
net = cv::dnn::readNetFromONNX((const char*)rdefaultModelData, rdefaultModelSize);
}
if(runWithOCl)
{
net.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT);
net.setPreferableTarget(cv::dnn::DNN_TARGET_OPENCL);
}
else
{
net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
}
}
std::vector<Yolo::Detection> Yolo::runInference(const cv::Mat &input)
{
cv::Mat modelInput = input;
if (letterBoxForSquare && modelShape.width == modelShape.height)
modelInput = formatToSquare(modelInput);
cv::Mat blob;
cv::dnn::blobFromImage(modelInput, blob, 1.0/255.0, modelShape, cv::Scalar(), true, false);
net.setInput(blob);
std::vector<cv::Mat> outputs;
net.forward(outputs, net.getUnconnectedOutLayersNames());
int rows = outputs[0].size[1];
int dimensions = outputs[0].size[2];
bool yolov8 = false;
// yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
// yolov8 has an output of shape (batchSize, 84, 8400) (Num classes + box[x,y,w,h])
if (dimensions > rows) // Check if the shape[2] is more than shape[1] (yolov8)
{
yolov8 = true;
rows = outputs[0].size[2];
dimensions = outputs[0].size[1];
outputs[0] = outputs[0].reshape(1, dimensions);
cv::transpose(outputs[0], outputs[0]);
}
float *data = (float *)outputs[0].data;
float x_factor = modelInput.cols / modelShape.width;
float y_factor = modelInput.rows / modelShape.height;
std::vector<int> class_ids;
std::vector<float> confidences;
std::vector<cv::Rect> boxes;
for (int i = 0; i < rows; ++i)
{
if (yolov8)
{
float *classes_scores = data+4;
cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
cv::Point class_id;
double maxClassScore;
minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);
if (maxClassScore > modelScoreThreshold)
{
confidences.push_back(maxClassScore);
class_ids.push_back(class_id.x);
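// The model reports the box centre (x, y) plus width/height in model-input coordinates;
// convert to a top-left anchored cv::Rect scaled back to the input image.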
float x = data[0];
float y = data[1];
float w = data[2];
float h = data[3];
int left = int((x - 0.5 * w) * x_factor);
int top = int((y - 0.5 * h) * y_factor);
int width = int(w * x_factor);
int height = int(h * y_factor);
boxes.push_back(cv::Rect(left, top, width, height));
}
}
else // yolov5
{
float confidence = data[4];
if (confidence >= modelConfidenceThreshold)
{
float *classes_scores = data+5;
cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
cv::Point class_id;
double max_class_score;
minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
if (max_class_score > modelScoreThreshold)
{
confidences.push_back(confidence);
class_ids.push_back(class_id.x);
float x = data[0];
float y = data[1];
float w = data[2];
float h = data[3];
int left = int((x - 0.5 * w) * x_factor);
int top = int((y - 0.5 * h) * y_factor);
int width = int(w * x_factor);
int height = int(h * y_factor);
boxes.push_back(cv::Rect(left, top, width, height));
}
}
}
data += dimensions;
}
std::vector<int> nms_result;
cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nms_result);
std::vector<Yolo::Detection> detections{};
for(unsigned long i = 0; i < nms_result.size(); ++i)
{
int idx = nms_result[i];
Yolo::Detection result;
result.class_id = class_ids[idx];
result.confidence = confidences[idx];
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_int_distribution<int> dis(100, 255);
result.color = cv::Scalar(dis(gen),
dis(gen),
dis(gen));
result.className = classes[result.class_id].first;
result.priority = classes[result.class_id].second;
clampBox(boxes[idx], input.size());
result.box = boxes[idx];
detections.push_back(result);
}
return detections;
}
void Yolo::clampBox(cv::Rect& box, const cv::Size& size)
{
if(box.x < 0)
{
box.width += box.x;
box.x = 0;
}
if(box.y < 0)
{
box.height += box.y;
box.y = 0;
}
if(box.x+box.width > size.width)
box.width = size.width - box.x;
if(box.y+box.height > size.height)
box.height = size.height - box.y;
}
void Yolo::loadClasses(const std::string& classesStr)
{
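// Expected classes.txt format, one class per line: name[,priority]; a name containing
// commas can be wrapped in double quotes and '\' escapes a delimiter.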
std::vector<std::string> candidateClasses = tokenizeBinaryIgnore(classesStr, '\n', '"', '\\');
classes.clear();
for(std::string& instance : candidateClasses)
{
if(instance.size() < 2)
continue;
std::vector<std::string> tokens = tokenizeBinaryIgnore(instance, ',', '"', '\\');
if(tokens.empty() || tokens[0].empty())
continue;
//Strip optional surrounding quotes from the class name
if(tokens[0].front() == '"')
tokens[0].erase(tokens[0].begin());
if(!tokens[0].empty() && tokens[0].back() == '"')
tokens[0].pop_back();
int priority = -1;
if(tokens.size() > 1)
{
try
{
priority = std::stoi(tokens[1]);
}
catch(const std::invalid_argument& err)
{
Log(Log::WARN)<<"unable to get priority for class "<<tokens[0]<<' '<<err.what();
}
}
classes.push_back({tokens[0], priority});
}
}
cv::Mat Yolo::formatToSquare(const cv::Mat &source)
{
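// Letterbox to a square canvas: pad with black to max(width, height) on the right/bottom,
// keeping the source anchored at the top-left so detected box coordinates stay valid.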
int col = source.cols;
int row = source.rows;
int _max = MAX(col, row);
cv::Mat result = cv::Mat::zeros(_max, _max, CV_8UC3);
source.copyTo(result(cv::Rect(0, 0, col, row)));
return result;
}
int Yolo::getClassForStr(const std::string& str) const
{
for(size_t i = 0; i < classes.size(); ++i)
{
if(classes[i].first == str)
return i;
}
return -1;
}

65
SmartCrop/yolo.h Normal file
View File

@ -0,0 +1,65 @@
/* * SmartCrop - A tool for content aware cropping of images
* Copyright (C) 2024 Carl Philipp Klemm
*
* This file is part of SmartCrop.
*
* SmartCrop is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SmartCrop is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <fstream>
#include <vector>
#include <string>
#include <random>
#include <filesystem>
#include <opencv2/imgproc.hpp>
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
class Yolo
{
public:
struct Detection
{
int class_id = 0;
std::string className;
float confidence = 0.0;
int priority = -1;
cv::Scalar color;
cv::Rect box;
};
private:
static constexpr float modelConfidenceThreshold = 0.25;
static constexpr float modelScoreThreshold = 0.45;
static constexpr float modelNMSThreshold = 0.50;
std::string modelPath;
std::vector<std::pair<std::string, int>> classes;
cv::Size2f modelShape;
bool letterBoxForSquare = true;
cv::dnn::Net net;
void loadClasses(const std::string& classes);
void loadOnnxNetwork(const std::filesystem::path& path);
cv::Mat formatToSquare(const cv::Mat &source);
static void clampBox(cv::Rect& box, const cv::Size& size);
public:
Yolo(const std::filesystem::path &onnxModelPath = "", const cv::Size& modelInputShape = {640, 480},
const std::filesystem::path& classesTxtFilePath = "", bool runWithOCl = true);
std::vector<Detection> runInference(const cv::Mat &input);
int getClassForStr(const std::string& str) const;
};
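/*
 * Minimal usage sketch (illustrative, not part of the build); it assumes the builtin
 * weights compiled in via incbin are available and the input is an 8-bit BGR image:
 *
 *   Yolo yolo;
 *   cv::Mat img = cv::imread("photo.jpg");
 *   for(const Yolo::Detection& det : yolo.runInference(img))
 *       cv::rectangle(img, det.box, det.color, 2);
 */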