initial commit

This commit is contained in:
2023-06-28 23:59:50 +02:00
commit 438c9d726c
19 changed files with 2169 additions and 0 deletions

15
CMakeLists.txt Normal file
View File

@ -0,0 +1,15 @@
# Build script for the AIImagePrepross command line tool.
# CMAKE_CXX_STANDARD only understands the value 17 from CMake 3.8 onwards.
cmake_minimum_required(VERSION 3.8...3.29)
project(AIImagePrepross)

find_package(OpenCV REQUIRED)

# The sources use std::filesystem, so C++17 is a hard requirement and must
# not silently decay to an older standard.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

set(SRC_FILES
  main.cpp
  yolo.cpp
  tokenize.cpp
  log.cpp
  seamcarvingvert.cpp
  seamcarvinghoriz.cpp
  seamcarving.cpp
  utils.cpp
  intelligentroi.cpp
)

add_executable(${PROJECT_NAME} ${SRC_FILES})
target_link_libraries(${PROJECT_NAME} PRIVATE ${OpenCV_LIBS} tbb)
target_include_directories(${PROJECT_NAME} PRIVATE ${OpenCV_INCLUDE_DIRS})
# -s is a link-time (strip) option that has no effect while compiling and
# contradicts -g, so it is no longer passed to the compiler.
target_compile_options(${PROJECT_NAME} PRIVATE -g -Wall)
install(TARGETS ${PROJECT_NAME} RUNTIME DESTINATION bin)

476
incbin.h Normal file
View File

@ -0,0 +1,476 @@
/**
* @file incbin.h
* @author Dale Weiler
* @brief Utility for including binary files
*
* Facilities for including binary files into the current translation unit and
* making use of them externally in other translation units.
*/
#ifndef INCBIN_HDR
#define INCBIN_HDR
#include <limits.h>
/*
 * Select INCBIN_ALIGNMENT_INDEX from the predefined target macros:
 *   6 when any AVX-512 feature macro is defined,
 *   5 for AVX / AVX2,
 *   4 for SSE*, NEON or AltiVec,
 *   3 when ULONG_MAX is not 0xffffffff,
 *   2 otherwise.
 * The index is later turned into the actual alignment through the
 * INCBIN_ALIGN_SHIFT_<n> lookup table, i.e. alignment == (1 << index).
 */
#if defined(__AVX512BW__) || \
defined(__AVX512CD__) || \
defined(__AVX512DQ__) || \
defined(__AVX512ER__) || \
defined(__AVX512PF__) || \
defined(__AVX512VL__) || \
defined(__AVX512F__)
# define INCBIN_ALIGNMENT_INDEX 6
#elif defined(__AVX__) || \
defined(__AVX2__)
# define INCBIN_ALIGNMENT_INDEX 5
#elif defined(__SSE__) || \
defined(__SSE2__) || \
defined(__SSE3__) || \
defined(__SSSE3__) || \
defined(__SSE4_1__) || \
defined(__SSE4_2__) || \
defined(__neon__) || \
defined(__ARM_NEON) || \
defined(__ALTIVEC__)
# define INCBIN_ALIGNMENT_INDEX 4
#elif ULONG_MAX != 0xffffffffu
# define INCBIN_ALIGNMENT_INDEX 3
# else
# define INCBIN_ALIGNMENT_INDEX 2
#endif
/* Lookup table of (1 << n) where `n' is `INCBIN_ALIGNMENT_INDEX' */
#define INCBIN_ALIGN_SHIFT_0 1
#define INCBIN_ALIGN_SHIFT_1 2
#define INCBIN_ALIGN_SHIFT_2 4
#define INCBIN_ALIGN_SHIFT_3 8
#define INCBIN_ALIGN_SHIFT_4 16
#define INCBIN_ALIGN_SHIFT_5 32
#define INCBIN_ALIGN_SHIFT_6 64
/* Actual alignment value */
#define INCBIN_ALIGNMENT \
INCBIN_CONCATENATE( \
INCBIN_CONCATENATE(INCBIN_ALIGN_SHIFT, _), \
INCBIN_ALIGNMENT_INDEX)
/* Stringize */
#define INCBIN_STR(X) \
#X
#define INCBIN_STRINGIZE(X) \
INCBIN_STR(X)
/* Concatenate */
#define INCBIN_CAT(X, Y) \
X ## Y
#define INCBIN_CONCATENATE(X, Y) \
INCBIN_CAT(X, Y)
/* Deferred macro expansion */
#define INCBIN_EVAL(X) \
X
#define INCBIN_INVOKE(N, ...) \
INCBIN_EVAL(N(__VA_ARGS__))
/* Variable argument count for overloading by arity */
#define INCBIN_VA_ARG_COUNTER(_1, _2, _3, N, ...) N
#define INCBIN_VA_ARGC(...) INCBIN_VA_ARG_COUNTER(__VA_ARGS__, 3, 2, 1, 0)
/* Green Hills uses a different directive for including binary data */
#if defined(__ghs__)
# if (__ghs_asm == 2)
# define INCBIN_MACRO ".file"
/* Or consider the ".myrawdata" entry in the ld file */
# else
# define INCBIN_MACRO "\tINCBIN"
# endif
#else
# define INCBIN_MACRO ".incbin"
#endif
#ifndef _MSC_VER
# define INCBIN_ALIGN \
__attribute__((aligned(INCBIN_ALIGNMENT)))
#else
# define INCBIN_ALIGN __declspec(align(INCBIN_ALIGNMENT))
#endif
#if defined(__arm__) || /* GNU C and RealView */ \
defined(__arm) || /* Diab */ \
defined(_ARM) /* ImageCraft */
# define INCBIN_ARM
#endif
#ifdef __GNUC__
/* Utilize .balign where supported */
# define INCBIN_ALIGN_HOST ".balign " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n"
# define INCBIN_ALIGN_BYTE ".balign 1\n"
#elif defined(INCBIN_ARM)
/*
* On arm assemblers, the alignment value is calculated as (1 << n) where `n' is
* the shift count. This is the value passed to `.align'
*/
# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT_INDEX) "\n"
# define INCBIN_ALIGN_BYTE ".align 0\n"
#else
/* We assume other inline assembler's treat `.align' as `.balign' */
# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n"
# define INCBIN_ALIGN_BYTE ".align 1\n"
#endif
/* INCBIN_CONST is used by incbin.c generated files */
#if defined(__cplusplus)
# define INCBIN_EXTERNAL extern "C"
# define INCBIN_CONST extern const
#else
# define INCBIN_EXTERNAL extern
# define INCBIN_CONST const
#endif
/**
* @brief Optionally override the linker section into which size and data is
* emitted.
*
* @warning If you use this facility, you might have to deal with
* platform-specific linker output section naming on your own.
*/
#if !defined(INCBIN_OUTPUT_SECTION)
# if defined(__APPLE__)
# define INCBIN_OUTPUT_SECTION ".const_data"
# else
# define INCBIN_OUTPUT_SECTION ".rodata"
# endif
#endif
/**
* @brief Optionally override the linker section into which data is emitted.
*
* @warning If you use this facility, you might have to deal with
* platform-specific linker output section naming on your own.
*/
#if !defined(INCBIN_OUTPUT_DATA_SECTION)
# define INCBIN_OUTPUT_DATA_SECTION INCBIN_OUTPUT_SECTION
#endif
/**
* @brief Optionally override the linker section into which size is emitted.
*
* @warning If you use this facility, you might have to deal with
* platform-specific linker output section naming on your own.
*
* @note This is useful for Harvard architectures where program memory cannot
* be directly read from the program without special instructions. With this you
* can choose to put the size variable in RAM rather than ROM.
*/
#if !defined(INCBIN_OUTPUT_SIZE_SECTION)
# define INCBIN_OUTPUT_SIZE_SECTION INCBIN_OUTPUT_SECTION
#endif
#if defined(__APPLE__)
# include "TargetConditionals.h"
# if defined(TARGET_OS_IPHONE) && !defined(INCBIN_SILENCE_BITCODE_WARNING)
# warning "incbin is incompatible with bitcode. Using the library will break upload to App Store if you have bitcode enabled. Add `#define INCBIN_SILENCE_BITCODE_WARNING` before including this header to silence this warning."
# endif
/* The directives are different for Apple branded compilers */
# define INCBIN_SECTION INCBIN_OUTPUT_SECTION "\n"
# define INCBIN_GLOBAL(NAME) ".globl " INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n"
# define INCBIN_INT ".long "
# define INCBIN_MANGLE "_"
# define INCBIN_BYTE ".byte "
# define INCBIN_TYPE(...)
#else
# define INCBIN_SECTION ".section " INCBIN_OUTPUT_SECTION "\n"
# define INCBIN_GLOBAL(NAME) ".global " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n"
# if defined(__ghs__)
# define INCBIN_INT ".word "
# else
# define INCBIN_INT ".int "
# endif
# if defined(__USER_LABEL_PREFIX__)
# define INCBIN_MANGLE INCBIN_STRINGIZE(__USER_LABEL_PREFIX__)
# else
# define INCBIN_MANGLE ""
# endif
# if defined(INCBIN_ARM)
/* On arm assemblers, `@' is used as a line comment token */
# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", %object\n"
# elif defined(__MINGW32__) || defined(__MINGW64__)
/* Mingw doesn't support this directive either */
# define INCBIN_TYPE(NAME)
# else
/* It's safe to use `@' on other architectures */
# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", @object\n"
# endif
# define INCBIN_BYTE ".byte "
#endif
/* List of style types used for symbol names */
#define INCBIN_STYLE_CAMEL 0
#define INCBIN_STYLE_SNAKE 1
/**
* @brief Specify the prefix to use for symbol names.
*
* @note By default this is "g".
*
* @code
* #define INCBIN_PREFIX incbin
* #include "incbin.h"
* INCBIN(Foo, "foo.txt");
*
* // Now you have the following symbols instead:
* // const unsigned char incbinFoo<data>[];
* // const unsigned char *const incbinFoo<end>;
* // const unsigned int incbinFoo<size>;
* @endcode
*/
#if !defined(INCBIN_PREFIX)
# define INCBIN_PREFIX g
#endif
/**
* @brief Specify the style used for symbol names.
*
* Possible options are
* - INCBIN_STYLE_CAMEL "CamelCase"
* - INCBIN_STYLE_SNAKE "snake_case"
*
* @note By default this is INCBIN_STYLE_CAMEL
*
* @code
* #define INCBIN_STYLE INCBIN_STYLE_SNAKE
* #include "incbin.h"
* INCBIN(foo, "foo.txt");
*
* // Now you have the following symbols:
* // const unsigned char <prefix>foo_data[];
* // const unsigned char *const <prefix>foo_end;
* // const unsigned int <prefix>foo_size;
* @endcode
*/
#if !defined(INCBIN_STYLE)
# define INCBIN_STYLE INCBIN_STYLE_CAMEL
#endif
/* Style lookup tables */
#define INCBIN_STYLE_0_DATA Data
#define INCBIN_STYLE_0_END End
#define INCBIN_STYLE_0_SIZE Size
#define INCBIN_STYLE_1_DATA _data
#define INCBIN_STYLE_1_END _end
#define INCBIN_STYLE_1_SIZE _size
/* Style lookup: returning identifier */
#define INCBIN_STYLE_IDENT(TYPE) \
INCBIN_CONCATENATE( \
INCBIN_STYLE_, \
INCBIN_CONCATENATE( \
INCBIN_EVAL(INCBIN_STYLE), \
INCBIN_CONCATENATE(_, TYPE)))
/* Style lookup: returning string literal.
 * Expands to the stringized result of INCBIN_STYLE_IDENT(TYPE). The macro
 * definition deliberately ends without a trailing backslash so that it is
 * not spliced together with whatever line follows it. */
#define INCBIN_STYLE_STRING(TYPE) \
    INCBIN_STRINGIZE( \
        INCBIN_STYLE_IDENT(TYPE))
/* Generate the global labels by indirectly invoking the macro with our style
* type and concatenating the name against them. */
#define INCBIN_GLOBAL_LABELS(NAME, TYPE) \
INCBIN_INVOKE( \
INCBIN_GLOBAL, \
INCBIN_CONCATENATE( \
NAME, \
INCBIN_INVOKE( \
INCBIN_STYLE_IDENT, \
TYPE))) \
INCBIN_INVOKE( \
INCBIN_TYPE, \
INCBIN_CONCATENATE( \
NAME, \
INCBIN_INVOKE( \
INCBIN_STYLE_IDENT, \
TYPE)))
/**
* @brief Externally reference binary data included in another translation unit.
*
* Produces three external symbols that reference the binary data included in
* another translation unit.
*
* The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
* "Data", as well as "End" and "Size" after. An example is provided below.
*
* @param TYPE Optional array type. Omitting this picks a default of `unsigned char`.
* @param NAME The name given for the binary data
*
* @code
* INCBIN_EXTERN(Foo);
*
* // Now you have the following symbols:
* // extern const unsigned char <prefix>Foo<data>[];
* // extern const unsigned char *const <prefix>Foo<end>;
* // extern const unsigned int <prefix>Foo<size>;
* @endcode
*
* You may specify a custom optional data type as well as the first argument.
* @code
* INCBIN_EXTERN(custom_type, Foo);
*
* // Now you have the following symbols:
* // extern const custom_type <prefix>Foo<data>[];
* // extern const custom_type *const <prefix>Foo<end>;
* // extern const unsigned int <prefix>Foo<size>;
* @endcode
*/
#define INCBIN_EXTERN(...) \
INCBIN_CONCATENATE(INCBIN_EXTERN_, INCBIN_VA_ARGC(__VA_ARGS__))(__VA_ARGS__)
#define INCBIN_EXTERN_1(NAME, ...) \
INCBIN_EXTERN_2(unsigned char, NAME)
#define INCBIN_EXTERN_2(TYPE, NAME) \
INCBIN_EXTERNAL const INCBIN_ALIGN TYPE \
INCBIN_CONCATENATE( \
INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
INCBIN_STYLE_IDENT(DATA))[]; \
INCBIN_EXTERNAL const INCBIN_ALIGN TYPE *const \
INCBIN_CONCATENATE( \
INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
INCBIN_STYLE_IDENT(END)); \
INCBIN_EXTERNAL const unsigned int \
INCBIN_CONCATENATE( \
INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
INCBIN_STYLE_IDENT(SIZE))
/**
* @brief Externally reference textual data included in another translation unit.
*
* Produces three external symbols that reference the textual data included in
* another translation unit.
*
* The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
* "Data", as well as "End" and "Size" after. An example is provided below.
*
* @param NAME The name given for the textual data
*
* @code
* INCTXT_EXTERN(Foo);
*
* // Now you have the following symbols:
* // extern const char <prefix>Foo<data>[];
* // extern const char *const <prefix>Foo<end>;
* // extern const unsigned int <prefix>Foo<size>;
* @endcode
*/
#define INCTXT_EXTERN(NAME) \
INCBIN_EXTERN_2(char, NAME)
/**
* @brief Include a binary file into the current translation unit.
*
* Includes a binary file into the current translation unit, producing three symbols
* for objects that encode the data and size respectively.
*
* The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
* "Data", as well as "End" and "Size" after. An example is provided below.
*
* @param TYPE Optional array type. Omitting this picks a default of `unsigned char`.
* @param NAME The name to associate with this binary data (as an identifier.)
* @param FILENAME The file to include (as a string literal.)
*
* @code
* INCBIN(Icon, "icon.png");
*
* // Now you have the following symbols:
* // const unsigned char <prefix>Icon<data>[];
* // const unsigned char *const <prefix>Icon<end>;
* // const unsigned int <prefix>Icon<size>;
* @endcode
*
* You may specify a custom optional data type as well as the first argument.
* These macros are specialized by arity.
* @code
* INCBIN(custom_type, Icon, "icon.png");
*
* // Now you have the following symbols:
* // const custom_type <prefix>Icon<data>[];
* // const custom_type *const <prefix>Icon<end>;
* // const unsigned int <prefix>Icon<size>;
* @endcode
*
* @warning This must be used in global scope
* @warning The identifiers may be different if INCBIN_STYLE is not default
*
* To externally reference the data included by this in another translation unit
* please @see INCBIN_EXTERN.
*/
#ifdef _MSC_VER
# define INCBIN(NAME, FILENAME) \
INCBIN_EXTERN(NAME)
#else
# define INCBIN(...) \
INCBIN_CONCATENATE(INCBIN_, INCBIN_VA_ARGC(__VA_ARGS__))(__VA_ARGS__)
# if defined(__GNUC__)
# define INCBIN_1(...) _Pragma("GCC error \"Single argument INCBIN not allowed\"")
# elif defined(__clang__)
# define INCBIN_1(...) _Pragma("clang error \"Single argument INCBIN not allowed\"")
# else
# define INCBIN_1(...) /* Cannot do anything here */
# endif
# define INCBIN_2(NAME, FILENAME) \
INCBIN_3(unsigned char, NAME, FILENAME)
# define INCBIN_3(TYPE, NAME, FILENAME) INCBIN_COMMON(TYPE, NAME, FILENAME, /* No terminator for binary data */)
/*
 * Shared implementation behind INCBIN_3 and INCTXT.
 *
 * Emits one top-level inline assembly block that, in order:
 *  - switches to INCBIN_SECTION,
 *  - declares and defines the <prefix><NAME><data> label (host aligned) and
 *    pulls in FILENAME with the INCBIN_MACRO directive,
 *  - appends TERMINATOR (empty for binary data, a zero byte for INCTXT),
 *  - declares and defines the <prefix><NAME><end> label (byte aligned),
 *    followed by a single byte with value 1,
 *  - declares and defines the <prefix><NAME><size> label holding
 *    `end - data` as an INCBIN_INT,
 *  - switches back to `.text`.
 * Afterwards the matching C declarations are produced with
 * INCBIN_EXTERN(TYPE, NAME).
 */
# define INCBIN_COMMON(TYPE, NAME, FILENAME, TERMINATOR) \
__asm__(INCBIN_SECTION \
INCBIN_GLOBAL_LABELS(NAME, DATA) \
INCBIN_ALIGN_HOST \
INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) ":\n" \
INCBIN_MACRO " \"" FILENAME "\"\n" \
TERMINATOR \
INCBIN_GLOBAL_LABELS(NAME, END) \
INCBIN_ALIGN_BYTE \
INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) ":\n" \
INCBIN_BYTE "1\n" \
INCBIN_GLOBAL_LABELS(NAME, SIZE) \
INCBIN_ALIGN_HOST \
INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(SIZE) ":\n" \
INCBIN_INT INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) " - " \
INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) "\n" \
INCBIN_ALIGN_HOST \
".text\n" \
); \
INCBIN_EXTERN(TYPE, NAME)
#endif
/**
* @brief Include a textual file into the current translation unit.
*
* This behaves the same as INCBIN except it produces char compatible arrays
* and implicitly adds a null-terminator byte, thus the size of data included
* by this is one byte larger than that of INCBIN.
*
* Includes a textual file into the current translation unit, producing three
* symbols for objects that encode the data and size respectively.
*
* The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
* "Data", as well as "End" and "Size" after. An example is provided below.
*
* @param NAME The name to associate with this binary data (as an identifier.)
* @param FILENAME The file to include (as a string literal.)
*
* @code
* INCTXT(Readme, "readme.txt");
*
* // Now you have the following symbols:
* // const char <prefix>Readme<data>[];
* // const char *const <prefix>Readme<end>;
* // const unsigned int <prefix>Readme<size>;
* @endcode
*
* @warning This must be used in global scope
* @warning The identifiers may be different if INCBIN_STYLE is not default
*
* To externally reference the data included by this in another translation unit
* please @see INCTXT_EXTERN.
*/
#if defined(_MSC_VER)
# define INCTXT(NAME, FILENAME) \
INCBIN_EXTERN(NAME)
#else
# define INCTXT(NAME, FILENAME) \
INCBIN_COMMON(char, NAME, FILENAME, INCBIN_BYTE "0\n")
#endif
#endif

99
intelligentroi.cpp Normal file
View File

@ -0,0 +1,99 @@
#include "intelligentroi.h"
#include <opencv2/imgproc.hpp>
#include "utils.h"
#include "log.h"
// Ordering predicate for (point, priority) pairs: a pair with a higher
// priority sorts first; pairs of equal priority are ordered by ascending
// distance of their point to `center`.
bool InteligentRoi::compPointPrio(const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b, const cv::Point2i& center)
{
    const bool samePriority = (a.second == b.second);
    if(!samePriority)
        return b.second < a.second;

    const double distanceOfA = pointDist(a.first, center);
    const double distanceOfB = pointDist(b.first, center);
    return distanceOfB > distanceOfA;
}
// Translates `rect` (its size is left untouched) by the smallest amount, per
// axis, that brings `point` onto the rectangle's edge. Does nothing when
// pointInRect() already reports the point as inside.
void InteligentRoi::slideRectToPoint(cv::Rect& rect, const cv::Point2i& point)
{
    if(pointInRect(point, rect))
        return;

    // Both far edges are taken before the rectangle is moved; each axis is
    // only compared against its own, still unmodified, coordinates.
    const int rightEdge = rect.x+rect.width;
    const int bottomEdge = rect.y+rect.height;

    if(point.x < rect.x)
        rect.x = point.x;
    else if(point.x > rightEdge)
        rect.x = point.x-rect.width;

    if(point.y < rect.y)
        rect.y = point.y;
    else if(point.y > bottomEdge)
        rect.y = point.y-rect.height;
}
// Computes the crop rectangle for an image of `imageSize`.
//
// The candidate starts as the largest centered square. `mustInclude` holds
// (point, priority) pairs; they are sorted with compPointPrio() so that the
// least important point is at the back. While the bounding rectangle of the
// remaining points is too large, the candidate is slid towards the last point
// and that point is dropped. The candidate is then slid to every remaining
// point and finally clamped to the image.
//
// With an empty `mustInclude` the centered square (clamped) is returned.
cv::Rect InteligentRoi::maxRect(const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude)
{
    int radius = std::min(imageSize.height, imageSize.width)/2;
    cv::Point2i point(imageSize.width/2, imageSize.height/2);
    cv::Rect candiate(point.x-radius, point.y-radius, radius*2, radius*2);

    std::sort(mustInclude.begin(), mustInclude.end(),
        [&point](const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b){return compPointPrio(a, b, point);});

    // The loop is bounded by the number of points: back()/pop_back() on an
    // empty vector is undefined behaviour, and rectFromPoints() is never asked
    // to build a rectangle from an empty point list.
    while(!mustInclude.empty())
    {
        cv::Rect includeRect = rectFromPoints(mustInclude);
        // NOTE(review): the limit is `radius`, i.e. half of the candidate's
        // side length - confirm that this is the intended threshold.
        if(includeRect.width-2 <= radius && includeRect.height-2 <= radius)
            break;

        slideRectToPoint(candiate, mustInclude.back().first);
        mustInclude.pop_back();
        Log(Log::DEBUG)<<"cant fill";
        for(const std::pair<cv::Point2i, int>& mipoint : mustInclude)
            Log(Log::DEBUG)<<mipoint.first<<' '<<pointDist(mipoint.first, point)<<' '<<mipoint.second;
    }

    for(const std::pair<cv::Point2i, int>& includePoint : mustInclude)
        slideRectToPoint(candiate, includePoint.first);

    // Clamp to the image: the origin is moved, the far edges are cut.
    if(candiate.x < 0)
        candiate.x = 0;
    if(candiate.y < 0)
        candiate.y = 0;
    if(candiate.x+candiate.width > imageSize.width)
        candiate.width = imageSize.width-candiate.x;
    if(candiate.y+candiate.height > imageSize.height)
        candiate.height = imageSize.height-candiate.y;
    return candiate;
}
// Looks up the class id that `yolo` reports for the label "person" once, so
// getCropRectangle() can recognise person detections.
InteligentRoi::InteligentRoi(const Yolo& yolo):
    personId(yolo.getClassForStr("person"))
{
}
// Builds the list of points the crop should contain from `detections` and
// hands it to maxRect(). Every detection contributes the four corners of its
// box with the detection's priority; a detection of the "person" class also
// contributes the middle of its top edge with priority+1. Without detections
// the plain center crop is used.
cv::Rect InteligentRoi::getCropRectangle(const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize)
{
    if(detections.empty())
    {
        Log(Log::DEBUG)<<"Using center crop as there are no detections";
        return maxRect(imageSize);
    }

    std::vector<std::pair<cv::Point2i, int>> corners;
    for(const Yolo::Detection& detection : detections)
    {
        const auto& box = detection.box;
        int priority = detection.priority;

        if(detection.class_id == personId)
            corners.push_back({box.tl()+cv::Point2i(box.width/2, 0), priority+1});

        corners.push_back({box.tl(), priority});                                // top left
        corners.push_back({box.br(), priority});                                // bottom right
        corners.push_back({box.tl()+cv::Point2i(box.width, 0), priority});      // top right
        corners.push_back({box.br()+cv::Point2i(0-box.width, 0), priority});    // bottom left
    }
    return maxRect(imageSize, corners);
}

18
intelligentroi.h Normal file
View File

@ -0,0 +1,18 @@
#pragma once
#include <opencv2/imgproc.hpp>
#include "yolo.h"
// Chooses a crop rectangle for an image from a list of Yolo detections.
class InteligentRoi
{
public:
    // Caches the class id `yolo` reports for the label "person".
    InteligentRoi(const Yolo& yolo);

    // Returns the rectangle of an image of size `imageSize` that should be
    // kept, given the `detections` found in that image.
    cv::Rect getCropRectangle(const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize);

private:
    // Class id of "person" detections.
    int personId;

    // Ordering predicate: higher priority first, then nearer to `center`.
    static bool compPointPrio(const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b, const cv::Point2i& center);

    // Moves `rect` without resizing it until `point` is no longer outside.
    static void slideRectToPoint(cv::Rect& rect, const cv::Point2i& point);

    // Centered square crop, slid towards the (point, priority) pairs in
    // `mustInclude` and clamped to the image.
    static cv::Rect maxRect(const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude = {});
};

63
log.cpp Normal file
View File

@ -0,0 +1,63 @@
/**
* Lubricant Detecter
* Copyright (C) 2021 Carl Klemm
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 3 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#include "log.h"
// Starts a log message of level `type`. When `endlineI` is set the
// destructor terminates the message with a newline. If the static `headers`
// flag is enabled, the message is prefixed with "[<label>] ".
Log::Log(Level type, bool endlineI): msglevel(type), endline(endlineI)
{
    if(!headers)
        return;

    *this<<("["+getLabel(type)+"] ");
}
// Finishes the message: if anything was printed and line termination was
// requested, a newline is written to the same stream operator<< used
// (std::cerr for ERROR messages, std::cout for everything else).
Log::~Log()
{
    if(opened && endline)
    {
        if(msglevel == ERROR)
            std::cerr<<'\n';
        else
            std::cout<<'\n';
    }
    opened = false;
}
// Returns the five character label used in message headers for `level`;
// an empty string for a value outside the enumeration.
std::string Log::getLabel(Level level)
{
    switch(level)
    {
        case DEBUG:
            return "DEBUG";
        case INFO:
            return "INFO ";
        case WARN:
            return "WARN ";
        case ERROR:
            return "ERROR";
    }
    return std::string();
}
// Message headers ("[LEVEL] ") are disabled by default.
bool Log::headers = false;
// Default threshold: only messages of level WARN and above are printed.
Log::Level Log::level = WARN;

64
log.h Normal file
View File

@ -0,0 +1,64 @@
/**
* eisgenerator
* Copyright (C) 2021 Carl Klemm
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 3 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#pragma once
#include <iostream>
#include <string>
// Minimal stream style logger. A temporary Log object represents one
// message: parts are appended with operator<< and the destructor finishes
// the message.
class Log
{
public:
    // Severity of a message, in ascending order.
    enum Level
    {
        DEBUG,
        INFO,
        WARN,
        ERROR
    };

private:
    // Set once at least one part of the message has been printed.
    bool opened = false;
    // Severity of this message.
    Level msglevel = DEBUG;
    // Whether the destructor should terminate the message with a newline.
    bool endline = true;

    std::string getLabel(Level level);

public:
    // When true every message starts with a "[LEVEL] " header.
    static bool headers;
    // Messages below this level are discarded.
    static Level level;

    Log() {}
    Log(Level type, bool endlineI = true);
    ~Log();

    // Prints `msg` if this message's level passes the global threshold;
    // ERROR messages go to std::cerr, all others to std::cout.
    template<class T> Log &operator<<(const T &msg)
    {
        if(msglevel < level)
            return *this;

        std::ostream& sink = (msglevel == ERROR) ? std::cerr : std::cout;
        sink<<msg;
        opened = true;
        return *this;
    }
};

295
main.cpp Normal file
View File

@ -0,0 +1,295 @@
#include <filesystem>
#include <iostream>
#include <opencv2/core/types.hpp>
#include <opencv2/imgproc.hpp>
#include <algorithm>
#include <vector>
#include "yolo.h"
#include "log.h"
#include "options.h"
#include "utils.h"
#include "intelligentroi.h"
// Returns the detection whose horizontal extent [box.x, box.x+box.width)
// contains column `x`, or nullptr if there is none. When several detections
// contain `x`, the one reaching furthest to the right is returned. The
// detection pointed to by `ignore` (if any) is never considered.
const Yolo::Detection* pointInDetectionHoriz(int x, const std::vector<Yolo::Detection>& detections, const Yolo::Detection* ignore = nullptr)
{
    const Yolo::Detection* inDetection = nullptr;
    for(const Yolo::Detection& detection : detections)
    {
        // Skip only the detection the caller asked to ignore; with
        // ignore == nullptr nothing is skipped.
        if(ignore == &detection)
            continue;

        // Half-open interval: a box that ends exactly at x does not contain it.
        if(detection.box.x <= x && x < detection.box.x+detection.box.width)
        {
            if(!inDetection || detection.box.br().x > inDetection->box.br().x)
                inDetection = &detection;
        }
    }
    return inDetection;
}
// Advances `x` to the end of the horizontal region that starts at `x`.
//
// If `x` lies outside every detection, `x` becomes the left edge of the
// nearest detection to the right (or `imgSizeX` if there is none) and false
// is returned. If `x` lies inside a detection, `x` becomes the right edge of
// the run of overlapping detections and true is returned.
bool findRegionEndpointHoriz(int& x, const std::vector<Yolo::Detection>& detections, int imgSizeX)
{
    const Yolo::Detection* inDetection = pointInDetectionHoriz(x, detections);
    if(!inDetection)
    {
        // Nearest detection that starts to the right of x, i.e. the one with
        // the smallest left edge among those with box.x > x.
        const Yolo::Detection* closest = nullptr;
        for(const Yolo::Detection& detection : detections)
        {
            if(detection.box.x > x)
            {
                if(closest == nullptr || detection.box.x < closest->box.x)
                    closest = &detection;
            }
        }
        if(closest)
            x = closest->box.x;
        else
            x = imgSizeX;
        return false;
    }
    else
    {
        x = inDetection->box.br().x;
        // Another detection may overlap the end of this one; if it reaches
        // further, the region continues.
        const Yolo::Detection* candidateDetection = pointInDetectionHoriz(x, detections, inDetection);
        if(candidateDetection && candidateDetection->box.br().x > x)
            return findRegionEndpointHoriz(x, detections, imgSizeX);
        else
            return true;
    }
}
// Partitions `image` into consecutive full-height slices along the x axis.
// Each entry holds the slice (a view into `image`, no pixel data is copied)
// and a flag that is true when the slice is covered by a detection.
std::vector<std::pair<cv::Mat, bool>> cutImageIntoHorzRegions(cv::Mat& image, const std::vector<Yolo::Detection>& detections)
{
    std::vector<std::pair<cv::Mat, bool>> out;
    int x = 0;
    // No increment here: the next slice starts exactly where the previous
    // one ended, so no column is skipped.
    while(x < image.cols)
    {
        const int start = x;
        const bool frozen = findRegionEndpointHoriz(x, detections, image.cols);
        // Guarantee forward progress and keep the slice inside the image even
        // if a detection box extends past the image border.
        x = std::min(std::max(x, start+1), image.cols);
        cv::Mat slice = image(cv::Rect(start, 0, x-start, image.rows));
        out.push_back({slice, frozen});
    }
    return out;
}
// Returns the detection whose vertical extent [box.y, box.y+box.height)
// contains row `y`, or nullptr if there is none. When several detections
// contain `y`, the one reaching furthest down is returned. The detection
// pointed to by `ignore` (if any) is never considered.
const Yolo::Detection* pointInDetectionVert(int y, const std::vector<Yolo::Detection>& detections, const Yolo::Detection* ignore = nullptr)
{
    const Yolo::Detection* inDetection = nullptr;
    for(const Yolo::Detection& detection : detections)
    {
        // Skip only the detection the caller asked to ignore; with
        // ignore == nullptr nothing is skipped.
        if(ignore == &detection)
            continue;

        // Half-open interval: a box that ends exactly at y does not contain it.
        if(detection.box.y <= y && y < detection.box.y+detection.box.height)
        {
            if(!inDetection || detection.box.br().y > inDetection->box.br().y)
                inDetection = &detection;
        }
    }
    return inDetection;
}
// Advances `y` to the end of the vertical region that starts at `y`.
//
// If `y` lies outside every detection, `y` becomes the top edge of the
// nearest detection below (or `imgSizeY` if there is none) and false is
// returned. If `y` lies inside a detection, `y` becomes the bottom edge of
// the run of overlapping detections and true is returned.
bool findRegionEndpointVert(int& y, const std::vector<Yolo::Detection>& detections, int imgSizeY)
{
    const Yolo::Detection* inDetection = pointInDetectionVert(y, detections);
    if(!inDetection)
    {
        // Nearest detection that starts below y, i.e. the one with the
        // smallest top edge among those with box.y > y.
        const Yolo::Detection* closest = nullptr;
        for(const Yolo::Detection& detection : detections)
        {
            if(detection.box.y > y)
            {
                if(closest == nullptr || detection.box.y < closest->box.y)
                    closest = &detection;
            }
        }
        if(closest)
            y = closest->box.y;
        else
            y = imgSizeY;
        return false;
    }
    else
    {
        y = inDetection->box.br().y;
        // Another detection may overlap the end of this one; if it reaches
        // further, the region continues.
        const Yolo::Detection* candidateDetection = pointInDetectionVert(y, detections, inDetection);
        if(candidateDetection && candidateDetection->box.br().y > y)
            return findRegionEndpointVert(y, detections, imgSizeY);
        else
            return true;
    }
}
// Partitions `image` into consecutive full-width slices along the y axis.
// Each entry holds the slice (a view into `image`, no pixel data is copied)
// and a flag that is true when the slice is covered by a detection.
std::vector<std::pair<cv::Mat, bool>> cutImageIntoVertRegions(cv::Mat& image, const std::vector<Yolo::Detection>& detections)
{
    std::vector<std::pair<cv::Mat, bool>> out;
    int y = 0;
    // No increment here: the next slice starts exactly where the previous
    // one ended, so no row is skipped.
    while(y < image.rows)
    {
        const int start = y;
        const bool frozen = findRegionEndpointVert(y, detections, image.rows);
        // Guarantee forward progress and keep the slice inside the image even
        // if a detection box extends past the image border.
        y = std::min(std::max(y, start+1), image.rows);
        cv::Mat slice = image(cv::Rect(0, start, image.cols, y-start));
        out.push_back({slice, frozen});
    }
    return out;
}
// Plans a seam carving based resize of `image` towards `targetAspectRatio`
// (width/height) that leaves the regions covered by `detections` untouched.
//
// The number of lines that have to be added is derived from the size of
// `image`; for the horizontal case the lines are distributed over the slices
// that are not covered by a detection, proportionally to their width.
//
// NOTE(review): the carving step itself is not implemented yet, `image` is
// never modified. The function therefore always returns false - confirm the
// intended meaning of the return value once the implementation is completed.
bool seamCarveResize(cv::Mat& image, const std::vector<Yolo::Detection>& detections, double targetAspectRatio = 1.0)
{
    double aspectRatio = image.cols/static_cast<double>(image.rows);

    // Image is wider than requested: rows have to be added.
    const bool vertical = aspectRatio > targetAspectRatio;

    int requiredLines = 0;
    if(!vertical)
        requiredLines = image.rows*targetAspectRatio - image.cols;
    else
        requiredLines = image.cols/targetAspectRatio - image.rows;

    Log(Log::DEBUG)<<__func__<<' '<<requiredLines<<" lines are required in "<<(vertical ? "vertical" : "horizontal")<<" direction";

    if(!vertical)
    {
        std::vector<std::pair<cv::Mat, bool>> slices = cutImageIntoHorzRegions(image, detections);

        // slice.second is the "frozen" flag, so only slices where it is
        // false may be resized.
        int totalResizableSize = 0;
        for(const std::pair<cv::Mat, bool>& slice : slices)
        {
            if(!slice.second)
                totalResizableSize += slice.first.cols;
        }

        std::vector<int> seamsForSlice(slices.size(), 0);
        if(totalResizableSize > 0)
        {
            for(size_t i = 0; i < slices.size(); ++i)
            {
                if(!slices[i].second)
                    seamsForSlice[i] = (static_cast<double>(slices[i].first.cols)/totalResizableSize)*requiredLines;
            }
        }
    }
    else
    {
        std::vector<std::pair<cv::Mat, bool>> slices = cutImageIntoVertRegions(image, detections);
    }

    return false;
}
// Draws every detection (box outline plus a filled label box containing the
// class name and the first four characters of the confidence) and finally
// the crop rectangle `rect` in red onto `image`.
void drawDebugInfo(cv::Mat &image, const cv::Rect& rect, const std::vector<Yolo::Detection>& detections)
{
    const int labelFont = cv::FONT_HERSHEY_DUPLEX;
    const cv::Scalar textColor(0, 0, 0);
    const cv::Scalar cropColor(0, 0, 255);

    for(const Yolo::Detection& detection : detections)
    {
        const auto& box = detection.box;
        cv::rectangle(image, box, detection.color, 4);

        std::string label = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4);
        cv::Size labelSize = cv::getTextSize(label, labelFont, 3, 2, 0);

        cv::Rect textBox(box.x, box.y - 80, labelSize.width + 10, labelSize.height + 20);
        cv::rectangle(image, textBox, detection.color, cv::FILLED);

        cv::Point textOrigin(box.x + 5, box.y - 10);
        cv::putText(image, label, textOrigin, labelFont, 3, textColor, 2, 0);
    }

    cv::rectangle(image, rect, cropColor, 8);
}
int main(int argc, char* argv[])
{
Log::level = Log::INFO;
Config config;
argp_parse(&argp, argc, argv, 0, 0, &config);
if(config.outputDir.empty())
{
Log(Log::ERROR)<<"a output path \"-o\" is required";
return 1;
}
if(config.imagePaths.empty())
{
Log(Log::ERROR)<<"at least one input image or directory is required";
return 1;
}
std::vector<std::filesystem::path> imagePaths;
for(const std::filesystem::path& path : config.imagePaths)
getImageFiles(path, imagePaths);
if(imagePaths.empty())
{
Log(Log::ERROR)<<"no image was found\n";
return 1;
}
Yolo yolo(config.modelPath, {640, 480}, config.classesPath, false);
InteligentRoi intRoi(yolo);
if(!std::filesystem::exists(config.outputDir))
{
if(!std::filesystem::create_directory(config.outputDir))
{
Log(Log::ERROR)<<"could not create directory at "<<config.outputDir;
return 1;
}
}
std::filesystem::path debugOutputPath(config.outputDir/"debug");
if(config.debug)
{
if(!std::filesystem::exists(debugOutputPath))
std::filesystem::create_directory(debugOutputPath);
}
for(const std::filesystem::path& path : imagePaths)
{
cv::Mat image = cv::imread(path);
if(!image.data)
{
Log(Log::WARN)<<"could not load image "<<path<<" skipping";
continue;
}
if(std::max(image.cols, image.rows) > 1024)
{
if(image.cols > image.rows)
{
double ratio = 1024.0/image.cols;
cv::resize(image, image, {1024, static_cast<int>(image.rows*ratio)}, 0, 0, cv::INTER_CUBIC);
}
else
{
double ratio = 1024.0/image.rows;
cv::resize(image, image, {static_cast<int>(image.cols*ratio), 1024}, 0, 0, cv::INTER_CUBIC);
}
}
std::vector<Yolo::Detection> detections = yolo.runInference(image);
Log(Log::DEBUG)<<"Got "<<detections.size()<<" detections for "<<path;
for(const Yolo::Detection& detection : detections)
Log(Log::DEBUG)<<detection.class_id<<": "<<detection.className<<" at "<<detection.box<<" with prio "<<detection.priority;
cv::Rect crop = intRoi.getCropRectangle(detections, image.size());
cv::Mat debugImage = image.clone();
drawDebugInfo(debugImage, crop, detections);
bool ret = cv::imwrite(debugOutputPath/path.filename(), debugImage);
if(!ret)
Log(Log::WARN)<<"could not save debug image to "<<debugOutputPath/path.filename()<<" skipping";
cv::Mat croppedImage = image(crop);
cv::Mat resizedImage;
cv::resize(croppedImage, resizedImage, {512, 512}, 0, 0, cv::INTER_CUBIC);
ret = cv::imwrite(config.outputDir/path.filename(), resizedImage);
if(!ret)
Log(Log::WARN)<<"could not save image to "<<config.outputDir/path.filename()<<" skipping";
}
return 0;
}

70
options.h Normal file
View File

@ -0,0 +1,70 @@
#pragma once
#include <string>
#include <vector>
#include <argp.h>
#include <iostream>
#include <filesystem>
#include "log.h"
// NOTE(review): these are definitions, not declarations, so this header can
// only be included from a single translation unit without causing duplicate
// symbols at link time.
// Version string and bug report address; presumably picked up by argp for
// its --version and --help output - see the glibc argp documentation.
const char *argp_program_version = "AIImagePreprocesses";
const char *argp_program_bug_address = "<carl@uvos.xyz>";
// Program description and usage pattern handed to the argp parser below.
static char doc[] = "Application that trainsforms images into formats, sizes and aspect ratios required for ai training";
static char args_doc[] = "[IMAGES]";
// Command line options understood by parse_opt().
// Fields per entry: long name, short key, argument name (0 = no argument),
// flags, help text. The vector must end with an all-zero entry, which is how
// argp finds the end of the list.
static struct argp_option options[] =
{
    {"verbose",      'v', 0,             0, "Show debug messages" },
    {"quiet",        'q', 0,             0, "only output data" },
    {"model",        'm', "[FILENAME]",  0, "YoloV8 model to use for detection" },
    {"classes",      'c', "[FILENAME]",  0, "classes text file to use" },
    {"out",          'o', "[DIRECTORY]", 0, "directory where images are to be saved" },
    {"debug",        'd', 0,             0, "output debug images" },
    {"seam-carving", 's', 0,             0, "enable seam carving" },
    {0}
};
// Settings collected from the command line by parse_opt().
struct Config
{
// Positional arguments, in the order they were given.
std::vector<std::filesystem::path> imagePaths;
// Argument of -m / --model.
std::filesystem::path modelPath;
// Argument of -c / --classes.
std::filesystem::path classesPath;
// Argument of -o / --out.
std::filesystem::path outputDir;
// Set by -s / --seam-carving.
bool seamCarving = false;
// Set by -d / --debug.
bool debug = false;
};
static error_t parse_opt (int key, char *arg, struct argp_state *state)
{
Config *config = reinterpret_cast<Config*>(state->input);
switch (key)
{
case 'q':
Log::level = Log::ERROR;
break;
case 'v':
Log::level = Log::DEBUG;
break;
case 'm':
config->modelPath = arg;
break;
case 'c':
config->classesPath = arg;
break;
case 'd':
config->debug = true;
break;
case 'o':
config->outputDir.assign(arg);
break;
case 's':
config->seamCarving = true;
break;
case ARGP_KEY_ARG:
config->imagePaths.push_back(arg);
break;
default:
return ARGP_ERR_UNKNOWN;
}
return 0;
}
// Parser description handed to argp_parse(): option table, callback,
// usage pattern for positional arguments and program description.
static struct argp argp = {options, parse_opt, args_doc, doc};

16
readfile.h Normal file
View File

@ -0,0 +1,16 @@
#pragma once
#include <string>
#include <filesystem>
#include <fstream>
#include <stdexcept>
#include <sstream>
// Reads the whole file at `path` and returns its contents as a string.
// Throws std::runtime_error("could not open file <path>") if the file can
// not be opened.
inline std::string readFile(const std::filesystem::path& path)
{
    std::ifstream input(path);
    if(input.is_open())
    {
        std::stringstream contents;
        contents<<input.rdbuf();
        return contents.str();
    }
    throw std::runtime_error(std::string("could not open file ") + path.string());
}

520
seamcarving.cpp Normal file
View File

@ -0,0 +1,520 @@
#include "seamcarving.h"
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <iostream>
#if __cplusplus >= 201703L
#include <filesystem>
#endif
#include <cfloat>
// Stores the source image and the carving parameters. No carving happens here;
// the derived-class constructors set sliderMax and then call init().
SeamCarving::SeamCarving(const cv::Mat &img, int seams, bool grow)
    : image(img)
    , seams(seams)
    , grow(grow)
{
}
// Computes `seams` seams on a working copy of `image`, records them in
// vecSeams and stores the result in finalImage:
//  - grow == false: the image with all computed seams removed,
//  - grow == true:  the original image with one column added per seam.
// If the working image runs out of pixels (or no energy map can be built),
// finalImage falls back to the unmodified `image`.
void SeamCarving::init()
{
    cv::Mat newFrame = image.clone();
    bool carvingFailed = false;

    for(int i = 0; i < seams; i++)
    {
        // A zero-width (or otherwise empty) frame cannot be processed any
        // further; cv::cvtColor would throw on it.
        if(newFrame.empty())
        {
            carvingFailed = true;
            break;
        }

        // Gradient magnitude for intensity of image.
        cv::Mat gradientMagnitude = computeGradientMagnitude(newFrame);

        // Use DP to create the cumulative energy map used for path calculation.
        cv::Mat pathIntensityMat = computePathIntensityMat(gradientMagnitude);
        if(pathIntensityMat.empty())
        {
            carvingFailed = true;
            break;
        }

        std::vector<int> seam = getLeastImportantPath(pathIntensityMat);
        vecSeams.push_back(seam);
        newFrame = removeLeastImportantPath(newFrame, seam);
    }

    // The last removal may also have consumed the final column.
    if(newFrame.empty())
    {
        carvingFailed = true;
    }

    if(carvingFailed)
    {
        // Previously this assignment was done inside the loop and then
        // overwritten below, so the fallback never took effect.
        finalImage = image;
    }
    else if(grow)
    {
        cv::Mat growMat = image.clone();
        for(size_t i = 0; i < vecSeams.size(); i++)
        {
            growMat = addLeastImportantPath(growMat, vecSeams[i]);
        }
        finalImage = growMat;
    }
    else
    {
        finalImage = newFrame;
    }

    sliderPos = seams;
}
void SeamCarving::computeNewFinalImage(int sliderPos)
{
if(sliderPos == 0)
{
finalImage = image;
return;
}
if(sliderPos < 1 || sliderPos >= sliderMax-1)
{
return;
}
if(sliderPos > vecSeams.size())
{
cv::Mat newFrame = finalImage.clone();
for(int i = vecSeams.size()-1; i < sliderPos; i++)
{
//Gradient Magnitude for intensity of image.
cv::Mat gradientMagnitude = computeGradientMagnitude(newFrame);
//Use DP to create the real energy map that is used for path calculation.
// Strictly using vertical paths for testing simplicity.
cv::Mat pathIntensityMat = computePathIntensityMat(gradientMagnitude);
if(pathIntensityMat.rows == 0 && pathIntensityMat.cols == 0)
{
finalImage = image;
break;
}
std::vector<int> seam = getLeastImportantPath(pathIntensityMat);
vecSeams.push_back(seam);
newFrame = removeLeastImportantPath(newFrame,seam);
if(newFrame.rows == 0 && newFrame.cols == 0)
{
finalImage = image;
break;
}
}
if (grow)
{
cv::Mat growMat = image.clone();
for (int i = 0; i < vecSeams.size(); i++)
{
growMat = addLeastImportantPath(growMat,vecSeams[i]);
}
finalImage = growMat;
}
else
{
finalImage = newFrame;
}
}
else if (sliderPos < vecSeams.size())
{
cv::Mat newFrame = image.clone();
for(int i = 0; i < sliderPos; i++) // TODO check if it is faster to add seams back (probably not)
{
if (grow)
{
newFrame = addLeastImportantPath(newFrame,vecSeams[i]);
}
else
{
newFrame = removeLeastImportantPath(newFrame,vecSeams[i]);
}
if(newFrame.rows == 0 && newFrame.cols == 0)
{
finalImage = image;
break;
}
}
finalImage = newFrame;
}
}
// Accessor for the most recently computed result image.
// The reference stays bound to the member, so later recomputation is visible
// through it.
const cv::Mat& SeamCarving::getFinalImage()
{
    const cv::Mat &result = finalImage;
    return result;
}
// Draws the first `sliderPos` stored seams onto a copy of the source image,
// writes the result to output/seams_image.jpg and shows it in the
// "Image Seams" window.
void SeamCarving::showSeamsImg()
{
    cv::Mat seamsFrame = image.clone();

    // sliderPos is driven by the trackbar and can be larger than the number
    // of seams computed so far; never index past the end of vecSeams.
    const int known = static_cast<int>(vecSeams.size());
    const int count = sliderPos < known ? sliderPos : known;

    for(int i = 0; i < count; i++)
    {
        seamsFrame = drawSeam(seamsFrame, vecSeams[i]);
    }

    cv::imwrite("output/seams_image.jpg", seamsFrame);
    cv::imshow( "Image Seams", seamsFrame);
}
static void onChange( int pos, void* object )
{
SeamCarving* sc = (SeamCarving*)(object);
/*if(sc->getBlockUpdateStatus()) {
return;
}*/
sc->computeNewFinalImage(pos);
imshow("Final Image", sc->getFinalImage());
#if DEBUG
sc->showSeamsImg();
#endif
}
static void onMouse( int event, int x, int y, int, void* object)
{
SeamCarving* sc = (SeamCarving*)(object);
if( event == cv::EVENT_LBUTTONDOWN ||
event == cv::EVENT_RBUTTONDOWN ||
event == cv::EVENT_MBUTTONDOWN
)
{
sc->setBlockUpdate(true);
}
else if(event == cv::EVENT_LBUTTONUP ||
event == cv::EVENT_RBUTTONUP ||
event == cv::EVENT_MBUTTONUP)
{
sc->setBlockUpdate(false);
}
}
void SeamCarving::setBlockUpdate(bool bUpdate)
{
blockUpdate = bUpdate;
}
bool SeamCarving::getBlockUpdateStatus()
{
return blockUpdate;
}
// Interactive viewer: shows the raw and the final image in HighGUI windows,
// attaches a "Seams" trackbar to the final image, writes the result to
// output/final_image.jpg and blocks in cv::waitKey(0) until a key is pressed.
// DEBUG builds additionally show and save the gradient, the cumulative
// intensity map and the seam overlay.
void SeamCarving::showImage()
{
// The "output" directory is only created when compiling as C++17 or newer;
// the cv::imwrite calls below write into it in every configuration.
#if __cplusplus >= 201703L
if(!std::filesystem::exists("output"))
{
std::filesystem::create_directory("output");
}
#endif
// Nothing to show if the source image is empty.
if( image.empty() )
{
std::cout << "Could not open raw image" << std::endl ;
return;
}
namedWindow( "Raw Image", cv::WINDOW_AUTOSIZE );
cv::imshow( "Raw Image", image );
// The raw image window stays open even if there is no result to display.
if( finalImage.empty() )
{
std::cout << "Could not open final image" << std::endl ;
return;
}
#if DEBUG
// Gradient magnitude, converted to 8 bit for display and saving.
namedWindow( "gradient Image", cv::WINDOW_AUTOSIZE );
cv::Mat gradient = computeGradientMagnitude(image);
cv::Mat u8_image;
gradient.convertTo(u8_image, CV_8U);
cv::imwrite("output/gradient_image.jpg", u8_image);
cv::imshow("gradient Image", u8_image);
// Cumulative path intensity, normalized to 0..255 before the 8 bit conversion.
namedWindow( "intensity Image", cv::WINDOW_AUTOSIZE );
cv::Mat u8_image2;
cv::Mat intensityMat = computePathIntensityMat(gradient);
cv::Mat dst;
cv::normalize(intensityMat, dst, 0, 255, cv::NORM_MINMAX);
dst.convertTo(u8_image2, CV_8U);
cv::imwrite("output/intensity_image.jpg", u8_image2);
cv::imshow( "intensity Image", u8_image2);
//cv::Mat engImg = GetEnergyImg(image);
//namedWindow("energy Image", cv::WINDOW_AUTOSIZE);
//cv::Mat u8_image3;
//engImg.convertTo(u8_image3, CV_8U);
//cv::imshow( "energy Image", u8_image3);
namedWindow("Image Seams", cv::WINDOW_AUTOSIZE);
showSeamsImg();
#endif
// The window has to exist before the trackbar can be attached to it.
// The trackbar writes its position directly into sliderPos and calls
// onChange with `this` as user data.
namedWindow( "Final Image", cv::WINDOW_AUTOSIZE );
cv::createTrackbar("Seams", "Final Image", &sliderPos, sliderMax, onChange, this);
//cv::setMouseCallback("Final Image", onMouse, this );
cv::imwrite("output/final_image.jpg", finalImage);
cv::imshow("Final Image", finalImage);
// Block until a key is pressed so the windows stay responsive.
cv::waitKey(0);
}
// Alternative energy function: sum of the absolute responses of a 3x3
// Sobel-style kernel and its transpose, applied to the grayscale image.
// Returns an image of the same size as `img`.
cv::Mat SeamCarving::GetEnergyImg(const cv::Mat &img)
{
    // Find the partial derivatives along both axes separately and sum them up.
    // The kernel must have exactly 9 entries: the previous initializer
    // "-2 - 1" (missing comma) produced only 8 floats, so the 3x3 Mat header
    // below read one float past the end of the array.
    float pd[] = { 1,  2,  1,
                   0,  0,  0,
                  -1, -2, -1};
    cv::Mat xFilter(3, 3, CV_32FC1, pd); // wraps pd without copying
    cv::Mat yFilter = xFilter.t();

    cv::Mat grayImg;
    cv::cvtColor(img, grayImg, cv::COLOR_RGBA2GRAY);

    cv::Mat dxImg;
    cv::Mat dyImg;
    // NOTE(review): ddepth 0 is CV_8U, so negative filter responses saturate
    // to 0 before cv::abs sees them. A signed or float depth looks like what
    // was intended, but that would change the depth of the returned image --
    // confirm with callers before changing.
    cv::filter2D(grayImg, dxImg, 0, xFilter);
    cv::filter2D(grayImg, dyImg, 0, yFilter);

    cv::Mat absDxImg = cv::abs(dxImg);
    cv::Mat absDyImg = cv::abs(dyImg);

    cv::Mat energyImg;
    cv::add(absDxImg, absDyImg, energyImg);
    return energyImg;
}
// Returns the per-pixel gradient magnitude sqrt(dx^2 + dy^2) of `frame` as a
// CV_32FC1 image, using Sobel derivatives of the grayscale image.
cv::Mat SeamCarving::computeGradientMagnitude(const cv::Mat &frame)
{
    cv::Mat gray;
    cv::cvtColor(frame, gray, cv::COLOR_RGBA2GRAY);

    // Accumulates dx^2 + dy^2 and is turned into the magnitude at the end.
    cv::Mat squaredSum = cv::Mat::zeros(gray.size(), CV_32FC1);
    cv::Mat derivative16s = cv::Mat(gray.size(), CV_16SC1);
    cv::Mat derivative32f = cv::Mat(gray.size(), CV_32FC1);

    // {x order, y order}: first the horizontal, then the vertical derivative.
    const int orders[2][2] = {{1, 0}, {0, 1}};
    for(const auto &order : orders)
    {
        cv::Sobel(gray, derivative16s, CV_16SC1, order[0], order[1]);
        derivative16s.convertTo(derivative32f, CV_32FC1);
        cv::accumulateSquare(derivative32f, squaredSum);
    }

    cv::sqrt(squaredSum, squaredSum);
    return squaredSum;
}
// Returns `currIndex` when the column index `start` lies in [0, end),
// otherwise FLT_MAX so that out-of-range neighbours never win a minimum.
float SeamCarving::intensity(float currIndex, int start, int end)
{
    const bool insideRange = start >= 0 && start < end;
    return insideRange ? currIndex : FLT_MAX;
}
// Builds the cumulative energy map used for seam search.
// Row 0 is a copy of the raw energy; every other cell is its own energy plus
// the minimum of the (up to three) cells directly above it.
// `rawEnergyMap` is read as float data. Returns an empty Mat for empty input.
cv::Mat SeamCarving::computePathIntensityMat(const cv::Mat &rawEnergyMap)
{
    if(rawEnergyMap.empty())
    {
        return cv::Mat();
    }

    cv::Mat pathIntensityMap = cv::Mat(rawEnergyMap.size(), CV_32FC1);

    // First row of intensity paths is the same as the energy map.
    rawEnergyMap.row(0).copyTo(pathIntensityMap.row(0));

    const int lastCol = pathIntensityMap.cols - 1;
    for(int row = 1; row < pathIntensityMap.rows; row++)
    {
        const float *energyRow = rawEnergyMap.ptr<float>(row);
        const float *above = pathIntensityMap.ptr<float>(row - 1);
        float *current = pathIntensityMap.ptr<float>(row);

        for(int col = 0; col <= lastCol; col++)
        {
            // Neighbours are only read when they exist. The previous code
            // evaluated at<float>() at column -1 and column `cols` before
            // discarding the value, which reads outside the row (and outside
            // the buffer for row 0, column -1).
            float minIntensity = above[col];
            if(col > 0)
            {
                minIntensity = std::min(minIntensity, above[col - 1]);
            }
            if(col < lastCol)
            {
                minIntensity = std::min(minIntensity, above[col + 1]);
            }
            current[col] = energyRow[col] + minIntensity;
        }
    }
    return pathIntensityMap;
}
// Backtracks the cheapest seam through the cumulative energy map.
// Starts at the minimum of the bottom row and walks upwards, moving to the
// left or right neighbour only when it is strictly cheaper than both other
// candidates. Returns one column index per row (empty for an empty map).
// `importanceMap` is read as float data.
std::vector<int> SeamCarving::getLeastImportantPath(const cv::Mat &importanceMap)
{
    if(importanceMap.empty())
    {
        return std::vector<int>();
    }

    const int rows = importanceMap.rows;
    const int cols = importanceMap.cols;

    // Find the beginning of the least important path: the first absolute
    // minimum of the bottom row.
    const float *bottom = importanceMap.ptr<float>(rows - 1);
    int minCol = 0;
    for(int col = 1; col < cols; col++)
    {
        if(bottom[col] < bottom[minCol])
        {
            minCol = col;
        }
    }

    std::vector<int> leastEnergySeam(rows);
    leastEnergySeam[rows - 1] = minCol;

    for(int row = rows - 2; row >= 0; row--)
    {
        const float *line = importanceMap.ptr<float>(row);

        // Out-of-range neighbours count as FLT_MAX and are never read; the
        // previous code read at<float>() at column -1 / `cols` first.
        const float p1 = minCol > 0 ? line[minCol - 1] : FLT_MAX;
        const float p2 = line[minCol];
        const float p3 = minCol + 1 < cols ? line[minCol + 1] : FLT_MAX;

        // Adjust the min column for path following.
        if(p1 < p2 && p1 < p3)
        {
            minCol -= 1;
        }
        else if(p3 < p1 && p3 < p2)
        {
            minCol += 1;
        }
        leastEnergySeam[row] = minCol;
    }
    return leastEnergySeam;
}
// Returns a copy of `original` that is one column narrower: in every row the
// pixel at seam[row] is removed (see removePixel).
// An empty input yields an empty Mat. Rows without a seam entry are left
// uninitialized in the result, so `seam` should hold one entry per row.
cv::Mat SeamCarving::removeLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam)
{
    if(original.empty())
    {
        // A negative width must never be passed to the Mat constructor.
        return cv::Mat();
    }

    // New mat needs to shrink by one column.
    cv::Mat newMat = cv::Mat(cv::Size(original.cols - 1, original.rows), original.type());
    if(newMat.empty())
    {
        // The last remaining column was removed; there is nothing to copy.
        return newMat;
    }

    // Never touch more rows than the image has, even if the seam is longer.
    const int seamRows = static_cast<int>(seam.size());
    const int rows = seamRows < original.rows ? seamRows : original.rows;
    for(int row = 0; row < rows; row++)
    {
        removePixel(original, newMat, row, seam[row]);
    }
    return newMat;
}
// Copies row `row` of `original` into `outputMat` without the pixel at column
// `minCol`, then blends the removed pixel into its former neighbours:
//  - the pixel that followed it becomes the average of both,
//  - the pixel that preceded it becomes the average of both.
// Requirements (checked): 8-bit data, `outputMat` has the same type and row
// count as `original` and exactly one column less, indices are in range.
// Rows are addressed through Mat::ptr so non-continuous matrices work, and
// every channel is blended instead of a hard-coded three.
void SeamCarving::removePixel(const cv::Mat &original, cv::Mat &outputMat, int row, int minCol)
{
    const int width = original.cols;
    const int channels = original.channels();

    CV_Assert(original.depth() == CV_8U && outputMat.type() == original.type());
    CV_Assert(outputMat.cols == width - 1 && outputMat.rows == original.rows);
    CV_Assert(row >= 0 && row < original.rows && minCol >= 0 && minCol < width);
    if(outputMat.empty())
    {
        return; // removing the only column leaves nothing to write
    }

    const unsigned char *src = original.ptr<unsigned char>(row);
    unsigned char *dst = outputMat.ptr<unsigned char>(row);

    const size_t pixelBytes = static_cast<size_t>(channels);
    const size_t leftBytes = static_cast<size_t>(minCol) * pixelBytes;
    const size_t rightBytes = static_cast<size_t>(width - 1 - minCol) * pixelBytes;

    // Everything left of the seam keeps its position ...
    memcpy(dst, src, leftBytes);
    // ... everything right of it moves one pixel to the left.
    memcpy(dst + leftBytes, src + leftBytes + pixelBytes, rightBytes);

    const unsigned char *removed = src + leftBytes;

    if(minCol + 1 < width)
    {
        // The former right neighbour now lives at column minCol.
        const unsigned char *right = removed + pixelBytes;
        unsigned char *target = dst + leftBytes;
        for(int c = 0; c < channels; c++)
        {
            target[c] = (unsigned char)((removed[c] + right[c]) / 2);
        }
    }
    if(minCol - 1 >= 0)
    {
        const unsigned char *left = removed - pixelBytes;
        unsigned char *target = dst + leftBytes - pixelBytes;
        for(int c = 0; c < channels; c++)
        {
            target[c] = (unsigned char)((removed[c] + left[c]) / 2);
        }
    }
}
// Returns a copy of `original` that is one column wider: in every row a pixel
// is inserted next to column seam[row] (see addPixel).
// An empty input yields an empty Mat. Rows without a seam entry are left
// uninitialized in the result, so `seam` should hold one entry per row.
cv::Mat SeamCarving::addLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam)
{
    if(original.empty())
    {
        return cv::Mat();
    }

    // New mat needs to grow by one column.
    cv::Mat newMat = cv::Mat(cv::Size(original.cols + 1, original.rows), original.type());

    // Never touch more rows than the image has, even if the seam is longer.
    const int seamRows = static_cast<int>(seam.size());
    const int rows = seamRows < original.rows ? seamRows : original.rows;
    for(int row = 0; row < rows; row++)
    {
        addPixel(original, newMat, row, seam[row]);
    }
    return newMat;
}
// Copies row `row` of `original` into the one-column-wider `outputMat`,
// inserting an extra pixel after column `minCol`:
//  - columns [0, minCol] keep their position,
//  - the inserted pixel (column minCol+1) is a copy of the right neighbour,
//    or of the seam pixel itself when the seam is in the last column,
//  - the remaining columns move one pixel to the right,
//  - finally the seam pixel and its left neighbour are blended with the seam
//    pixel's original value, as before.
// Requirements (checked): 8-bit data, `outputMat` has the same type and row
// count as `original` and exactly one column more, indices are in range.
void SeamCarving::addPixel(const cv::Mat &original, cv::Mat &outputMat, int row, int minCol)
{
    const int width = original.cols;
    const int channels = original.channels();

    CV_Assert(original.depth() == CV_8U && outputMat.type() == original.type());
    CV_Assert(outputMat.cols == width + 1 && outputMat.rows == original.rows);
    CV_Assert(row >= 0 && row < original.rows && minCol >= 0 && minCol < width);

    const unsigned char *src = original.ptr<unsigned char>(row);
    unsigned char *dst = outputMat.ptr<unsigned char>(row);

    const size_t pixelBytes = static_cast<size_t>(channels);
    const size_t firstBytes = static_cast<size_t>(minCol + 1) * pixelBytes;
    const size_t restBytes = static_cast<size_t>(width - minCol - 1) * pixelBytes;
    const bool hasRight = minCol + 1 < width;

    // Columns up to and including the seam.
    memcpy(dst, src, firstBytes);

    // Inserted pixel. The previous code always copied the right neighbour,
    // which read past the end of the row (and of the buffer in the last row)
    // when the seam was in the last column.
    const unsigned char *fill = hasRight ? src + firstBytes : src + firstBytes - pixelBytes;
    memcpy(dst + firstBytes, fill, pixelBytes);

    // Columns right of the seam, shifted by one pixel.
    memcpy(dst + firstBytes + pixelBytes, src + firstBytes, restBytes);

    const unsigned char *seamPixel = src + firstBytes - pixelBytes;

    if(hasRight)
    {
        const unsigned char *right = src + firstBytes;
        unsigned char *target = dst + firstBytes - pixelBytes;
        for(int c = 0; c < channels; c++)
        {
            target[c] = (unsigned char)((seamPixel[c] + right[c]) / 2);
        }
    }
    if(minCol - 1 >= 0)
    {
        const unsigned char *left = seamPixel - pixelBytes;
        unsigned char *target = dst + firstBytes - 2 * pixelBytes;
        for(int c = 0; c < channels; c++)
        {
            target[c] = (unsigned char)((seamPixel[c] + left[c]) / 2);
        }
    }
}

61
seamcarving.h Normal file
View File

@ -0,0 +1,61 @@
#ifndef __SEAM__CARVING_HPP__
#define __SEAM__CARVING_HPP__
#include <opencv2/core/core.hpp>
#define DEBUG 0
// Base class for content-aware resizing by seam carving.
// It owns the source image, the list of computed seams and the result image.
// Derived classes choose the carving direction, provide drawSeam() and call
// init() from their constructors after setting sliderMax.
class SeamCarving {
public:
    // The class is used polymorphically (virtual members), so destruction
    // through a base pointer must be well defined.
    virtual ~SeamCarving() = default;

    // Shows the raw and final image in HighGUI windows and blocks until a key is pressed.
    void showImage();
    // Most recently computed result image.
    const cv::Mat& getFinalImage();
    // Recomputes finalImage for `pos` seams (trackbar callback target).
    virtual void computeNewFinalImage(int pos);
    void setBlockUpdate(bool bUpdate);
    bool getBlockUpdateStatus();
    // Draws the stored seams on the source image, saves and shows the result.
    virtual void showSeamsImg();

protected:
    SeamCarving(const cv::Mat &img, int seams, bool grow);
    // Computes `seams` seams and the initial finalImage.
    void init();
    // Returns a copy of `frame` with `seam` marked on it.
    virtual cv::Mat drawSeam(const cv::Mat &frame, const std::vector<int> &seam) = 0;

    cv::Mat image;      // source image
    cv::Mat finalImage; // current result
    int seams;          // number of seams requested at construction
    bool grow;          // true: add columns per seam, false: remove them
    // Both slider values are set by the derived constructors / init(); they
    // start at 0 so they are never read uninitialized.
    int sliderMax = 0;  // upper bound of the "Seams" trackbar
    int sliderPos = 0;  // current trackbar position
    std::vector<std::vector<int>> vecSeams; // one column index per row, per seam

private:
    cv::Mat GetEnergyImg(const cv::Mat &img);
    cv::Mat computeGradientMagnitude(const cv::Mat &frame);
    float intensity(float currIndex, int start, int end);
    cv::Mat computePathIntensityMat(const cv::Mat &rawEnergyMap);
    std::vector<int> getLeastImportantPath(const cv::Mat &importanceMap);
    cv::Mat removeLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam);
    void removePixel(const cv::Mat &original, cv::Mat &outputMap, int row, int minCol);
    cv::Mat addLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam);
    void addPixel(const cv::Mat &original, cv::Mat &outputMat, int row, int minCol);

    bool blockUpdate = false;
};
// Seam carving that changes the image width: seams hold one column index per
// image row and sliderMax is the number of image columns.
class SeamCarvingHorizontal : public SeamCarving
{
public:
// Loads `fileName` as a colour image and immediately computes `seams` seams.
SeamCarvingHorizontal(char* fileName, int seams=100, bool grow=false);
protected:
// Marks seam[row] in every row of a copy of `frame`.
virtual cv::Mat drawSeam(const cv::Mat &frame, const std::vector<int> &seam) override;
};
// Seam carving that changes the image height. It reuses the base class
// algorithm by running it on the transposed image and transposing the result
// back; sliderMax is the number of image rows.
class SeamCarvingVertical : public SeamCarving {
public:
// Loads `fileName` as a colour image and immediately computes `seams` seams.
SeamCarvingVertical(char* fileName, int seams=100, bool grow=false);
// Runs the base implementation on the transposed image.
virtual void computeNewFinalImage(int pos) override;
#if DEBUG
// Only overridden in DEBUG builds.
virtual void showSeamsImg() override;
#endif
protected:
// Marks seam[col] in every column of a copy of `frame`.
virtual cv::Mat drawSeam(const cv::Mat &frame, const std::vector<int> &seam) override;
};
#endif // __SEAM__CARVING_HPP__

28
seamcarvinghoriz.cpp Normal file
View File

@ -0,0 +1,28 @@
#include "seamcarving.h"
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <iostream>
#include <cfloat>
// Returns a copy of `frame` (3-channel, 8 bit) in which the pixel at column
// seam[row] of every row is painted green (0, 255, 0).
// Rows without a seam entry and out-of-range columns are skipped.
cv::Mat SeamCarvingHorizontal::drawSeam(const cv::Mat &frame, const std::vector<int> &seam)
{
    cv::Mat retMat = frame.clone();

    const int seamRows = static_cast<int>(seam.size());
    const int rows = seamRows < frame.rows ? seamRows : frame.rows;

    // One write per row is enough; the previous inner loop over all columns
    // repeated the same three assignments frame.cols times.
    for(int row = 0; row < rows; row++)
    {
        const int col = seam[row];
        if(col < 0 || col >= frame.cols)
        {
            continue;
        }
        retMat.at<cv::Vec3b>(row, col) = cv::Vec3b(0, 255, 0);
    }
    return retMat;
}
// Loads `fileName` as a colour image, limits the slider to the number of
// image columns and computes the initial seams via init().
SeamCarvingHorizontal::SeamCarvingHorizontal(char* fileName, int seams, bool grow)
    : SeamCarving(cv::imread(fileName, cv::IMREAD_COLOR), seams, grow)
{
    this->sliderMax = this->image.cols;
    this->init();
}

51
seamcarvingvert.cpp Normal file
View File

@ -0,0 +1,51 @@
#include "seamcarving.h"
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <iostream>
#include <cfloat>
// Loads `fileName` as a colour image, limits the slider to the number of
// image rows and computes the initial seams on the transposed image, so the
// column-based base algorithm removes or adds rows. Afterwards `image` is the
// upright source again and finalImage is transposed back.
SeamCarvingVertical::SeamCarvingVertical(char* fileName, int seams, bool grow) :
    SeamCarving( cv::imread(fileName, cv::IMREAD_COLOR), seams, grow)
{
    sliderMax = image.rows;

    const cv::Mat uprightImage = image;

    // Transpose into a separate buffer. `image = image.t()` transposes a
    // square matrix in place, which also altered the saved upright copy
    // because both headers share the same pixel data.
    cv::Mat transposedImage;
    if(!uprightImage.empty())
    {
        cv::transpose(uprightImage, transposedImage);
    }

    image = transposedImage;
    init();
    image = uprightImage;

    if(!finalImage.empty())
    {
        cv::Mat uprightFinal;
        cv::transpose(finalImage, uprightFinal);
        finalImage = uprightFinal;
    }
}
// Returns a copy of `frame` (3-channel, 8 bit) in which the pixel at row
// seam[col] of every column is painted green (0, 255, 0).
// Columns without a seam entry and out-of-range rows are skipped.
cv::Mat SeamCarvingVertical::drawSeam(const cv::Mat &frame, const std::vector<int> &seam)
{
    cv::Mat retMat = frame.clone();

    const int seamCols = static_cast<int>(seam.size());
    const int cols = seamCols < frame.cols ? seamCols : frame.cols;

    // One write per column is enough; the previous inner loop over all rows
    // repeated the same three assignments frame.rows times.
    for(int col = 0; col < cols; col++)
    {
        const int row = seam[col];
        if(row < 0 || row >= frame.rows)
        {
            continue;
        }
        retMat.at<cv::Vec3b>(row, col) = cv::Vec3b(0, 255, 0);
    }
    return retMat;
}
void SeamCarvingVertical::computeNewFinalImage(int pos)
{
cv::Mat oldImage = image;
image = image.t();
SeamCarving::computeNewFinalImage(pos);
image = oldImage;
finalImage = finalImage.t();
}
#if DEBUG
// Shows the seam overlay for the vertical carver.
// The previous body called SeamCarving::showImage(), which in DEBUG builds
// calls the virtual showSeamsImg() again and therefore recursed without end.
// No transposition is needed here: SeamCarvingVertical::drawSeam indexes the
// seam by column, so it expects the upright image, whose column count equals
// the seam length.
void SeamCarvingVertical::showSeamsImg()
{
    SeamCarving::showSeamsImg();
}
#endif

26
tokenize.cpp Normal file
View File

@ -0,0 +1,26 @@
#include "tokenize.h"
// Splits `str` at every `delim` that is neither inside a pair of
// `ignoreBraket` characters nor directly preceded by `escapeChar`.
// Bracket and escape characters stay part of the tokens. If the string ends
// inside an open bracket, the trailing token is dropped.
std::vector<std::string> tokenizeBinaryIgnore(const std::string& str, const char delim, const char ignoreBraket, const char escapeChar)
{
    std::vector<std::string> result;
    std::string current;
    bool insideBracket = false;
    bool atStart = true;
    char previous = '\0';

    for(const char ch : str)
    {
        const bool escaped = !atStart && previous == escapeChar;
        const bool splitHere = ch == delim && !insideBracket && !escaped;

        if(splitHere)
        {
            result.push_back(current);
            current.clear();
        }
        else
        {
            current.push_back(ch);
        }

        // The bracket state is toggled after the split decision, so a
        // bracket character always ends up inside the token it opens/closes.
        if(ch == ignoreBraket)
            insideBracket = !insideBracket;

        previous = ch;
        atStart = false;
    }

    if(!insideBracket)
        result.push_back(current);
    return result;
}

7
tokenize.h Normal file
View File

@ -0,0 +1,7 @@
#pragma once
#include <string>
#include <vector>
// Splits `str` at every `delim` that is neither enclosed by a pair of
// `ignoreBraket` characters nor directly preceded by `escapeChar`.
// The bracket and escape characters are kept in the returned tokens; a
// trailing token that ends inside an open bracket is not returned.
std::vector<std::string> tokenizeBinaryIgnore(const std::string& str, const char delim, const char ignoreBraket = '\0',
const char escapeChar = '\0');

60
utils.cpp Normal file
View File

@ -0,0 +1,60 @@
#include "utils.h"
#include <filesystem>
#include <vector>
#include <opencv2/imgproc.hpp>
// True when `path` names an existing regular file whose extension is exactly
// ".png", ".jpg" or ".jpeg" (the comparison is case sensitive).
bool isImagePath(const std::filesystem::path& path)
{
    if(!std::filesystem::is_regular_file(path))
        return false;

    const std::filesystem::path extension = path.extension();
    if(extension == ".png")
        return true;
    if(extension == ".jpg")
        return true;
    return extension == ".jpeg";
}
// Appends every image file reachable from `path` to `paths`.
// `path` may be an image file itself or a directory, which is searched
// recursively. Anything else is ignored.
void getImageFiles(const std::filesystem::path& path, std::vector<std::filesystem::path>& paths)
{
    if(isImagePath(path))
    {
        paths.push_back(path);
        return;
    }

    if(!std::filesystem::is_directory(path))
        return;

    // Each entry is handled by the same rules as the top-level argument:
    // image files are collected, directories are descended into and
    // everything else is skipped.
    for(const std::filesystem::directory_entry& entry : std::filesystem::directory_iterator(path))
    {
        const std::filesystem::path& child = entry.path();
        if(std::filesystem::is_directory(child))
            getImageFiles(child, paths);
        else if(isImagePath(child))
            paths.push_back(child);
    }
}
// Returns the axis-aligned bounding rectangle of the given points; the `int`
// part of each pair is ignored. Width and height are the coordinate spans
// (max - min). An empty list yields a default-constructed (all zero) rect.
cv::Rect rectFromPoints(const std::vector<std::pair<cv::Point, int>>& points)
{
    if(points.empty())
    {
        // Without this guard the sentinel values below would be subtracted
        // from each other, which overflows int.
        return cv::Rect();
    }

    int left = std::numeric_limits<int>::max();
    int right = std::numeric_limits<int>::min();
    int top = std::numeric_limits<int>::max();
    int bottom = std::numeric_limits<int>::min();

    for(const std::pair<cv::Point, int>& point : points)
    {
        left = point.first.x < left ? point.first.x : left;
        right = point.first.x > right ? point.first.x : right;
        top = point.first.y < top ? point.first.y : top;
        bottom = point.first.y > bottom ? point.first.y : bottom;
    }

    return cv::Rect(left, top, right-left, bottom-top);
}
// Euclidean distance between two integer points.
double pointDist(const cv::Point2i& pointA, const cv::Point2i& pointB)
{
    // Component-wise difference as an integer vector, then its L2 norm.
    const cv::Vec2i delta(pointA.x - pointB.x, pointA.y - pointB.y);
    return cv::norm(delta);
}
// True when `point` lies inside `rect`, with all four edges counted as inside
// (x in [rect.x, rect.x + rect.width], y in [rect.y, rect.y + rect.height]).
bool pointInRect(const cv::Point2i& point, const cv::Rect& rect)
{
    if(point.x < rect.x)
        return false;
    if(point.x > rect.x+rect.width)
        return false;
    if(point.y < rect.y)
        return false;
    return point.y <= rect.y+rect.height;
}

15
utils.h Normal file
View File

@ -0,0 +1,15 @@
#pragma once
#include <filesystem>
#include <vector>
#include <opencv2/imgproc.hpp>
// True for existing regular files with a .png, .jpg or .jpeg extension.
bool isImagePath(const std::filesystem::path& path);
// Appends `path` itself (if it is an image) or, for a directory, all image
// files found recursively below it to `paths`.
void getImageFiles(const std::filesystem::path& path, std::vector<std::filesystem::path>& paths);
// Bounding rectangle of the points; the int of each pair is not used.
cv::Rect rectFromPoints(const std::vector<std::pair<cv::Point, int>>& points);
// Euclidean distance between the two points.
double pointDist(const cv::Point2i& pointA, const cv::Point2i& pointB);
// Point-in-rectangle test that treats all four edges as inside.
bool pointInRect(const cv::Point2i& point, const cv::Rect& rect);

236
yolo.cpp Normal file
View File

@ -0,0 +1,236 @@
#include <opencv2/dnn/dnn.hpp>
#include <algorithm>
#include <string>
#include <stdexcept>
#include "yolo.h"
#include "readfile.h"
#include "tokenize.h"
#include "log.h"
// Symbols generated by incbin get the prefix "r"; this file refers to them as
// rdefaultClassesData, rdefaultModelData and rdefaultModelSize.
#define INCBIN_PREFIX r
#include "incbin.h"
// Embed the default class list (as text) and the default ONNX model (as raw
// bytes) into the binary.
// NOTE(review): the relative "../" paths presumably assume a build directory
// directly below the source directory -- confirm against the build setup.
INCTXT(defaultClasses, "../classes.txt");
INCBIN(defaultModel, "../yolov8x.onnx");
// Sets up the detector.
//  - classesTxtFilePath empty: use the class list embedded in the binary,
//    otherwise read the list from that file.
//  - onnxModelPath empty: load the embedded ONNX model, otherwise load the
//    model from that file.
//  - runWithOCl selects the default backend with the OpenCL target; otherwise
//    the OpenCV backend with the CPU target is used.
Yolo::Yolo(const std::filesystem::path &onnxModelPath, const cv::Size &modelInputShape,
    const std::filesystem::path& classesTxtFilePath, bool runWithOCl)
{
    modelPath = onnxModelPath;
    modelShape = modelInputShape;

    if(!classesTxtFilePath.empty())
    {
        const std::string classList = readFile(classesTxtFilePath);
        loadClasses(classList);
    }
    else
    {
        loadClasses(rdefaultClassesData);
    }

    if(modelPath.empty())
        net = cv::dnn::readNetFromONNX((const char*)rdefaultModelData, rdefaultModelSize);
    else
        net = cv::dnn::readNetFromONNX(modelPath);

    std::cout << (runWithOCl ? "\nRunning on OCV" : "\nRunning on CPU") << std::endl;
    net.setPreferableBackend(runWithOCl ? cv::dnn::DNN_BACKEND_DEFAULT : cv::dnn::DNN_BACKEND_OPENCV);
    net.setPreferableTarget(runWithOCl ? cv::dnn::DNN_TARGET_OPENCL : cv::dnn::DNN_TARGET_CPU);
}
// Runs the network on `input` and returns the detections that survive
// thresholding and non-maximum suppression.
// Both output layouts are handled:
//   yolov5: (batch, 25200, 85) = box[x,y,w,h] + confidence + class scores
//   yolov8: (batch, 84, 8400)  = box[x,y,w,h] + class scores (transposed here)
// Box coordinates are scaled from model input space back to the (possibly
// letter-boxed) input image. Every detection gets a random display colour.
// Returns an empty list when the network output does not have the expected
// shape or no class scores are available.
std::vector<Yolo::Detection> Yolo::runInference(const cv::Mat &input)
{
    cv::Mat modelInput = input;
    if (letterBoxForSquare && modelShape.width == modelShape.height)
        modelInput = formatToSquare(modelInput);

    cv::Mat blob;
    cv::dnn::blobFromImage(modelInput, blob, 1.0/255.0, modelShape, cv::Scalar(), true, false);
    net.setInput(blob);

    std::vector<cv::Mat> outputs;
    net.forward(outputs, net.getUnconnectedOutLayersNames());

    // size[1] and size[2] are read below, so at least three dimensions are needed.
    if(outputs.empty() || outputs[0].dims < 3)
        return {};

    int rows = outputs[0].size[1];
    int dimensions = outputs[0].size[2];

    bool yolov8 = false;
    if (dimensions > rows) // shape[2] larger than shape[1] means yolov8 layout
    {
        yolov8 = true;
        rows = outputs[0].size[2];
        dimensions = outputs[0].size[1];

        outputs[0] = outputs[0].reshape(1, dimensions);
        cv::transpose(outputs[0], outputs[0]);
    }

    // Class scores start after the box (and, for yolov5, the confidence).
    const int scoreOffset = yolov8 ? 4 : 5;
    // Never look at more scores than one output row actually holds, even if
    // the class list is longer than the model's class count.
    const int scoreCount = std::min(static_cast<int>(classes.size()), dimensions - scoreOffset);
    if(scoreCount <= 0)
        return {};

    float *data = (float *)outputs[0].data;

    float x_factor = modelInput.cols / modelShape.width;
    float y_factor = modelInput.rows / modelShape.height;

    std::vector<int> class_ids;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;

    for (int i = 0; i < rows; ++i)
    {
        // yolov8 has no separate objectness value; yolov5 rows are filtered
        // by it before the class scores are inspected.
        const bool candidate = yolov8 || data[4] >= modelConfidenceThreshold;
        if (candidate)
        {
            cv::Mat scores(1, scoreCount, CV_32FC1, data + scoreOffset);
            cv::Point class_id;
            double maxClassScore;
            cv::minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);

            if (maxClassScore > modelScoreThreshold)
            {
                confidences.push_back(yolov8 ? static_cast<float>(maxClassScore) : data[4]);
                class_ids.push_back(class_id.x);

                // Centre/size box -> top-left/size box in input image pixels.
                float x = data[0];
                float y = data[1];
                float w = data[2];
                float h = data[3];
                int left = int((x - 0.5 * w) * x_factor);
                int top = int((y - 0.5 * h) * y_factor);
                int width = int(w * x_factor);
                int height = int(h * y_factor);
                boxes.push_back(cv::Rect(left, top, width, height));
            }
        }
        data += dimensions;
    }

    std::vector<int> nms_result;
    cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nms_result);

    // One generator for all detections; creating and seeding a new engine
    // from std::random_device per detection was needlessly expensive.
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_int_distribution<int> dis(100, 255);

    std::vector<Yolo::Detection> detections{};
    detections.reserve(nms_result.size());
    for(unsigned long i = 0; i < nms_result.size(); ++i)
    {
        int idx = nms_result[i];

        Yolo::Detection result;
        result.class_id = class_ids[idx];
        result.confidence = confidences[idx];
        result.color = cv::Scalar(dis(gen),
                                  dis(gen),
                                  dis(gen));
        result.className = classes[result.class_id].first;
        result.priority = classes[result.class_id].second;
        result.box = boxes[idx];

        detections.push_back(result);
    }
    return detections;
}
// Parses the class list: one class per line in the form
//   name[,priority]
// where the name may be wrapped in double quotes. Lines shorter than two
// characters are skipped. A missing or unparsable priority is stored as -1.
// The position of a class in `classes` is its class id.
void Yolo::loadClasses(const std::string& classesStr)
{
    std::vector<std::string> candidateClasses = tokenizeBinaryIgnore(classesStr, '\n', '"', '\\');
    classes.clear();

    for(std::string& instance : candidateClasses)
    {
        if(instance.size() < 2)
            continue;

        std::vector<std::string> tokens = tokenizeBinaryIgnore(instance, ',', '"', '\\');
        if(tokens.empty())
            continue;

        // Strip the surrounding quotes from the name. The previous code
        // erased from `instance` using an iterator into tokens[0], which is
        // undefined behaviour and never removed the quote from the name.
        std::string& name = tokens[0];
        if(!name.empty() && name.front() == '"')
            name.erase(name.begin());
        if(!name.empty() && name.back() == '"')
            name.pop_back();

        int priority = -1;
        if(tokens.size() > 1)
        {
            try
            {
                priority = std::stoi(tokens[1]);
            }
            // std::stoi reports both std::invalid_argument and
            // std::out_of_range; both derive from std::logic_error.
            catch(const std::logic_error& err)
            {
                Log(Log::WARN)<<"unable to get priority for class "<<tokens[0]<<' '<<err.what();
            }
        }
        classes.push_back({tokens[0], priority});
    }
}
// Letter-boxes `source` into a square canvas: the image is placed in the
// top-left corner and the remaining area is zero filled. The canvas edge is
// the larger of the image's width and height.
cv::Mat Yolo::formatToSquare(const cv::Mat &source)
{
    const int side = std::max(source.cols, source.rows);

    // Use the source's own type for the canvas. With a hard-coded CV_8UC3
    // canvas, copyTo() on a differently typed source reallocates the
    // temporary ROI header instead of writing into the canvas, leaving the
    // result entirely black.
    cv::Mat result = cv::Mat::zeros(side, side, source.type());
    source.copyTo(result(cv::Rect(0, 0, source.cols, source.rows)));
    return result;
}
// Returns the index of the first class whose name equals `str`,
// or -1 when no class has that name.
int Yolo::getClassForStr(const std::string& str) const
{
    size_t index = 0;
    for(const std::pair<std::string, int>& entry : classes)
    {
        if(entry.first == str)
            return index;
        ++index;
    }
    return -1;
}

49
yolo.h Normal file
View File

@ -0,0 +1,49 @@
#pragma once
#include <fstream>
#include <vector>
#include <string>
#include <random>
#include <filesystem>
#include <opencv2/imgproc.hpp>
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
// Object detector built on OpenCV's dnn module; it handles both the yolov5
// and the yolov8 output layout (see runInference in yolo.cpp).
class Yolo
{
public:
// One detected object.
struct Detection
{
// Index into the loaded class list.
int class_id = 0;
// Name of that class.
std::string className;
// Detection confidence as stored by runInference.
float confidence = 0.0;
// Priority from the class list; -1 when none was given.
int priority = -1;
// Random colour assigned per detection.
cv::Scalar color;
// Bounding box in input image pixels.
cv::Rect box;
};
private:
// Minimum yolov5 objectness value for a row to be considered.
static constexpr float modelConfidenceThreshold = 0.25;
// Minimum best class score; also passed to NMSBoxes as score threshold.
static constexpr float modelScoreThreshold = 0.45;
// Threshold passed to NMSBoxes.
static constexpr float modelNMSThreshold = 0.50;
// Parses "name[,priority]" lines into `classes`.
void loadClasses(const std::string& classes);
// NOTE(review): declared here, but no definition is visible in yolo.cpp.
void loadOnnxNetwork(const std::filesystem::path& path);
// Pads the image to a square canvas (top-left aligned, zero filled).
cv::Mat formatToSquare(const cv::Mat &source);
// Path of the ONNX model; empty means the embedded default model is used.
std::string modelPath;
// Class name and priority; the vector index is the class id.
std::vector<std::pair<std::string, int>> classes;
// Input size the network is fed with.
cv::Size2f modelShape;
// When true and modelShape is square, inputs are padded to a square first.
bool letterBoxForSquare = true;
cv::dnn::Net net;
public:
// Empty paths select the model / class list embedded in the binary.
Yolo(const std::filesystem::path &onnxModelPath = "", const cv::Size& modelInputShape = {640, 480},
const std::filesystem::path& classesTxtFilePath = "", bool runWithOCl = true);
// Runs detection on `input` and returns the detections left after NMS.
std::vector<Detection> runInference(const cv::Mat &input);
// Index of the class named `str`, or -1 if unknown.
int getClassForStr(const std::string& str) const;
};