From 438c9d726caa62821c2c4932a310d53150458d44 Mon Sep 17 00:00:00 2001 From: uvos Date: Wed, 28 Jun 2023 23:59:50 +0200 Subject: [PATCH] inital commit --- CMakeLists.txt | 15 ++ incbin.h | 476 +++++++++++++++++++++++++++++++++++++++ intelligentroi.cpp | 99 ++++++++ intelligentroi.h | 18 ++ log.cpp | 63 ++++++ log.h | 64 ++++++ main.cpp | 295 ++++++++++++++++++++++++ options.h | 70 ++++++ readfile.h | 16 ++ seamcarving.cpp | 520 +++++++++++++++++++++++++++++++++++++++++++ seamcarving.h | 61 +++++ seamcarvinghoriz.cpp | 28 +++ seamcarvingvert.cpp | 51 +++++ tokenize.cpp | 26 +++ tokenize.h | 7 + utils.cpp | 60 +++++ utils.h | 15 ++ yolo.cpp | 236 ++++++++++++++++++++ yolo.h | 49 ++++ 19 files changed, 2169 insertions(+) create mode 100644 CMakeLists.txt create mode 100644 incbin.h create mode 100644 intelligentroi.cpp create mode 100644 intelligentroi.h create mode 100644 log.cpp create mode 100644 log.h create mode 100644 main.cpp create mode 100644 options.h create mode 100644 readfile.h create mode 100644 seamcarving.cpp create mode 100644 seamcarving.h create mode 100644 seamcarvinghoriz.cpp create mode 100644 seamcarvingvert.cpp create mode 100644 tokenize.cpp create mode 100644 tokenize.h create mode 100644 utils.cpp create mode 100644 utils.h create mode 100644 yolo.cpp create mode 100644 yolo.h diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..89f8677 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,15 @@ +cmake_minimum_required(VERSION 3.6) +project(AIImagePrepross) + +find_package(OpenCV REQUIRED) + +set(CMAKE_CXX_STANDARD 17) + +set(SRC_FILES main.cpp yolo.cpp tokenize.cpp log.cpp seamcarvingvert.cpp seamcarvinghoriz.cpp seamcarving.cpp utils.cpp intelligentroi.cpp) + +add_executable(${PROJECT_NAME} ${SRC_FILES}) +target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} -ltbb) +target_include_directories(${PROJECT_NAME} PRIVATE ${OpenCV_INCLUDE_DIRS}) +target_compile_options(${PROJECT_NAME} PRIVATE -s -g -Wall) + +install(TARGETS ${PROJECT_NAME} RUNTIME DESTINATION bin) diff --git a/incbin.h b/incbin.h new file mode 100644 index 0000000..3f662e1 --- /dev/null +++ b/incbin.h @@ -0,0 +1,476 @@ +/** + * @file incbin.h + * @author Dale Weiler + * @brief Utility for including binary files + * + * Facilities for including binary files into the current translation unit and + * making use from them externally in other translation units. + */ +#ifndef INCBIN_HDR +#define INCBIN_HDR +#include +#if defined(__AVX512BW__) || \ + defined(__AVX512CD__) || \ + defined(__AVX512DQ__) || \ + defined(__AVX512ER__) || \ + defined(__AVX512PF__) || \ + defined(__AVX512VL__) || \ + defined(__AVX512F__) +# define INCBIN_ALIGNMENT_INDEX 6 +#elif defined(__AVX__) || \ + defined(__AVX2__) +# define INCBIN_ALIGNMENT_INDEX 5 +#elif defined(__SSE__) || \ + defined(__SSE2__) || \ + defined(__SSE3__) || \ + defined(__SSSE3__) || \ + defined(__SSE4_1__) || \ + defined(__SSE4_2__) || \ + defined(__neon__) || \ + defined(__ARM_NEON) || \ + defined(__ALTIVEC__) +# define INCBIN_ALIGNMENT_INDEX 4 +#elif ULONG_MAX != 0xffffffffu +# define INCBIN_ALIGNMENT_INDEX 3 +# else +# define INCBIN_ALIGNMENT_INDEX 2 +#endif + +/* Lookup table of (1 << n) where `n' is `INCBIN_ALIGNMENT_INDEX' */ +#define INCBIN_ALIGN_SHIFT_0 1 +#define INCBIN_ALIGN_SHIFT_1 2 +#define INCBIN_ALIGN_SHIFT_2 4 +#define INCBIN_ALIGN_SHIFT_3 8 +#define INCBIN_ALIGN_SHIFT_4 16 +#define INCBIN_ALIGN_SHIFT_5 32 +#define INCBIN_ALIGN_SHIFT_6 64 + +/* Actual alignment value */ +#define INCBIN_ALIGNMENT \ + INCBIN_CONCATENATE( \ + INCBIN_CONCATENATE(INCBIN_ALIGN_SHIFT, _), \ + INCBIN_ALIGNMENT_INDEX) + +/* Stringize */ +#define INCBIN_STR(X) \ + #X +#define INCBIN_STRINGIZE(X) \ + INCBIN_STR(X) +/* Concatenate */ +#define INCBIN_CAT(X, Y) \ + X ## Y +#define INCBIN_CONCATENATE(X, Y) \ + INCBIN_CAT(X, Y) +/* Deferred macro expansion */ +#define INCBIN_EVAL(X) \ + X +#define INCBIN_INVOKE(N, ...) \ + INCBIN_EVAL(N(__VA_ARGS__)) +/* Variable argument count for overloading by arity */ +#define INCBIN_VA_ARG_COUNTER(_1, _2, _3, N, ...) N +#define INCBIN_VA_ARGC(...) INCBIN_VA_ARG_COUNTER(__VA_ARGS__, 3, 2, 1, 0) + +/* Green Hills uses a different directive for including binary data */ +#if defined(__ghs__) +# if (__ghs_asm == 2) +# define INCBIN_MACRO ".file" +/* Or consider the ".myrawdata" entry in the ld file */ +# else +# define INCBIN_MACRO "\tINCBIN" +# endif +#else +# define INCBIN_MACRO ".incbin" +#endif + +#ifndef _MSC_VER +# define INCBIN_ALIGN \ + __attribute__((aligned(INCBIN_ALIGNMENT))) +#else +# define INCBIN_ALIGN __declspec(align(INCBIN_ALIGNMENT)) +#endif + +#if defined(__arm__) || /* GNU C and RealView */ \ + defined(__arm) || /* Diab */ \ + defined(_ARM) /* ImageCraft */ +# define INCBIN_ARM +#endif + +#ifdef __GNUC__ +/* Utilize .balign where supported */ +# define INCBIN_ALIGN_HOST ".balign " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n" +# define INCBIN_ALIGN_BYTE ".balign 1\n" +#elif defined(INCBIN_ARM) +/* + * On arm assemblers, the alignment value is calculated as (1 << n) where `n' is + * the shift count. This is the value passed to `.align' + */ +# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT_INDEX) "\n" +# define INCBIN_ALIGN_BYTE ".align 0\n" +#else +/* We assume other inline assembler's treat `.align' as `.balign' */ +# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n" +# define INCBIN_ALIGN_BYTE ".align 1\n" +#endif + +/* INCBIN_CONST is used by incbin.c generated files */ +#if defined(__cplusplus) +# define INCBIN_EXTERNAL extern "C" +# define INCBIN_CONST extern const +#else +# define INCBIN_EXTERNAL extern +# define INCBIN_CONST const +#endif + +/** + * @brief Optionally override the linker section into which size and data is + * emitted. + * + * @warning If you use this facility, you might have to deal with + * platform-specific linker output section naming on your own. + */ +#if !defined(INCBIN_OUTPUT_SECTION) +# if defined(__APPLE__) +# define INCBIN_OUTPUT_SECTION ".const_data" +# else +# define INCBIN_OUTPUT_SECTION ".rodata" +# endif +#endif + +/** + * @brief Optionally override the linker section into which data is emitted. + * + * @warning If you use this facility, you might have to deal with + * platform-specific linker output section naming on your own. + */ +#if !defined(INCBIN_OUTPUT_DATA_SECTION) +# define INCBIN_OUTPUT_DATA_SECTION INCBIN_OUTPUT_SECTION +#endif + +/** + * @brief Optionally override the linker section into which size is emitted. + * + * @warning If you use this facility, you might have to deal with + * platform-specific linker output section naming on your own. + * + * @note This is useful for Harvard architectures where program memory cannot + * be directly read from the program without special instructions. With this you + * can chose to put the size variable in RAM rather than ROM. + */ +#if !defined(INCBIN_OUTPUT_SIZE_SECTION) +# define INCBIN_OUTPUT_SIZE_SECTION INCBIN_OUTPUT_SECTION +#endif + +#if defined(__APPLE__) +# include "TargetConditionals.h" +# if defined(TARGET_OS_IPHONE) && !defined(INCBIN_SILENCE_BITCODE_WARNING) +# warning "incbin is incompatible with bitcode. Using the library will break upload to App Store if you have bitcode enabled. Add `#define INCBIN_SILENCE_BITCODE_WARNING` before including this header to silence this warning." +# endif +/* The directives are different for Apple branded compilers */ +# define INCBIN_SECTION INCBIN_OUTPUT_SECTION "\n" +# define INCBIN_GLOBAL(NAME) ".globl " INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n" +# define INCBIN_INT ".long " +# define INCBIN_MANGLE "_" +# define INCBIN_BYTE ".byte " +# define INCBIN_TYPE(...) +#else +# define INCBIN_SECTION ".section " INCBIN_OUTPUT_SECTION "\n" +# define INCBIN_GLOBAL(NAME) ".global " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n" +# if defined(__ghs__) +# define INCBIN_INT ".word " +# else +# define INCBIN_INT ".int " +# endif +# if defined(__USER_LABEL_PREFIX__) +# define INCBIN_MANGLE INCBIN_STRINGIZE(__USER_LABEL_PREFIX__) +# else +# define INCBIN_MANGLE "" +# endif +# if defined(INCBIN_ARM) +/* On arm assemblers, `@' is used as a line comment token */ +# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", %object\n" +# elif defined(__MINGW32__) || defined(__MINGW64__) +/* Mingw doesn't support this directive either */ +# define INCBIN_TYPE(NAME) +# else +/* It's safe to use `@' on other architectures */ +# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", @object\n" +# endif +# define INCBIN_BYTE ".byte " +#endif + +/* List of style types used for symbol names */ +#define INCBIN_STYLE_CAMEL 0 +#define INCBIN_STYLE_SNAKE 1 + +/** + * @brief Specify the prefix to use for symbol names. + * + * @note By default this is "g". + * + * @code + * #define INCBIN_PREFIX incbin + * #include "incbin.h" + * INCBIN(Foo, "foo.txt"); + * + * // Now you have the following symbols instead: + * // const unsigned char incbinFoo[]; + * // const unsigned char *const incbinFoo; + * // const unsigned int incbinFoo; + * @endcode + */ +#if !defined(INCBIN_PREFIX) +# define INCBIN_PREFIX g +#endif + +/** + * @brief Specify the style used for symbol names. + * + * Possible options are + * - INCBIN_STYLE_CAMEL "CamelCase" + * - INCBIN_STYLE_SNAKE "snake_case" + * + * @note By default this is INCBIN_STYLE_CAMEL + * + * @code + * #define INCBIN_STYLE INCBIN_STYLE_SNAKE + * #include "incbin.h" + * INCBIN(foo, "foo.txt"); + * + * // Now you have the following symbols: + * // const unsigned char foo_data[]; + * // const unsigned char *const foo_end; + * // const unsigned int foo_size; + * @endcode + */ +#if !defined(INCBIN_STYLE) +# define INCBIN_STYLE INCBIN_STYLE_CAMEL +#endif + +/* Style lookup tables */ +#define INCBIN_STYLE_0_DATA Data +#define INCBIN_STYLE_0_END End +#define INCBIN_STYLE_0_SIZE Size +#define INCBIN_STYLE_1_DATA _data +#define INCBIN_STYLE_1_END _end +#define INCBIN_STYLE_1_SIZE _size + +/* Style lookup: returning identifier */ +#define INCBIN_STYLE_IDENT(TYPE) \ + INCBIN_CONCATENATE( \ + INCBIN_STYLE_, \ + INCBIN_CONCATENATE( \ + INCBIN_EVAL(INCBIN_STYLE), \ + INCBIN_CONCATENATE(_, TYPE))) + +/* Style lookup: returning string literal */ +#define INCBIN_STYLE_STRING(TYPE) \ + INCBIN_STRINGIZE( \ + INCBIN_STYLE_IDENT(TYPE)) \ + +/* Generate the global labels by indirectly invoking the macro with our style + * type and concatenating the name against them. */ +#define INCBIN_GLOBAL_LABELS(NAME, TYPE) \ + INCBIN_INVOKE( \ + INCBIN_GLOBAL, \ + INCBIN_CONCATENATE( \ + NAME, \ + INCBIN_INVOKE( \ + INCBIN_STYLE_IDENT, \ + TYPE))) \ + INCBIN_INVOKE( \ + INCBIN_TYPE, \ + INCBIN_CONCATENATE( \ + NAME, \ + INCBIN_INVOKE( \ + INCBIN_STYLE_IDENT, \ + TYPE))) + +/** + * @brief Externally reference binary data included in another translation unit. + * + * Produces three external symbols that reference the binary data included in + * another translation unit. + * + * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with + * "Data", as well as "End" and "Size" after. An example is provided below. + * + * @param TYPE Optional array type. Omitting this picks a default of `unsigned char`. + * @param NAME The name given for the binary data + * + * @code + * INCBIN_EXTERN(Foo); + * + * // Now you have the following symbols: + * // extern const unsigned char Foo[]; + * // extern const unsigned char *const Foo; + * // extern const unsigned int Foo; + * @endcode + * + * You may specify a custom optional data type as well as the first argument. + * @code + * INCBIN_EXTERN(custom_type, Foo); + * + * // Now you have the following symbols: + * // extern const custom_type Foo[]; + * // extern const custom_type *const Foo; + * // extern const unsigned int Foo; + * @endcode + */ +#define INCBIN_EXTERN(...) \ + INCBIN_CONCATENATE(INCBIN_EXTERN_, INCBIN_VA_ARGC(__VA_ARGS__))(__VA_ARGS__) +#define INCBIN_EXTERN_1(NAME, ...) \ + INCBIN_EXTERN_2(unsigned char, NAME) +#define INCBIN_EXTERN_2(TYPE, NAME) \ + INCBIN_EXTERNAL const INCBIN_ALIGN TYPE \ + INCBIN_CONCATENATE( \ + INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \ + INCBIN_STYLE_IDENT(DATA))[]; \ + INCBIN_EXTERNAL const INCBIN_ALIGN TYPE *const \ + INCBIN_CONCATENATE( \ + INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \ + INCBIN_STYLE_IDENT(END)); \ + INCBIN_EXTERNAL const unsigned int \ + INCBIN_CONCATENATE( \ + INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \ + INCBIN_STYLE_IDENT(SIZE)) + +/** + * @brief Externally reference textual data included in another translation unit. + * + * Produces three external symbols that reference the textual data included in + * another translation unit. + * + * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with + * "Data", as well as "End" and "Size" after. An example is provided below. + * + * @param NAME The name given for the textual data + * + * @code + * INCBIN_EXTERN(Foo); + * + * // Now you have the following symbols: + * // extern const char Foo[]; + * // extern const char *const Foo; + * // extern const unsigned int Foo; + * @endcode + */ +#define INCTXT_EXTERN(NAME) \ + INCBIN_EXTERN_2(char, NAME) + +/** + * @brief Include a binary file into the current translation unit. + * + * Includes a binary file into the current translation unit, producing three symbols + * for objects that encode the data and size respectively. + * + * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with + * "Data", as well as "End" and "Size" after. An example is provided below. + * + * @param TYPE Optional array type. Omitting this picks a default of `unsigned char`. + * @param NAME The name to associate with this binary data (as an identifier.) + * @param FILENAME The file to include (as a string literal.) + * + * @code + * INCBIN(Icon, "icon.png"); + * + * // Now you have the following symbols: + * // const unsigned char Icon[]; + * // const unsigned char *const Icon; + * // const unsigned int Icon; + * @endcode + * + * You may specify a custom optional data type as well as the first argument. + * These macros are specialized by arity. + * @code + * INCBIN(custom_type, Icon, "icon.png"); + * + * // Now you have the following symbols: + * // const custom_type Icon[]; + * // const custom_type *const Icon; + * // const unsigned int Icon; + * @endcode + * + * @warning This must be used in global scope + * @warning The identifiers may be different if INCBIN_STYLE is not default + * + * To externally reference the data included by this in another translation unit + * please @see INCBIN_EXTERN. + */ +#ifdef _MSC_VER +# define INCBIN(NAME, FILENAME) \ + INCBIN_EXTERN(NAME) +#else +# define INCBIN(...) \ + INCBIN_CONCATENATE(INCBIN_, INCBIN_VA_ARGC(__VA_ARGS__))(__VA_ARGS__) +# if defined(__GNUC__) +# define INCBIN_1(...) _Pragma("GCC error \"Single argument INCBIN not allowed\"") +# elif defined(__clang__) +# define INCBIN_1(...) _Pragma("clang error \"Single argument INCBIN not allowed\"") +# else +# define INCBIN_1(...) /* Cannot do anything here */ +# endif +# define INCBIN_2(NAME, FILENAME) \ + INCBIN_3(unsigned char, NAME, FILENAME) +# define INCBIN_3(TYPE, NAME, FILENAME) INCBIN_COMMON(TYPE, NAME, FILENAME, /* No terminator for binary data */) +# define INCBIN_COMMON(TYPE, NAME, FILENAME, TERMINATOR) \ + __asm__(INCBIN_SECTION \ + INCBIN_GLOBAL_LABELS(NAME, DATA) \ + INCBIN_ALIGN_HOST \ + INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) ":\n" \ + INCBIN_MACRO " \"" FILENAME "\"\n" \ + TERMINATOR \ + INCBIN_GLOBAL_LABELS(NAME, END) \ + INCBIN_ALIGN_BYTE \ + INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) ":\n" \ + INCBIN_BYTE "1\n" \ + INCBIN_GLOBAL_LABELS(NAME, SIZE) \ + INCBIN_ALIGN_HOST \ + INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(SIZE) ":\n" \ + INCBIN_INT INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) " - " \ + INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) "\n" \ + INCBIN_ALIGN_HOST \ + ".text\n" \ + ); \ + INCBIN_EXTERN(TYPE, NAME) +#endif + +/** + * @brief Include a textual file into the current translation unit. + * + * This behaves the same as INCBIN except it produces char compatible arrays + * and implicitly adds a null-terminator byte, thus the size of data included + * by this is one byte larger than that of INCBIN. + * + * Includes a textual file into the current translation unit, producing three + * symbols for objects that encode the data and size respectively. + * + * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with + * "Data", as well as "End" and "Size" after. An example is provided below. + * + * @param NAME The name to associate with this binary data (as an identifier.) + * @param FILENAME The file to include (as a string literal.) + * + * @code + * INCTXT(Readme, "readme.txt"); + * + * // Now you have the following symbols: + * // const char Readme[]; + * // const char *const Readme; + * // const unsigned int Readme; + * @endcode + * + * @warning This must be used in global scope + * @warning The identifiers may be different if INCBIN_STYLE is not default + * + * To externally reference the data included by this in another translation unit + * please @see INCBIN_EXTERN. + */ +#if defined(_MSC_VER) +# define INCTXT(NAME, FILENAME) \ + INCBIN_EXTERN(NAME) +#else +# define INCTXT(NAME, FILENAME) \ + INCBIN_COMMON(char, NAME, FILENAME, INCBIN_BYTE "0\n") +#endif + +#endif \ No newline at end of file diff --git a/intelligentroi.cpp b/intelligentroi.cpp new file mode 100644 index 0000000..62b47f7 --- /dev/null +++ b/intelligentroi.cpp @@ -0,0 +1,99 @@ +#include "intelligentroi.h" + +#include + +#include "utils.h" +#include "log.h" + +bool InteligentRoi::compPointPrio(const std::pair& a, const std::pair& b, const cv::Point2i& center) +{ + if(a.second != b.second) + return a.second > b.second; + + double distA = pointDist(a.first, center); + double distB = pointDist(b.first, center); + + return distA < distB; +} + +void InteligentRoi::slideRectToPoint(cv::Rect& rect, const cv::Point2i& point) +{ + if(!pointInRect(point, rect)) + { + if(point.x < rect.x) + rect.x = point.x; + else if(point.x > rect.x+rect.width) + rect.x = point.x-rect.width; + if(point.y < rect.y) + rect.y = point.y; + else if(point.y > rect.y+rect.height) + rect.y = point.y-rect.height; + } +} + +cv::Rect InteligentRoi::maxRect(const cv::Size2i& imageSize, std::vector> mustInclude) +{ + int radius = std::min(imageSize.height, imageSize.width)/2; + cv::Point2i point(imageSize.width/2, imageSize.height/2); + cv::Rect candiate(point.x-radius, point.y-radius, radius*2, radius*2); + + std::sort(mustInclude.begin(), mustInclude.end(), + [&point](const std::pair& a, const std::pair& b){return compPointPrio(a, b, point);}); + + while(true) + { + cv::Rect includeRect = rectFromPoints(mustInclude); + if(includeRect.width-2 > radius || includeRect.height-2 > radius) + { + slideRectToPoint(candiate, mustInclude.back().first); + mustInclude.pop_back(); + Log(Log::DEBUG)<<"cant fill"; + for(const std::pair& mipoint : mustInclude) + Log(Log::DEBUG)<& includePoint : mustInclude) + slideRectToPoint(candiate, includePoint.first); + + if(candiate.x < 0) + candiate.x = 0; + if(candiate.y < 0) + candiate.y = 0; + if(candiate.x+candiate.width > imageSize.width) + candiate.width = imageSize.width-candiate.x; + if(candiate.y+candiate.height > imageSize.height) + candiate.height = imageSize.height-candiate.y; + + return candiate; +} + +InteligentRoi::InteligentRoi(const Yolo& yolo) +{ + personId = yolo.getClassForStr("person"); +} + +cv::Rect InteligentRoi::getCropRectangle(const std::vector& detections, const cv::Size2i& imageSize) +{ + if(!detections.empty()) + { + std::vector> corners; + for(size_t i = 0; i < detections.size(); ++i) + { + int priority = detections[i].priority; + if(detections[i].class_id == personId) + corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width/2, 0), priority+1}); + corners.push_back({detections[i].box.tl(), priority}); + corners.push_back({detections[i].box.br(), priority}); + corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width, 0), priority}); + corners.push_back({detections[i].box.br()+cv::Point2i(0-detections[i].box.width, 0), priority}); + } + + return maxRect(imageSize, corners); + } + + Log(Log::DEBUG)<<"Using center crop as there are no detections"; + return maxRect(imageSize); +} diff --git a/intelligentroi.h b/intelligentroi.h new file mode 100644 index 0000000..de09402 --- /dev/null +++ b/intelligentroi.h @@ -0,0 +1,18 @@ +#pragma once + +#include + +#include "yolo.h" + +class InteligentRoi +{ +private: + int personId; + static bool compPointPrio(const std::pair& a, const std::pair& b, const cv::Point2i& center); + static void slideRectToPoint(cv::Rect& rect, const cv::Point2i& point); + static cv::Rect maxRect(const cv::Size2i& imageSize, std::vector> mustInclude = {}); + +public: + InteligentRoi(const Yolo& yolo); + cv::Rect getCropRectangle(const std::vector& detections, const cv::Size2i& imageSize); +}; diff --git a/log.cpp b/log.cpp new file mode 100644 index 0000000..61fa188 --- /dev/null +++ b/log.cpp @@ -0,0 +1,63 @@ +/** +* Lubricant Detecter +* Copyright (C) 2021 Carl Klemm +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License +* version 3 as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the +* Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +* Boston, MA 02110-1301, USA. +*/ + +#include "log.h" + +Log::Log(Level type, bool endlineI): endline(endlineI) +{ + msglevel = type; + if(headers) + { + operator << ("["+getLabel(type)+"] "); + } +} + +Log::~Log() +{ + if(opened && endline) + { + std::cout<<'\n'; + } + opened = false; +} + + +std::string Log::getLabel(Level level) +{ + std::string label; + switch(level) + { + case DEBUG: + label = "DEBUG"; + break; + case INFO: + label = "INFO "; + break; + case WARN: + label = "WARN "; + break; + case ERROR: + label = "ERROR"; + break; + } + return label; +} + +bool Log::headers = false; +Log::Level Log::level = WARN; diff --git a/log.h b/log.h new file mode 100644 index 0000000..c0d90eb --- /dev/null +++ b/log.h @@ -0,0 +1,64 @@ +/** +* eisgenerator +* Copyright (C) 2021 Carl Klemm +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License +* version 3 as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the +* Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +* Boston, MA 02110-1301, USA. +*/ + +#pragma once +#include +#include + +class Log +{ +public: + + enum Level + { + DEBUG, + INFO, + WARN, + ERROR + }; + +private: + bool opened = false; + Level msglevel = DEBUG; + bool endline = true; + + std::string getLabel(Level level); + +public: + + static bool headers; + static Level level; + + Log() {} + Log(Level type, bool endlineI = true); + ~Log(); + + template Log &operator<<(const T &msg) + { + if(msglevel >= level) + { + if(msglevel == ERROR) + std::cerr< +#include +#include +#include +#include +#include + +#include "yolo.h" +#include "log.h" +#include "options.h" +#include "utils.h" +#include "intelligentroi.h" + +const Yolo::Detection* pointInDetectionHoriz(int x, const std::vector& detections, const Yolo::Detection* ignore = nullptr) +{ + const Yolo::Detection* inDetection = nullptr; + for(const Yolo::Detection& detection : detections) + { + if(!ignore || ignore != &detection) + continue; + + if(detection.box.x <= x && detection.box.x+detection.box.width <= x) + { + if(!inDetection || detection.box.br().x > inDetection->box.br().x) + inDetection = &detection; + } + } + return inDetection; +} + +bool findRegionEndpointHoriz(int& x, const std::vector& detections, int imgSizeX) +{ + const Yolo::Detection* inDetection = pointInDetectionHoriz(x, detections); + + if(!inDetection) + { + const Yolo::Detection* closest = nullptr; + for(const Yolo::Detection& detection : detections) + { + if(detection.box.x > x) + { + if(closest == nullptr || detection.box.x-x > closest->box.x-x) + closest = &detection; + } + } + if(closest) + x = closest->box.x; + else + x = imgSizeX; + return false; + } + else + { + x = inDetection->box.br().x; + const Yolo::Detection* candidateDetection = pointInDetectionHoriz(x, detections, inDetection); + if(candidateDetection && candidateDetection->box.br().x > x) + return findRegionEndpointHoriz(x, detections, imgSizeX); + else + return true; + } +} + +std::vector> cutImageIntoHorzRegions(cv::Mat& image, const std::vector& detections) +{ + std::vector> out; + + for(int x = 0; x < image.cols; ++x) + { + int start = x; + bool frozen = findRegionEndpointHoriz(x, detections, image.cols); + + cv::Mat slice = image(cv::Rect(start, 0, x-start, image.rows)); + out.push_back({slice, frozen}); + } + + return out; +} + +const Yolo::Detection* pointInDetectionVert(int y, const std::vector& detections, const Yolo::Detection* ignore = nullptr) +{ + const Yolo::Detection* inDetection = nullptr; + for(const Yolo::Detection& detection : detections) + { + if(!ignore || ignore != &detection) + continue; + + if(detection.box.y <= y && detection.box.y+detection.box.height <= y) + { + if(!inDetection || detection.box.br().y > inDetection->box.br().y) + inDetection = &detection; + } + } + return inDetection; +} + +bool findRegionEndpointVert(int& y, const std::vector& detections, int imgSizeY) +{ + const Yolo::Detection* inDetection = pointInDetectionVert(y, detections); + + if(!inDetection) + { + const Yolo::Detection* closest = nullptr; + for(const Yolo::Detection& detection : detections) + { + if(detection.box.y > y) + { + if(closest == nullptr || detection.box.y-y > closest->box.y-y) + closest = &detection; + } + } + if(closest) + y = closest->box.y; + else + y = imgSizeY; + return false; + } + else + { + y = inDetection->box.br().y; + const Yolo::Detection* candidateDetection = pointInDetectionVert(y, detections, inDetection); + if(candidateDetection && candidateDetection->box.br().y > y) + return findRegionEndpointVert(y, detections, imgSizeY); + else + return true; + } +} + +std::vector> cutImageIntoVertRegions(cv::Mat& image, const std::vector& detections) +{ + std::vector> out; + + for(int y = 0; y < image.rows; ++y) + { + int start = y; + bool frozen = findRegionEndpointVert(y, detections, image.rows); + + cv::Mat slice = image(cv::Rect(0, start, image.cols, y-start)); + out.push_back({slice, frozen}); + } + + return out; +} + +bool seamCarveResize(cv::Mat& image, const std::vector& detections, double targetAspectRatio = 1.0) +{ + double aspectRatio = image.cols/static_cast(image.rows); + + bool vertical = false; + cv::Mat workImage; + if(aspectRatio > targetAspectRatio) + vertical = true; + + int requiredLines = 0; + if(!vertical) + requiredLines = workImage.rows*targetAspectRatio - workImage.cols; + else + requiredLines = workImage.cols/targetAspectRatio - workImage.rows; + + Log(Log::DEBUG)<<__func__<<' '<> slices = cutImageIntoHorzRegions(image, detections); + int totalResizableSize = 0; + for(const std::pair& slice : slices) + { + if(slice.second) + totalResizableSize += slice.first.cols; + } + + std::vector seamsForSlice(slices.size()); + for(size_t i = 0; i < slices.size(); ++i) + { + seamsForSlice[i] = (static_cast(slices[i].first.cols)/totalResizableSize)*requiredLines; + } + } + else + { + int totalResizableSize = 0; + std::vector> slices = cutImageIntoVertRegions(image, detections); + } + +} + +void drawDebugInfo(cv::Mat &image, const cv::Rect& rect, const std::vector& detections) +{ + for(const Yolo::Detection& detection : detections) + { + cv::rectangle(image, detection.box, detection.color, 4); + std::string label = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4); + cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_DUPLEX, 3, 2, 0); + cv::Rect textBox(detection.box.x, detection.box.y - 80, labelSize.width + 10, labelSize.height + 20); + cv::rectangle(image, textBox, detection.color, cv::FILLED); + cv::putText(image, label, cv::Point(detection.box.x + 5, detection.box.y - 10), cv::FONT_HERSHEY_DUPLEX, 3, cv::Scalar(0, 0, 0), 2, 0); + } + + cv::rectangle(image, rect, cv::Scalar(0, 0, 255), 8); +} + +int main(int argc, char* argv[]) +{ + Log::level = Log::INFO; + + Config config; + argp_parse(&argp, argc, argv, 0, 0, &config); + + if(config.outputDir.empty()) + { + Log(Log::ERROR)<<"a output path \"-o\" is required"; + return 1; + } + + if(config.imagePaths.empty()) + { + Log(Log::ERROR)<<"at least one input image or directory is required"; + return 1; + } + + std::vector imagePaths; + + for(const std::filesystem::path& path : config.imagePaths) + getImageFiles(path, imagePaths); + + if(imagePaths.empty()) + { + Log(Log::ERROR)<<"no image was found\n"; + return 1; + } + + Yolo yolo(config.modelPath, {640, 480}, config.classesPath, false); + InteligentRoi intRoi(yolo); + + if(!std::filesystem::exists(config.outputDir)) + { + if(!std::filesystem::create_directory(config.outputDir)) + { + Log(Log::ERROR)<<"could not create directory at "< 1024) + { + if(image.cols > image.rows) + { + double ratio = 1024.0/image.cols; + cv::resize(image, image, {1024, static_cast(image.rows*ratio)}, 0, 0, cv::INTER_CUBIC); + } + else + { + double ratio = 1024.0/image.rows; + cv::resize(image, image, {static_cast(image.cols*ratio), 1024}, 0, 0, cv::INTER_CUBIC); + } + } + + std::vector detections = yolo.runInference(image); + + Log(Log::DEBUG)<<"Got "< +#include +#include +#include +#include +#include "log.h" + +const char *argp_program_version = "AIImagePreprocesses"; +const char *argp_program_bug_address = ""; +static char doc[] = "Application that trainsforms images into formats, sizes and aspect ratios required for ai training"; +static char args_doc[] = "[IMAGES]"; + +static struct argp_option options[] = +{ + {"verbose", 'v', 0, 0, "Show debug messages" }, + {"quiet", 'q', 0, 0, "only output data" }, + {"model", 'm', "[FILENAME]", 0, "YoloV8 model to use for detection" }, + {"classes", 'c', "[FILENAME]", 0, "classes text file to use" }, + {"out", 'o', "[DIRECTORY]", 0, "directory whre images are to be saved" }, + {"debug", 'd', 0, 0, "output debug images" }, + {"seam-carving", 's', 0, 0, "model to train: "} +}; + +struct Config +{ + std::vector imagePaths; + std::filesystem::path modelPath; + std::filesystem::path classesPath; + std::filesystem::path outputDir; + bool seamCarving = false; + bool debug = false; +}; + +static error_t parse_opt (int key, char *arg, struct argp_state *state) +{ + Config *config = reinterpret_cast(state->input); + switch (key) + { + case 'q': + Log::level = Log::ERROR; + break; + case 'v': + Log::level = Log::DEBUG; + break; + case 'm': + config->modelPath = arg; + break; + case 'c': + config->classesPath = arg; + break; + case 'd': + config->debug = true; + break; + case 'o': + config->outputDir.assign(arg); + break; + case 's': + config->seamCarving = true; + break; + case ARGP_KEY_ARG: + config->imagePaths.push_back(arg); + break; + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} + +static struct argp argp = {options, parse_opt, args_doc, doc}; diff --git a/readfile.h b/readfile.h new file mode 100644 index 0000000..0196a0a --- /dev/null +++ b/readfile.h @@ -0,0 +1,16 @@ +#pragma once +#include +#include +#include +#include +#include + +inline std::string readFile(const std::filesystem::path& path) +{ + std::ifstream file(path); + if(!file.is_open()) + throw std::runtime_error(std::string("could not open file ") + path.string()); + std::stringstream ss; + ss< +#include +#include +#include +#if __cplusplus >= 201703L +#include +#endif +#include + +SeamCarving::SeamCarving(const cv::Mat &img, int seams, bool grow) : + image(img), seams(seams), grow(grow) {} + +void SeamCarving::init() +{ + cv::Mat newFrame = image.clone(); + + for(int i = 0; i < seams; i++) + { + //Gradient Magnitude for intensity of image. + cv::Mat gradientMagnitude = computeGradientMagnitude(newFrame); + //Use DP to create the real energy map that is used for path calculation. + // Strictly using vertical paths for testing simplicity. + cv::Mat pathIntensityMat = computePathIntensityMat(gradientMagnitude); + + if(pathIntensityMat.rows == 0 && pathIntensityMat.cols == 0) + { + finalImage = image; + break; + } + std::vector seam = getLeastImportantPath(pathIntensityMat); + vecSeams.push_back(seam); + + newFrame = removeLeastImportantPath(newFrame,seam); + + if(newFrame.rows == 0 && newFrame.cols == 0) + { + finalImage = image; + break; + } + } + + if (grow) + { + cv::Mat growMat = image.clone(); + + for (int i = 0; i < vecSeams.size(); i++) + { + growMat = addLeastImportantPath(growMat,vecSeams[i]); + } + finalImage = growMat; + } + else + { + finalImage = newFrame; + } + + sliderPos = seams; + +} + +void SeamCarving::computeNewFinalImage(int sliderPos) +{ + if(sliderPos == 0) + { + finalImage = image; + return; + } + if(sliderPos < 1 || sliderPos >= sliderMax-1) + { + return; + } + if(sliderPos > vecSeams.size()) + { + cv::Mat newFrame = finalImage.clone(); + for(int i = vecSeams.size()-1; i < sliderPos; i++) + { + //Gradient Magnitude for intensity of image. + cv::Mat gradientMagnitude = computeGradientMagnitude(newFrame); + //Use DP to create the real energy map that is used for path calculation. + // Strictly using vertical paths for testing simplicity. + cv::Mat pathIntensityMat = computePathIntensityMat(gradientMagnitude); + + if(pathIntensityMat.rows == 0 && pathIntensityMat.cols == 0) + { + finalImage = image; + break; + } + std::vector seam = getLeastImportantPath(pathIntensityMat); + vecSeams.push_back(seam); + newFrame = removeLeastImportantPath(newFrame,seam); + if(newFrame.rows == 0 && newFrame.cols == 0) + { + finalImage = image; + break; + } + } + if (grow) + { + cv::Mat growMat = image.clone(); + + for (int i = 0; i < vecSeams.size(); i++) + { + growMat = addLeastImportantPath(growMat,vecSeams[i]); + } + + finalImage = growMat; + } + else + { + finalImage = newFrame; + } + } + else if (sliderPos < vecSeams.size()) + { + cv::Mat newFrame = image.clone(); + for(int i = 0; i < sliderPos; i++) // TODO check if it is faster to add seams back (probably not) + { + + if (grow) + { + newFrame = addLeastImportantPath(newFrame,vecSeams[i]); + } + else + { + newFrame = removeLeastImportantPath(newFrame,vecSeams[i]); + } + + if(newFrame.rows == 0 && newFrame.cols == 0) + { + finalImage = image; + break; + } + } + finalImage = newFrame; + } +} + +const cv::Mat& SeamCarving::getFinalImage() +{ + return finalImage; +} + +void SeamCarving::showSeamsImg() +{ + cv::Mat seamsFrame = image.clone(); + //std::cout << "sliderPos: " << sliderPos << std::endl; + for(int i = 0; i < sliderPos; i++) + { + seamsFrame = drawSeam(seamsFrame, vecSeams[i]); + } + cv::imwrite("output/seams_image.jpg", seamsFrame); + cv::imshow( "Image Seams", seamsFrame); +} + +static void onChange( int pos, void* object ) +{ + SeamCarving* sc = (SeamCarving*)(object); + /*if(sc->getBlockUpdateStatus()) { + return; + }*/ + sc->computeNewFinalImage(pos); + imshow("Final Image", sc->getFinalImage()); +#if DEBUG + sc->showSeamsImg(); +#endif +} +static void onMouse( int event, int x, int y, int, void* object) +{ + SeamCarving* sc = (SeamCarving*)(object); + if( event == cv::EVENT_LBUTTONDOWN || + event == cv::EVENT_RBUTTONDOWN || + event == cv::EVENT_MBUTTONDOWN + ) + { + sc->setBlockUpdate(true); + } + else if(event == cv::EVENT_LBUTTONUP || + event == cv::EVENT_RBUTTONUP || + event == cv::EVENT_MBUTTONUP) + { + sc->setBlockUpdate(false); + } +} + +void SeamCarving::setBlockUpdate(bool bUpdate) +{ + blockUpdate = bUpdate; +} + +bool SeamCarving::getBlockUpdateStatus() +{ + return blockUpdate; +} + +void SeamCarving::showImage() +{ +#if __cplusplus >= 201703L + if(!std::filesystem::exists("output")) + { + std::filesystem::create_directory("output"); + } +#endif + if( image.empty() ) + { + std::cout << "Could not open raw image" << std::endl ; + return; + } + namedWindow( "Raw Image", cv::WINDOW_AUTOSIZE ); + cv::imshow( "Raw Image", image ); + + if( finalImage.empty() ) + { + std::cout << "Could not open final image" << std::endl ; + return; + } +#if DEBUG + namedWindow( "gradient Image", cv::WINDOW_AUTOSIZE ); + cv::Mat gradient = computeGradientMagnitude(image); + cv::Mat u8_image; + gradient.convertTo(u8_image, CV_8U); + + cv::imwrite("output/gradient_image.jpg", u8_image); + cv::imshow("gradient Image", u8_image); + + namedWindow( "intensity Image", cv::WINDOW_AUTOSIZE ); + cv::Mat u8_image2; + cv::Mat intensityMat = computePathIntensityMat(gradient); + cv::Mat dst; + cv::normalize(intensityMat, dst, 0, 255, cv::NORM_MINMAX); + dst.convertTo(u8_image2, CV_8U); + cv::imwrite("output/intensity_image.jpg", u8_image2); + cv::imshow( "intensity Image", u8_image2); + + //cv::Mat engImg = GetEnergyImg(image); + //namedWindow("energy Image", cv::WINDOW_AUTOSIZE); + //cv::Mat u8_image3; + //engImg.convertTo(u8_image3, CV_8U); + //cv::imshow( "energy Image", u8_image3); + namedWindow("Image Seams", cv::WINDOW_AUTOSIZE); + showSeamsImg(); + +#endif + + namedWindow( "Final Image", cv::WINDOW_AUTOSIZE ); + cv::createTrackbar("Seams", "Final Image", &sliderPos, sliderMax, onChange, this); + //cv::setMouseCallback("Final Image", onMouse, this ); + cv::imwrite("output/final_image.jpg", finalImage); + cv::imshow("Final Image", finalImage); + cv::waitKey(0); +} + +cv::Mat SeamCarving::GetEnergyImg(const cv::Mat &img) +{ + // find partial derivative of x-axis and y-axis seperately + // sum up the partial derivates + float pd[] = {1, 2, 1, 0, 0, 0, -1, -2 - 1}; + cv::Mat xFilter(3, 3, CV_32FC1, pd); + cv::Mat yFilter = xFilter.t(); + cv::Mat grayImg; + cv::cvtColor(img, grayImg, cv::COLOR_RGBA2GRAY); + cv::Mat dxImg; + cv::Mat dyImg; + + cv::filter2D(grayImg, dxImg, 0, xFilter); + cv::filter2D(grayImg, dyImg, 0, yFilter); + //cv::Mat zeroMat = cv::Mat::zeros(dxImg.rows, dxImg.cols, dxImg.type()); + //cv::Mat absDxImg; + //cv::Mat absDyImg; + //cv::absdiff(dxImg, zeroMat, absDxImg); + //cv::absdiff(dyImg, zeroMat, absDyImg); + cv::Mat absDxImg = cv::abs(dxImg); + cv::Mat absDyImg = cv::abs(dyImg); + + cv::Mat energyImg; + cv::add(absDxImg, absDyImg, energyImg); + return energyImg; +} + +cv::Mat SeamCarving::computeGradientMagnitude(const cv::Mat &frame) +{ + cv::Mat grayScale; + cv::cvtColor(frame, grayScale, cv::COLOR_RGBA2GRAY); + cv::Mat drv = cv::Mat(grayScale.size(), CV_16SC1); + cv::Mat drv32f = cv::Mat(grayScale.size(), CV_32FC1); + cv::Mat mag = cv::Mat::zeros(grayScale.size(), CV_32FC1); + Sobel(grayScale, drv, CV_16SC1, 1, 0); + drv.convertTo(drv32f, CV_32FC1); + cv::accumulateSquare(drv32f, mag); + Sobel(grayScale, drv, CV_16SC1, 0, 1); + drv.convertTo(drv32f, CV_32FC1); + cv::accumulateSquare(drv32f, mag); + cv::sqrt(mag, mag); + return mag; +} + +float SeamCarving::intensity(float currIndex, int start, int end) +{ + if(start < 0 || start >= end) + { + return FLT_MAX; + } + else + { + return currIndex; + } +} + +cv::Mat SeamCarving::computePathIntensityMat(const cv::Mat &rawEnergyMap) +{ + cv::Mat pathIntensityMap = cv::Mat(rawEnergyMap.size(), CV_32FC1); + + if(rawEnergyMap.total() == 0 || pathIntensityMap.total() == 0) + { + return cv::Mat(); + } + + //First row of intensity paths is the same as the energy map + rawEnergyMap.row(0).copyTo(pathIntensityMap.row(0)); + float max = 0; + + //The rest of them use the DP calculation using the minimum of the 3 pixels above them + their own intensity. + for(int row = 1; row < pathIntensityMap.rows; row++) + { + for(int col = 0; col < pathIntensityMap.cols; col++) + { + //The initial intensity of the pixel is its raw intensity + float pixelIntensity = rawEnergyMap.at(row, col); + //The minimum intensity from the current path of the 3 pixels above it is added to its intensity. + float p1 = intensity(pathIntensityMap.at(row-1, col-1), col - 1, pathIntensityMap.cols); + float p2 = intensity(pathIntensityMap.at(row-1, col), col, pathIntensityMap.cols); + float p3 = intensity(pathIntensityMap.at(row-1, col+1), col + 1, pathIntensityMap.cols); + + float minIntensity = std::min(p1, p2); + minIntensity = std::min(minIntensity, p3); + + pixelIntensity += minIntensity; + + max = std::max(max, pixelIntensity); + pathIntensityMap.at(row, col) = pixelIntensity; + } + } + return pathIntensityMap; +} + +std::vector SeamCarving::getLeastImportantPath(const cv::Mat &importanceMap) +{ + if(importanceMap.total() == 0) + { + return std::vector(); + } + + //Find the beginning of the least important path. Trying an averaging approach because absolute min wasn't very reliable. + float minImportance = importanceMap.at(importanceMap.rows - 1, 0); + int minCol = 0; + for (int col = 1; col < importanceMap.cols; col++) + { + float currPixel =importanceMap.at(importanceMap.rows - 1, col); + if(currPixel < minImportance) + { + minCol = col; + minImportance = currPixel; + } + } + + std::vector leastEnergySeam(importanceMap.rows); + leastEnergySeam[importanceMap.rows-1] = minCol; + for(int row = importanceMap.rows - 2; row >= 0; row--) + { + float p1 = intensity(importanceMap.at(row, minCol-1), minCol - 1, importanceMap.cols); + float p2 = intensity(importanceMap.at(row, minCol), minCol, importanceMap.cols); + float p3 = intensity(importanceMap.at(row, minCol+1), minCol + 1, importanceMap.cols); + //Adjust the min column for path following + if(p1 < p2 && p1 < p3) + { + minCol -= 1; + } + else if(p3 < p1 && p3 < p2) + { + minCol += 1; + } + leastEnergySeam[row] = minCol; + } + + return leastEnergySeam; +} + +cv::Mat SeamCarving::removeLeastImportantPath(const cv::Mat &original, const std::vector &seam) +{ + cv::Size orgSize = original.size(); + // new mat needs to shrink by one collumn + cv::Size size = cv::Size(orgSize.width-1, orgSize.height); + cv::Mat newMat = cv::Mat(size, original.type()); + + unsigned char *rawOrig = original.data; + unsigned char *rawOutput = newMat.data; + for(int row = 0; row < seam.size(); row++) + { + removePixel(original, newMat, row, seam[row]); + } + return newMat; +} + +void SeamCarving::removePixel(const cv::Mat &original, cv::Mat &outputMat, int row, int minCol) +{ + int width = original.cols; + int channels = original.channels(); + int originRowStart = row * channels * width; + int newRowStart = row * channels * (width - 1); + int firstNum = minCol * channels; + unsigned char *rawOrig = original.data; + unsigned char *rawOutput = outputMat.data; + + //std::cout << "originRowStart: " << originRowStart << std::endl; + //std::cout << "newRowStart: " << newRowStart << std::endl; + //std::cout << "firstNum: " << firstNum << std::endl; + memcpy(rawOutput + newRowStart, rawOrig + originRowStart, firstNum); + + int originRowMid = originRowStart + (minCol + 1) * channels; + int newRowMid = newRowStart + minCol * channels; + int secondNum = (width - 1) * channels - firstNum; + + //std::cout << "originRowMid: " << originRowMid << std::endl; + //std::cout << "newRowMid: " << newRowMid << std::endl; + //std::cout << "secondNum: " << secondNum << std::endl; + memcpy(rawOutput + newRowMid, rawOrig + originRowMid, secondNum); + + int leftPixel = minCol - 1; + int rightPixel = minCol + 1; + + int byte1 = rawOrig[originRowStart + minCol * channels]; + int byte2 = rawOrig[originRowStart + minCol * channels + 1]; + int byte3 = rawOrig[originRowStart + minCol * channels + 2]; + + if (rightPixel < width) + { + int byte1R = rawOrig[originRowStart + rightPixel * channels]; + int byte2R = rawOrig[originRowStart + rightPixel * channels + 1]; + int byte3R = rawOrig[originRowStart + rightPixel * channels + 2]; + rawOutput[newRowStart + minCol * channels] = (unsigned char)((byte1 + byte1R) / 2); + rawOutput[newRowStart + minCol * channels + 1] = (unsigned char)((byte2 + byte2R) / 2); + rawOutput[newRowStart + minCol * channels + 2] = (unsigned char)((byte3 + byte3R) / 2); + } + + if(leftPixel >= 0) + { + int byte1L = rawOrig[originRowStart + leftPixel*channels]; + int byte2L = rawOrig[originRowStart + leftPixel*channels+1]; + int byte3L = rawOrig[originRowStart + leftPixel*channels+2]; + rawOutput[newRowStart + leftPixel*channels] = (unsigned char) ((byte1 + byte1L)/2); + rawOutput[newRowStart + leftPixel*channels+1] = (unsigned char) ((byte2 + byte2L)/2); + rawOutput[newRowStart + leftPixel*channels+2] = (unsigned char) ((byte3 + byte3L)/2); + } +} + +cv::Mat SeamCarving::addLeastImportantPath(const cv::Mat &original, const std::vector &seam) +{ + cv::Size orgSize = original.size(); + // new mat needs to grow by one column + cv::Size size = cv::Size(orgSize.width+1, orgSize.height); + cv::Mat newMat = cv::Mat(size, original.type()); + + unsigned char *rawOrig = original.data; + unsigned char *rawOutput = newMat.data; + for(int row = 0; row < seam.size(); row++) + { + //std::cout << "row: " << row << ", col: " << seam[row] << std::endl; + addPixel(original, newMat, row, seam[row]); + } + return newMat; +} + +void SeamCarving::addPixel(const cv::Mat &original, cv::Mat &outputMat, int row, int minCol) +{ + int width = original.cols; + int channels = original.channels(); + int originRowStart = row * channels * width; + int newRowStart = row * channels * (width + 1); + int firstNum = (minCol + 1) * channels; + + unsigned char *rawOrig = original.data; + unsigned char *rawOutput = outputMat.data; + + memcpy(rawOutput + newRowStart, rawOrig + originRowStart, firstNum); + + memcpy(rawOutput + newRowStart + firstNum, rawOrig + originRowStart + firstNum, channels); + + int originRowMid = originRowStart + ((minCol + 1) * channels); + int newRowMid = newRowStart + ((minCol + 2) * channels); + int secondNum = (width * channels) - firstNum; + + memcpy(rawOutput + newRowMid, rawOrig + originRowMid, secondNum); + + int leftPixel = minCol - 1; + int rightPixel = minCol + 1; + + int byte1 = rawOrig[originRowStart + minCol * channels]; + int byte2 = rawOrig[originRowStart + minCol * channels + 1]; + int byte3 = rawOrig[originRowStart + minCol * channels + 2]; + + if (rightPixel < width) + { + int byte1R = rawOrig[originRowStart + rightPixel * channels]; + int byte2R = rawOrig[originRowStart + rightPixel * channels + 1]; + int byte3R = rawOrig[originRowStart + rightPixel * channels + 2]; + rawOutput[newRowStart + minCol * channels] = (unsigned char)((byte1 + byte1R) / 2); + rawOutput[newRowStart + minCol * channels + 1] = (unsigned char)((byte2 + byte2R) / 2); + rawOutput[newRowStart + minCol * channels + 2] = (unsigned char)((byte3 + byte3R) / 2); + } + + if(leftPixel >= 0) + { + int byte1L = rawOrig[originRowStart + leftPixel*channels]; + int byte2L = rawOrig[originRowStart + leftPixel*channels+1]; + int byte3L = rawOrig[originRowStart + leftPixel*channels+2]; + rawOutput[newRowStart + leftPixel*channels] = (unsigned char) ((byte1 + byte1L)/2); + rawOutput[newRowStart + leftPixel*channels+1] = (unsigned char) ((byte2 + byte2L)/2); + rawOutput[newRowStart + leftPixel*channels+2] = (unsigned char) ((byte3 + byte3L)/2); + } +} diff --git a/seamcarving.h b/seamcarving.h new file mode 100644 index 0000000..feb133d --- /dev/null +++ b/seamcarving.h @@ -0,0 +1,61 @@ +#ifndef __SEAM__CARVING_HPP__ +#define __SEAM__CARVING_HPP__ + +#include +#define DEBUG 0 + +class SeamCarving { + public: + void showImage(); + const cv::Mat& getFinalImage(); + virtual void computeNewFinalImage(int pos); + void setBlockUpdate(bool bUpdate); + bool getBlockUpdateStatus(); + virtual void showSeamsImg(); + + protected: + SeamCarving(const cv::Mat &img, int seams, bool grow); + void init(); + virtual cv::Mat drawSeam(const cv::Mat &frame, const std::vector &seam) = 0; + cv::Mat image; + cv::Mat finalImage; + int seams; + bool grow; + int sliderMax; + int sliderPos; + std::vector> vecSeams; + + private: + cv::Mat GetEnergyImg(const cv::Mat &img); + cv::Mat computeGradientMagnitude(const cv::Mat &frame); + float intensity(float currIndex, int start, int end); + cv::Mat computePathIntensityMat(const cv::Mat &rawEnergyMap); + std::vector getLeastImportantPath(const cv::Mat &importanceMap); + cv::Mat removeLeastImportantPath(const cv::Mat &original, const std::vector &seam); + void removePixel(const cv::Mat &original, cv::Mat &outputMap, int row, int minCol); + cv::Mat addLeastImportantPath(const cv::Mat &original, const std::vector &seam); + void addPixel(const cv::Mat &original, cv::Mat &outputMat, int row, int minCol); + bool blockUpdate = false; + +}; + +class SeamCarvingHorizontal : public SeamCarving +{ + public: + SeamCarvingHorizontal(char* fileName, int seams=100, bool grow=false); + protected: + virtual cv::Mat drawSeam(const cv::Mat &frame, const std::vector &seam) override; +}; + +class SeamCarvingVertical : public SeamCarving { + public: + SeamCarvingVertical(char* fileName, int seams=100, bool grow=false); + virtual void computeNewFinalImage(int pos) override; +#if DEBUG + virtual void showSeamsImg() override; +#endif + protected: + virtual cv::Mat drawSeam(const cv::Mat &frame, const std::vector &seam) override; +}; + +#endif // __SEAM__CARVING_HPP__ diff --git a/seamcarvinghoriz.cpp b/seamcarvinghoriz.cpp new file mode 100644 index 0000000..98886fc --- /dev/null +++ b/seamcarvinghoriz.cpp @@ -0,0 +1,28 @@ +#include "seamcarving.h" +#include +#include +#include +#include +#include + +cv::Mat SeamCarvingHorizontal::drawSeam(const cv::Mat &frame, const std::vector &seam) +{ + cv::Mat retMat = frame.clone(); + for(int row = 0; row < frame.rows; row++) + { + for(int col = 0; col < frame.cols; col++) + { + retMat.at(row, seam[row])[0] = 0; + retMat.at(row, seam[row])[1] = 255; + retMat.at(row, seam[row])[2] = 0; + } + } + return retMat; +} + +SeamCarvingHorizontal::SeamCarvingHorizontal(char* fileName, int seams, bool grow) : + SeamCarving( cv::imread(fileName, cv::IMREAD_COLOR), seams, grow) +{ + sliderMax = image.cols; + init(); +} diff --git a/seamcarvingvert.cpp b/seamcarvingvert.cpp new file mode 100644 index 0000000..41c045b --- /dev/null +++ b/seamcarvingvert.cpp @@ -0,0 +1,51 @@ +#include "seamcarving.h" +#include +#include +#include +#include +#include + +SeamCarvingVertical::SeamCarvingVertical(char* fileName, int seams, bool grow) : + SeamCarving( cv::imread(fileName, cv::IMREAD_COLOR), seams, grow) +{ + sliderMax = image.rows; + cv::Mat oldImage = image; + image = image.t(); + init(); + image = oldImage; + finalImage = finalImage.t(); +} + +cv::Mat SeamCarvingVertical::drawSeam(const cv::Mat &frame, const std::vector &seam) +{ + cv::Mat retMat = frame.clone(); + for(int col = 0; col < frame.cols; col++) + { + for(int row = 0; row < frame.rows; row++) + { + retMat.at(seam[col], col)[0] = 0; + retMat.at(seam[col], col)[1] = 255; + retMat.at(seam[col], col)[2] = 0; + } + } + return retMat; +} + +void SeamCarvingVertical::computeNewFinalImage(int pos) +{ + cv::Mat oldImage = image; + image = image.t(); + SeamCarving::computeNewFinalImage(pos); + image = oldImage; + finalImage = finalImage.t(); +} + +#if DEBUG +void SeamCarvingVertical::showSeamsImg() +{ + cv::Mat oldImage = this->image; + this->image = this->image.t(); + SeamCarving::showImage(); + this->image = oldImage; +} +#endif diff --git a/tokenize.cpp b/tokenize.cpp new file mode 100644 index 0000000..af8509b --- /dev/null +++ b/tokenize.cpp @@ -0,0 +1,26 @@ +#include "tokenize.h" + + +std::vector tokenizeBinaryIgnore(const std::string& str, const char delim, const char ignoreBraket, const char escapeChar) +{ + std::vector tokens; + std::string token; + bool inBaracket = false; + for(size_t i = 0; i < str.size(); ++i) + { + if(str[i] == delim && !inBaracket && (i == 0 || str[i-1] != escapeChar)) + { + tokens.push_back(token); + token.clear(); + } + else + { + token.push_back(str[i]); + } + if(ignoreBraket == str[i]) + inBaracket = !inBaracket; + } + if(!inBaracket) + tokens.push_back(token); + return tokens; +} diff --git a/tokenize.h b/tokenize.h new file mode 100644 index 0000000..6641e5e --- /dev/null +++ b/tokenize.h @@ -0,0 +1,7 @@ +#pragma once + +#include +#include + +std::vector tokenizeBinaryIgnore(const std::string& str, const char delim, const char ignoreBraket = '\0', + const char escapeChar = '\0'); diff --git a/utils.cpp b/utils.cpp new file mode 100644 index 0000000..65b2fdb --- /dev/null +++ b/utils.cpp @@ -0,0 +1,60 @@ +#include "utils.h" + +#include +#include +#include + +bool isImagePath(const std::filesystem::path& path) +{ + return std::filesystem::is_regular_file(path) && (path.extension() == ".png" || path.extension() == ".jpg" || path.extension() == ".jpeg"); +} + +void getImageFiles(const std::filesystem::path& path, std::vector& paths) +{ + if(isImagePath(path)) + { + paths.push_back(path); + } + else if(std::filesystem::is_directory(path)) + { + for(const std::filesystem::directory_entry& dirent : std::filesystem::directory_iterator(path)) + { + if(std::filesystem::is_directory(dirent.path())) + getImageFiles(dirent.path(), paths); + else if(isImagePath(dirent.path())) + paths.push_back(dirent.path()); + } + } +} + +cv::Rect rectFromPoints(const std::vector>& points) +{ + int left = std::numeric_limits::max(); + int right = std::numeric_limits::min(); + int top = std::numeric_limits::max(); + int bottom = std::numeric_limits::min(); + + for(const std::pair& point : points) + { + left = point.first.x < left ? point.first.x : left; + right = point.first.x > right ? point.first.x : right; + + top = point.first.y < top ? point.first.y : top; + bottom = point.first.y > bottom ? point.first.y : bottom; + } + + return cv::Rect(left, top, right-left, bottom-top); +} + +double pointDist(const cv::Point2i& pointA, const cv::Point2i& pointB) +{ + cv::Vec2i a(pointA.x, pointA.y); + cv::Vec2i b(pointB.x, pointB.y); + return cv::norm(a-b); +} + +bool pointInRect(const cv::Point2i& point, const cv::Rect& rect) +{ + return point.x >= rect.x && point.x <= rect.x+rect.width && + point.y >= rect.y && point.y <= rect.y+rect.height; +} diff --git a/utils.h b/utils.h new file mode 100644 index 0000000..33a88df --- /dev/null +++ b/utils.h @@ -0,0 +1,15 @@ +#pragma once + +#include +#include +#include + +bool isImagePath(const std::filesystem::path& path); + +void getImageFiles(const std::filesystem::path& path, std::vector& paths); + +cv::Rect rectFromPoints(const std::vector>& points); + +double pointDist(const cv::Point2i& pointA, const cv::Point2i& pointB); + +bool pointInRect(const cv::Point2i& point, const cv::Rect& rect); diff --git a/yolo.cpp b/yolo.cpp new file mode 100644 index 0000000..11c9759 --- /dev/null +++ b/yolo.cpp @@ -0,0 +1,236 @@ +#include +#include +#include +#include + +#include "yolo.h" +#include "readfile.h" +#include "tokenize.h" +#include "log.h" + +#define INCBIN_PREFIX r +#include "incbin.h" + +INCTXT(defaultClasses, "../classes.txt"); +INCBIN(defaultModel, "../yolov8x.onnx"); + +Yolo::Yolo(const std::filesystem::path &onnxModelPath, const cv::Size &modelInputShape, + const std::filesystem::path& classesTxtFilePath, bool runWithOCl) +{ + modelPath = onnxModelPath; + modelShape = modelInputShape; + + if(classesTxtFilePath.empty()) + { + loadClasses(rdefaultClassesData); + } + else + { + std::string classesStr = readFile(classesTxtFilePath); + loadClasses(classesStr); + } + + if(!modelPath.empty()) + net = cv::dnn::readNetFromONNX(modelPath); + else + net = cv::dnn::readNetFromONNX((const char*)rdefaultModelData, rdefaultModelSize); + + if(runWithOCl) + { + std::cout << "\nRunning on OCV" << std::endl; + net.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT); + net.setPreferableTarget(cv::dnn::DNN_TARGET_OPENCL); + } + else + { + std::cout << "\nRunning on CPU" << std::endl; + net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV); + net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU); + } +} + +std::vector Yolo::runInference(const cv::Mat &input) +{ + cv::Mat modelInput = input; + if (letterBoxForSquare && modelShape.width == modelShape.height) + modelInput = formatToSquare(modelInput); + + cv::Mat blob; + cv::dnn::blobFromImage(modelInput, blob, 1.0/255.0, modelShape, cv::Scalar(), true, false); + net.setInput(blob); + + std::vector outputs; + net.forward(outputs, net.getUnconnectedOutLayersNames()); + + int rows = outputs[0].size[1]; + int dimensions = outputs[0].size[2]; + + bool yolov8 = false; + // yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c]) + // yolov8 has an output of shape (batchSize, 84, 8400) (Num classes + box[x,y,w,h]) + if (dimensions > rows) // Check if the shape[2] is more than shape[1] (yolov8) + { + yolov8 = true; + rows = outputs[0].size[2]; + dimensions = outputs[0].size[1]; + + outputs[0] = outputs[0].reshape(1, dimensions); + cv::transpose(outputs[0], outputs[0]); + } + float *data = (float *)outputs[0].data; + + float x_factor = modelInput.cols / modelShape.width; + float y_factor = modelInput.rows / modelShape.height; + + std::vector class_ids; + std::vector confidences; + std::vector boxes; + + for (int i = 0; i < rows; ++i) + { + if (yolov8) + { + float *classes_scores = data+4; + + cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores); + cv::Point class_id; + double maxClassScore; + + minMaxLoc(scores, 0, &maxClassScore, 0, &class_id); + + if (maxClassScore > modelScoreThreshold) + { + confidences.push_back(maxClassScore); + class_ids.push_back(class_id.x); + + float x = data[0]; + float y = data[1]; + float w = data[2]; + float h = data[3]; + + int left = int((x - 0.5 * w) * x_factor); + int top = int((y - 0.5 * h) * y_factor); + + int width = int(w * x_factor); + int height = int(h * y_factor); + + boxes.push_back(cv::Rect(left, top, width, height)); + } + } + else // yolov5 + { + float confidence = data[4]; + + if (confidence >= modelConfidenceThreshold) + { + float *classes_scores = data+5; + + cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores); + cv::Point class_id; + double max_class_score; + + minMaxLoc(scores, 0, &max_class_score, 0, &class_id); + + if (max_class_score > modelScoreThreshold) + { + confidences.push_back(confidence); + class_ids.push_back(class_id.x); + + float x = data[0]; + float y = data[1]; + float w = data[2]; + float h = data[3]; + + int left = int((x - 0.5 * w) * x_factor); + int top = int((y - 0.5 * h) * y_factor); + + int width = int(w * x_factor); + int height = int(h * y_factor); + + boxes.push_back(cv::Rect(left, top, width, height)); + } + } + } + + data += dimensions; + } + + std::vector nms_result; + cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nms_result); + + std::vector detections{}; + for(unsigned long i = 0; i < nms_result.size(); ++i) + { + int idx = nms_result[i]; + + Yolo::Detection result; + result.class_id = class_ids[idx]; + result.confidence = confidences[idx]; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dis(100, 255); + result.color = cv::Scalar(dis(gen), + dis(gen), + dis(gen)); + + result.className = classes[result.class_id].first; + result.priority = classes[result.class_id].second; + result.box = boxes[idx]; + + detections.push_back(result); + } + + return detections; +} + +void Yolo::loadClasses(const std::string& classesStr) +{ + std::vector candidateClasses = tokenizeBinaryIgnore(classesStr, '\n', '"', '\\'); + classes.clear(); + for(std::string& instance : candidateClasses) + { + if(instance.size() < 2) + continue; + + std::vector tokens = tokenizeBinaryIgnore(instance, ',', '"', '\\'); + + if(*tokens[0].begin() == '"') + instance.erase(tokens[0].begin()); + if(tokens[0].back() == '"') + tokens[0].pop_back(); + int priority = -1; + if(tokens.size() > 1) + { + try + { + priority = std::stoi(tokens[1]); + } + catch(const std::invalid_argument& err) + { + Log(Log::WARN)<<"unable to get priority for class "< +#include +#include +#include +#include +#include +#include +#include + +class Yolo +{ +public: + struct Detection + { + int class_id = 0; + std::string className; + float confidence = 0.0; + int priority = -1; + cv::Scalar color; + cv::Rect box; + }; + +private: + static constexpr float modelConfidenceThreshold = 0.25; + static constexpr float modelScoreThreshold = 0.45; + static constexpr float modelNMSThreshold = 0.50; + + void loadClasses(const std::string& classes); + void loadOnnxNetwork(const std::filesystem::path& path); + cv::Mat formatToSquare(const cv::Mat &source); + + std::string modelPath; + + std::vector> classes; + + cv::Size2f modelShape; + + bool letterBoxForSquare = true; + + cv::dnn::Net net; + +public: + Yolo(const std::filesystem::path &onnxModelPath = "", const cv::Size& modelInputShape = {640, 480}, + const std::filesystem::path& classesTxtFilePath = "", bool runWithOCl = true); + std::vector runInference(const cv::Mat &input); + int getClassForStr(const std::string& str) const; +};