commit 438c9d726caa62821c2c4932a310d53150458d44 Author: uvos Date: Wed Jun 28 23:59:50 2023 +0200 inital commit diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..89f8677 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,15 @@ +cmake_minimum_required(VERSION 3.6) +project(AIImagePrepross) + +find_package(OpenCV REQUIRED) + +set(CMAKE_CXX_STANDARD 17) + +set(SRC_FILES main.cpp yolo.cpp tokenize.cpp log.cpp seamcarvingvert.cpp seamcarvinghoriz.cpp seamcarving.cpp utils.cpp intelligentroi.cpp) + +add_executable(${PROJECT_NAME} ${SRC_FILES}) +target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} -ltbb) +target_include_directories(${PROJECT_NAME} PRIVATE ${OpenCV_INCLUDE_DIRS}) +target_compile_options(${PROJECT_NAME} PRIVATE -s -g -Wall) + +install(TARGETS ${PROJECT_NAME} RUNTIME DESTINATION bin) diff --git a/incbin.h b/incbin.h new file mode 100644 index 0000000..3f662e1 --- /dev/null +++ b/incbin.h @@ -0,0 +1,476 @@ +/** + * @file incbin.h + * @author Dale Weiler + * @brief Utility for including binary files + * + * Facilities for including binary files into the current translation unit and + * making use from them externally in other translation units. + */ +#ifndef INCBIN_HDR +#define INCBIN_HDR +#include +#if defined(__AVX512BW__) || \ + defined(__AVX512CD__) || \ + defined(__AVX512DQ__) || \ + defined(__AVX512ER__) || \ + defined(__AVX512PF__) || \ + defined(__AVX512VL__) || \ + defined(__AVX512F__) +# define INCBIN_ALIGNMENT_INDEX 6 +#elif defined(__AVX__) || \ + defined(__AVX2__) +# define INCBIN_ALIGNMENT_INDEX 5 +#elif defined(__SSE__) || \ + defined(__SSE2__) || \ + defined(__SSE3__) || \ + defined(__SSSE3__) || \ + defined(__SSE4_1__) || \ + defined(__SSE4_2__) || \ + defined(__neon__) || \ + defined(__ARM_NEON) || \ + defined(__ALTIVEC__) +# define INCBIN_ALIGNMENT_INDEX 4 +#elif ULONG_MAX != 0xffffffffu +# define INCBIN_ALIGNMENT_INDEX 3 +# else +# define INCBIN_ALIGNMENT_INDEX 2 +#endif + +/* Lookup table of (1 << n) where `n' is `INCBIN_ALIGNMENT_INDEX' */ +#define INCBIN_ALIGN_SHIFT_0 1 +#define INCBIN_ALIGN_SHIFT_1 2 +#define INCBIN_ALIGN_SHIFT_2 4 +#define INCBIN_ALIGN_SHIFT_3 8 +#define INCBIN_ALIGN_SHIFT_4 16 +#define INCBIN_ALIGN_SHIFT_5 32 +#define INCBIN_ALIGN_SHIFT_6 64 + +/* Actual alignment value */ +#define INCBIN_ALIGNMENT \ + INCBIN_CONCATENATE( \ + INCBIN_CONCATENATE(INCBIN_ALIGN_SHIFT, _), \ + INCBIN_ALIGNMENT_INDEX) + +/* Stringize */ +#define INCBIN_STR(X) \ + #X +#define INCBIN_STRINGIZE(X) \ + INCBIN_STR(X) +/* Concatenate */ +#define INCBIN_CAT(X, Y) \ + X ## Y +#define INCBIN_CONCATENATE(X, Y) \ + INCBIN_CAT(X, Y) +/* Deferred macro expansion */ +#define INCBIN_EVAL(X) \ + X +#define INCBIN_INVOKE(N, ...) \ + INCBIN_EVAL(N(__VA_ARGS__)) +/* Variable argument count for overloading by arity */ +#define INCBIN_VA_ARG_COUNTER(_1, _2, _3, N, ...) N +#define INCBIN_VA_ARGC(...) INCBIN_VA_ARG_COUNTER(__VA_ARGS__, 3, 2, 1, 0) + +/* Green Hills uses a different directive for including binary data */ +#if defined(__ghs__) +# if (__ghs_asm == 2) +# define INCBIN_MACRO ".file" +/* Or consider the ".myrawdata" entry in the ld file */ +# else +# define INCBIN_MACRO "\tINCBIN" +# endif +#else +# define INCBIN_MACRO ".incbin" +#endif + +#ifndef _MSC_VER +# define INCBIN_ALIGN \ + __attribute__((aligned(INCBIN_ALIGNMENT))) +#else +# define INCBIN_ALIGN __declspec(align(INCBIN_ALIGNMENT)) +#endif + +#if defined(__arm__) || /* GNU C and RealView */ \ + defined(__arm) || /* Diab */ \ + defined(_ARM) /* ImageCraft */ +# define INCBIN_ARM +#endif + +#ifdef __GNUC__ +/* Utilize .balign where supported */ +# define INCBIN_ALIGN_HOST ".balign " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n" +# define INCBIN_ALIGN_BYTE ".balign 1\n" +#elif defined(INCBIN_ARM) +/* + * On arm assemblers, the alignment value is calculated as (1 << n) where `n' is + * the shift count. This is the value passed to `.align' + */ +# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT_INDEX) "\n" +# define INCBIN_ALIGN_BYTE ".align 0\n" +#else +/* We assume other inline assembler's treat `.align' as `.balign' */ +# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n" +# define INCBIN_ALIGN_BYTE ".align 1\n" +#endif + +/* INCBIN_CONST is used by incbin.c generated files */ +#if defined(__cplusplus) +# define INCBIN_EXTERNAL extern "C" +# define INCBIN_CONST extern const +#else +# define INCBIN_EXTERNAL extern +# define INCBIN_CONST const +#endif + +/** + * @brief Optionally override the linker section into which size and data is + * emitted. + * + * @warning If you use this facility, you might have to deal with + * platform-specific linker output section naming on your own. + */ +#if !defined(INCBIN_OUTPUT_SECTION) +# if defined(__APPLE__) +# define INCBIN_OUTPUT_SECTION ".const_data" +# else +# define INCBIN_OUTPUT_SECTION ".rodata" +# endif +#endif + +/** + * @brief Optionally override the linker section into which data is emitted. + * + * @warning If you use this facility, you might have to deal with + * platform-specific linker output section naming on your own. + */ +#if !defined(INCBIN_OUTPUT_DATA_SECTION) +# define INCBIN_OUTPUT_DATA_SECTION INCBIN_OUTPUT_SECTION +#endif + +/** + * @brief Optionally override the linker section into which size is emitted. + * + * @warning If you use this facility, you might have to deal with + * platform-specific linker output section naming on your own. + * + * @note This is useful for Harvard architectures where program memory cannot + * be directly read from the program without special instructions. With this you + * can chose to put the size variable in RAM rather than ROM. + */ +#if !defined(INCBIN_OUTPUT_SIZE_SECTION) +# define INCBIN_OUTPUT_SIZE_SECTION INCBIN_OUTPUT_SECTION +#endif + +#if defined(__APPLE__) +# include "TargetConditionals.h" +# if defined(TARGET_OS_IPHONE) && !defined(INCBIN_SILENCE_BITCODE_WARNING) +# warning "incbin is incompatible with bitcode. Using the library will break upload to App Store if you have bitcode enabled. Add `#define INCBIN_SILENCE_BITCODE_WARNING` before including this header to silence this warning." +# endif +/* The directives are different for Apple branded compilers */ +# define INCBIN_SECTION INCBIN_OUTPUT_SECTION "\n" +# define INCBIN_GLOBAL(NAME) ".globl " INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n" +# define INCBIN_INT ".long " +# define INCBIN_MANGLE "_" +# define INCBIN_BYTE ".byte " +# define INCBIN_TYPE(...) +#else +# define INCBIN_SECTION ".section " INCBIN_OUTPUT_SECTION "\n" +# define INCBIN_GLOBAL(NAME) ".global " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n" +# if defined(__ghs__) +# define INCBIN_INT ".word " +# else +# define INCBIN_INT ".int " +# endif +# if defined(__USER_LABEL_PREFIX__) +# define INCBIN_MANGLE INCBIN_STRINGIZE(__USER_LABEL_PREFIX__) +# else +# define INCBIN_MANGLE "" +# endif +# if defined(INCBIN_ARM) +/* On arm assemblers, `@' is used as a line comment token */ +# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", %object\n" +# elif defined(__MINGW32__) || defined(__MINGW64__) +/* Mingw doesn't support this directive either */ +# define INCBIN_TYPE(NAME) +# else +/* It's safe to use `@' on other architectures */ +# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", @object\n" +# endif +# define INCBIN_BYTE ".byte " +#endif + +/* List of style types used for symbol names */ +#define INCBIN_STYLE_CAMEL 0 +#define INCBIN_STYLE_SNAKE 1 + +/** + * @brief Specify the prefix to use for symbol names. + * + * @note By default this is "g". + * + * @code + * #define INCBIN_PREFIX incbin + * #include "incbin.h" + * INCBIN(Foo, "foo.txt"); + * + * // Now you have the following symbols instead: + * // const unsigned char incbinFoo[]; + * // const unsigned char *const incbinFoo; + * // const unsigned int incbinFoo; + * @endcode + */ +#if !defined(INCBIN_PREFIX) +# define INCBIN_PREFIX g +#endif + +/** + * @brief Specify the style used for symbol names. + * + * Possible options are + * - INCBIN_STYLE_CAMEL "CamelCase" + * - INCBIN_STYLE_SNAKE "snake_case" + * + * @note By default this is INCBIN_STYLE_CAMEL + * + * @code + * #define INCBIN_STYLE INCBIN_STYLE_SNAKE + * #include "incbin.h" + * INCBIN(foo, "foo.txt"); + * + * // Now you have the following symbols: + * // const unsigned char foo_data[]; + * // const unsigned char *const foo_end; + * // const unsigned int foo_size; + * @endcode + */ +#if !defined(INCBIN_STYLE) +# define INCBIN_STYLE INCBIN_STYLE_CAMEL +#endif + +/* Style lookup tables */ +#define INCBIN_STYLE_0_DATA Data +#define INCBIN_STYLE_0_END End +#define INCBIN_STYLE_0_SIZE Size +#define INCBIN_STYLE_1_DATA _data +#define INCBIN_STYLE_1_END _end +#define INCBIN_STYLE_1_SIZE _size + +/* Style lookup: returning identifier */ +#define INCBIN_STYLE_IDENT(TYPE) \ + INCBIN_CONCATENATE( \ + INCBIN_STYLE_, \ + INCBIN_CONCATENATE( \ + INCBIN_EVAL(INCBIN_STYLE), \ + INCBIN_CONCATENATE(_, TYPE))) + +/* Style lookup: returning string literal */ +#define INCBIN_STYLE_STRING(TYPE) \ + INCBIN_STRINGIZE( \ + INCBIN_STYLE_IDENT(TYPE)) \ + +/* Generate the global labels by indirectly invoking the macro with our style + * type and concatenating the name against them. */ +#define INCBIN_GLOBAL_LABELS(NAME, TYPE) \ + INCBIN_INVOKE( \ + INCBIN_GLOBAL, \ + INCBIN_CONCATENATE( \ + NAME, \ + INCBIN_INVOKE( \ + INCBIN_STYLE_IDENT, \ + TYPE))) \ + INCBIN_INVOKE( \ + INCBIN_TYPE, \ + INCBIN_CONCATENATE( \ + NAME, \ + INCBIN_INVOKE( \ + INCBIN_STYLE_IDENT, \ + TYPE))) + +/** + * @brief Externally reference binary data included in another translation unit. + * + * Produces three external symbols that reference the binary data included in + * another translation unit. + * + * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with + * "Data", as well as "End" and "Size" after. An example is provided below. + * + * @param TYPE Optional array type. Omitting this picks a default of `unsigned char`. + * @param NAME The name given for the binary data + * + * @code + * INCBIN_EXTERN(Foo); + * + * // Now you have the following symbols: + * // extern const unsigned char Foo[]; + * // extern const unsigned char *const Foo; + * // extern const unsigned int Foo; + * @endcode + * + * You may specify a custom optional data type as well as the first argument. + * @code + * INCBIN_EXTERN(custom_type, Foo); + * + * // Now you have the following symbols: + * // extern const custom_type Foo[]; + * // extern const custom_type *const Foo; + * // extern const unsigned int Foo; + * @endcode + */ +#define INCBIN_EXTERN(...) \ + INCBIN_CONCATENATE(INCBIN_EXTERN_, INCBIN_VA_ARGC(__VA_ARGS__))(__VA_ARGS__) +#define INCBIN_EXTERN_1(NAME, ...) \ + INCBIN_EXTERN_2(unsigned char, NAME) +#define INCBIN_EXTERN_2(TYPE, NAME) \ + INCBIN_EXTERNAL const INCBIN_ALIGN TYPE \ + INCBIN_CONCATENATE( \ + INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \ + INCBIN_STYLE_IDENT(DATA))[]; \ + INCBIN_EXTERNAL const INCBIN_ALIGN TYPE *const \ + INCBIN_CONCATENATE( \ + INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \ + INCBIN_STYLE_IDENT(END)); \ + INCBIN_EXTERNAL const unsigned int \ + INCBIN_CONCATENATE( \ + INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \ + INCBIN_STYLE_IDENT(SIZE)) + +/** + * @brief Externally reference textual data included in another translation unit. + * + * Produces three external symbols that reference the textual data included in + * another translation unit. + * + * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with + * "Data", as well as "End" and "Size" after. An example is provided below. + * + * @param NAME The name given for the textual data + * + * @code + * INCBIN_EXTERN(Foo); + * + * // Now you have the following symbols: + * // extern const char Foo[]; + * // extern const char *const Foo; + * // extern const unsigned int Foo; + * @endcode + */ +#define INCTXT_EXTERN(NAME) \ + INCBIN_EXTERN_2(char, NAME) + +/** + * @brief Include a binary file into the current translation unit. + * + * Includes a binary file into the current translation unit, producing three symbols + * for objects that encode the data and size respectively. + * + * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with + * "Data", as well as "End" and "Size" after. An example is provided below. + * + * @param TYPE Optional array type. Omitting this picks a default of `unsigned char`. + * @param NAME The name to associate with this binary data (as an identifier.) + * @param FILENAME The file to include (as a string literal.) + * + * @code + * INCBIN(Icon, "icon.png"); + * + * // Now you have the following symbols: + * // const unsigned char Icon[]; + * // const unsigned char *const Icon; + * // const unsigned int Icon; + * @endcode + * + * You may specify a custom optional data type as well as the first argument. + * These macros are specialized by arity. + * @code + * INCBIN(custom_type, Icon, "icon.png"); + * + * // Now you have the following symbols: + * // const custom_type Icon[]; + * // const custom_type *const Icon; + * // const unsigned int Icon; + * @endcode + * + * @warning This must be used in global scope + * @warning The identifiers may be different if INCBIN_STYLE is not default + * + * To externally reference the data included by this in another translation unit + * please @see INCBIN_EXTERN. + */ +#ifdef _MSC_VER +# define INCBIN(NAME, FILENAME) \ + INCBIN_EXTERN(NAME) +#else +# define INCBIN(...) \ + INCBIN_CONCATENATE(INCBIN_, INCBIN_VA_ARGC(__VA_ARGS__))(__VA_ARGS__) +# if defined(__GNUC__) +# define INCBIN_1(...) _Pragma("GCC error \"Single argument INCBIN not allowed\"") +# elif defined(__clang__) +# define INCBIN_1(...) _Pragma("clang error \"Single argument INCBIN not allowed\"") +# else +# define INCBIN_1(...) /* Cannot do anything here */ +# endif +# define INCBIN_2(NAME, FILENAME) \ + INCBIN_3(unsigned char, NAME, FILENAME) +# define INCBIN_3(TYPE, NAME, FILENAME) INCBIN_COMMON(TYPE, NAME, FILENAME, /* No terminator for binary data */) +# define INCBIN_COMMON(TYPE, NAME, FILENAME, TERMINATOR) \ + __asm__(INCBIN_SECTION \ + INCBIN_GLOBAL_LABELS(NAME, DATA) \ + INCBIN_ALIGN_HOST \ + INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) ":\n" \ + INCBIN_MACRO " \"" FILENAME "\"\n" \ + TERMINATOR \ + INCBIN_GLOBAL_LABELS(NAME, END) \ + INCBIN_ALIGN_BYTE \ + INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) ":\n" \ + INCBIN_BYTE "1\n" \ + INCBIN_GLOBAL_LABELS(NAME, SIZE) \ + INCBIN_ALIGN_HOST \ + INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(SIZE) ":\n" \ + INCBIN_INT INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) " - " \ + INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) "\n" \ + INCBIN_ALIGN_HOST \ + ".text\n" \ + ); \ + INCBIN_EXTERN(TYPE, NAME) +#endif + +/** + * @brief Include a textual file into the current translation unit. + * + * This behaves the same as INCBIN except it produces char compatible arrays + * and implicitly adds a null-terminator byte, thus the size of data included + * by this is one byte larger than that of INCBIN. + * + * Includes a textual file into the current translation unit, producing three + * symbols for objects that encode the data and size respectively. + * + * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with + * "Data", as well as "End" and "Size" after. An example is provided below. + * + * @param NAME The name to associate with this binary data (as an identifier.) + * @param FILENAME The file to include (as a string literal.) + * + * @code + * INCTXT(Readme, "readme.txt"); + * + * // Now you have the following symbols: + * // const char Readme[]; + * // const char *const Readme; + * // const unsigned int Readme; + * @endcode + * + * @warning This must be used in global scope + * @warning The identifiers may be different if INCBIN_STYLE is not default + * + * To externally reference the data included by this in another translation unit + * please @see INCBIN_EXTERN. + */ +#if defined(_MSC_VER) +# define INCTXT(NAME, FILENAME) \ + INCBIN_EXTERN(NAME) +#else +# define INCTXT(NAME, FILENAME) \ + INCBIN_COMMON(char, NAME, FILENAME, INCBIN_BYTE "0\n") +#endif + +#endif \ No newline at end of file diff --git a/intelligentroi.cpp b/intelligentroi.cpp new file mode 100644 index 0000000..62b47f7 --- /dev/null +++ b/intelligentroi.cpp @@ -0,0 +1,99 @@ +#include "intelligentroi.h" + +#include + +#include "utils.h" +#include "log.h" + +bool InteligentRoi::compPointPrio(const std::pair& a, const std::pair& b, const cv::Point2i& center) +{ + if(a.second != b.second) + return a.second > b.second; + + double distA = pointDist(a.first, center); + double distB = pointDist(b.first, center); + + return distA < distB; +} + +void InteligentRoi::slideRectToPoint(cv::Rect& rect, const cv::Point2i& point) +{ + if(!pointInRect(point, rect)) + { + if(point.x < rect.x) + rect.x = point.x; + else if(point.x > rect.x+rect.width) + rect.x = point.x-rect.width; + if(point.y < rect.y) + rect.y = point.y; + else if(point.y > rect.y+rect.height) + rect.y = point.y-rect.height; + } +} + +cv::Rect InteligentRoi::maxRect(const cv::Size2i& imageSize, std::vector> mustInclude) +{ + int radius = std::min(imageSize.height, imageSize.width)/2; + cv::Point2i point(imageSize.width/2, imageSize.height/2); + cv::Rect candiate(point.x-radius, point.y-radius, radius*2, radius*2); + + std::sort(mustInclude.begin(), mustInclude.end(), + [&point](const std::pair& a, const std::pair& b){return compPointPrio(a, b, point);}); + + while(true) + { + cv::Rect includeRect = rectFromPoints(mustInclude); + if(includeRect.width-2 > radius || includeRect.height-2 > radius) + { + slideRectToPoint(candiate, mustInclude.back().first); + mustInclude.pop_back(); + Log(Log::DEBUG)<<"cant fill"; + for(const std::pair& mipoint : mustInclude) + Log(Log::DEBUG)<& includePoint : mustInclude) + slideRectToPoint(candiate, includePoint.first); + + if(candiate.x < 0) + candiate.x = 0; + if(candiate.y < 0) + candiate.y = 0; + if(candiate.x+candiate.width > imageSize.width) + candiate.width = imageSize.width-candiate.x; + if(candiate.y+candiate.height > imageSize.height) + candiate.height = imageSize.height-candiate.y; + + return candiate; +} + +InteligentRoi::InteligentRoi(const Yolo& yolo) +{ + personId = yolo.getClassForStr("person"); +} + +cv::Rect InteligentRoi::getCropRectangle(const std::vector& detections, const cv::Size2i& imageSize) +{ + if(!detections.empty()) + { + std::vector> corners; + for(size_t i = 0; i < detections.size(); ++i) + { + int priority = detections[i].priority; + if(detections[i].class_id == personId) + corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width/2, 0), priority+1}); + corners.push_back({detections[i].box.tl(), priority}); + corners.push_back({detections[i].box.br(), priority}); + corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width, 0), priority}); + corners.push_back({detections[i].box.br()+cv::Point2i(0-detections[i].box.width, 0), priority}); + } + + return maxRect(imageSize, corners); + } + + Log(Log::DEBUG)<<"Using center crop as there are no detections"; + return maxRect(imageSize); +} diff --git a/intelligentroi.h b/intelligentroi.h new file mode 100644 index 0000000..de09402 --- /dev/null +++ b/intelligentroi.h @@ -0,0 +1,18 @@ +#pragma once + +#include + +#include "yolo.h" + +class InteligentRoi +{ +private: + int personId; + static bool compPointPrio(const std::pair& a, const std::pair& b, const cv::Point2i& center); + static void slideRectToPoint(cv::Rect& rect, const cv::Point2i& point); + static cv::Rect maxRect(const cv::Size2i& imageSize, std::vector> mustInclude = {}); + +public: + InteligentRoi(const Yolo& yolo); + cv::Rect getCropRectangle(const std::vector& detections, const cv::Size2i& imageSize); +}; diff --git a/log.cpp b/log.cpp new file mode 100644 index 0000000..61fa188 --- /dev/null +++ b/log.cpp @@ -0,0 +1,63 @@ +/** +* Lubricant Detecter +* Copyright (C) 2021 Carl Klemm +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License +* version 3 as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the +* Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +* Boston, MA 02110-1301, USA. +*/ + +#include "log.h" + +Log::Log(Level type, bool endlineI): endline(endlineI) +{ + msglevel = type; + if(headers) + { + operator << ("["+getLabel(type)+"] "); + } +} + +Log::~Log() +{ + if(opened && endline) + { + std::cout<<'\n'; + } + opened = false; +} + + +std::string Log::getLabel(Level level) +{ + std::string label; + switch(level) + { + case DEBUG: + label = "DEBUG"; + break; + case INFO: + label = "INFO "; + break; + case WARN: + label = "WARN "; + break; + case ERROR: + label = "ERROR"; + break; + } + return label; +} + +bool Log::headers = false; +Log::Level Log::level = WARN; diff --git a/log.h b/log.h new file mode 100644 index 0000000..c0d90eb --- /dev/null +++ b/log.h @@ -0,0 +1,64 @@ +/** +* eisgenerator +* Copyright (C) 2021 Carl Klemm +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License +* version 3 as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the +* Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +* Boston, MA 02110-1301, USA. +*/ + +#pragma once +#include +#include + +class Log +{ +public: + + enum Level + { + DEBUG, + INFO, + WARN, + ERROR + }; + +private: + bool opened = false; + Level msglevel = DEBUG; + bool endline = true; + + std::string getLabel(Level level); + +public: + + static bool headers; + static Level level; + + Log() {} + Log(Level type, bool endlineI = true); + ~Log(); + + template Log &operator<<(const T &msg) + { + if(msglevel >= level) + { + if(msglevel == ERROR) + std::cerr< +#include +#include +#include +#include +#include + +#include "yolo.h" +#include "log.h" +#include "options.h" +#include "utils.h" +#include "intelligentroi.h" + +const Yolo::Detection* pointInDetectionHoriz(int x, const std::vector& detections, const Yolo::Detection* ignore = nullptr) +{ + const Yolo::Detection* inDetection = nullptr; + for(const Yolo::Detection& detection : detections) + { + if(!ignore || ignore != &detection) + continue; + + if(detection.box.x <= x && detection.box.x+detection.box.width <= x) + { + if(!inDetection || detection.box.br().x > inDetection->box.br().x) + inDetection = &detection; + } + } + return inDetection; +} + +bool findRegionEndpointHoriz(int& x, const std::vector& detections, int imgSizeX) +{ + const Yolo::Detection* inDetection = pointInDetectionHoriz(x, detections); + + if(!inDetection) + { + const Yolo::Detection* closest = nullptr; + for(const Yolo::Detection& detection : detections) + { + if(detection.box.x > x) + { + if(closest == nullptr || detection.box.x-x > closest->box.x-x) + closest = &detection; + } + } + if(closest) + x = closest->box.x; + else + x = imgSizeX; + return false; + } + else + { + x = inDetection->box.br().x; + const Yolo::Detection* candidateDetection = pointInDetectionHoriz(x, detections, inDetection); + if(candidateDetection && candidateDetection->box.br().x > x) + return findRegionEndpointHoriz(x, detections, imgSizeX); + else + return true; + } +} + +std::vector> cutImageIntoHorzRegions(cv::Mat& image, const std::vector& detections) +{ + std::vector> out; + + for(int x = 0; x < image.cols; ++x) + { + int start = x; + bool frozen = findRegionEndpointHoriz(x, detections, image.cols); + + cv::Mat slice = image(cv::Rect(start, 0, x-start, image.rows)); + out.push_back({slice, frozen}); + } + + return out; +} + +const Yolo::Detection* pointInDetectionVert(int y, const std::vector& detections, const Yolo::Detection* ignore = nullptr) +{ + const Yolo::Detection* inDetection = nullptr; + for(const Yolo::Detection& detection : detections) + { + if(!ignore || ignore != &detection) + continue; + + if(detection.box.y <= y && detection.box.y+detection.box.height <= y) + { + if(!inDetection || detection.box.br().y > inDetection->box.br().y) + inDetection = &detection; + } + } + return inDetection; +} + +bool findRegionEndpointVert(int& y, const std::vector& detections, int imgSizeY) +{ + const Yolo::Detection* inDetection = pointInDetectionVert(y, detections); + + if(!inDetection) + { + const Yolo::Detection* closest = nullptr; + for(const Yolo::Detection& detection : detections) + { + if(detection.box.y > y) + { + if(closest == nullptr || detection.box.y-y > closest->box.y-y) + closest = &detection; + } + } + if(closest) + y = closest->box.y; + else + y = imgSizeY; + return false; + } + else + { + y = inDetection->box.br().y; + const Yolo::Detection* candidateDetection = pointInDetectionVert(y, detections, inDetection); + if(candidateDetection && candidateDetection->box.br().y > y) + return findRegionEndpointVert(y, detections, imgSizeY); + else + return true; + } +} + +std::vector> cutImageIntoVertRegions(cv::Mat& image, const std::vector& detections) +{ + std::vector> out; + + for(int y = 0; y < image.rows; ++y) + { + int start = y; + bool frozen = findRegionEndpointVert(y, detections, image.rows); + + cv::Mat slice = image(cv::Rect(0, start, image.cols, y-start)); + out.push_back({slice, frozen}); + } + + return out; +} + +bool seamCarveResize(cv::Mat& image, const std::vector& detections, double targetAspectRatio = 1.0) +{ + double aspectRatio = image.cols/static_cast(image.rows); + + bool vertical = false; + cv::Mat workImage; + if(aspectRatio > targetAspectRatio) + vertical = true; + + int requiredLines = 0; + if(!vertical) + requiredLines = workImage.rows*targetAspectRatio - workImage.cols; + else + requiredLines = workImage.cols/targetAspectRatio - workImage.rows; + + Log(Log::DEBUG)<<__func__<<' '<> slices = cutImageIntoHorzRegions(image, detections); + int totalResizableSize = 0; + for(const std::pair& slice : slices) + { + if(slice.second) + totalResizableSize += slice.first.cols; + } + + std::vector seamsForSlice(slices.size()); + for(size_t i = 0; i < slices.size(); ++i) + { + seamsForSlice[i] = (static_cast(slices[i].first.cols)/totalResizableSize)*requiredLines; + } + } + else + { + int totalResizableSize = 0; + std::vector> slices = cutImageIntoVertRegions(image, detections); + } + +} + +void drawDebugInfo(cv::Mat &image, const cv::Rect& rect, const std::vector& detections) +{ + for(const Yolo::Detection& detection : detections) + { + cv::rectangle(image, detection.box, detection.color, 4); + std::string label = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4); + cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_DUPLEX, 3, 2, 0); + cv::Rect textBox(detection.box.x, detection.box.y - 80, labelSize.width + 10, labelSize.height + 20); + cv::rectangle(image, textBox, detection.color, cv::FILLED); + cv::putText(image, label, cv::Point(detection.box.x + 5, detection.box.y - 10), cv::FONT_HERSHEY_DUPLEX, 3, cv::Scalar(0, 0, 0), 2, 0); + } + + cv::rectangle(image, rect, cv::Scalar(0, 0, 255), 8); +} + +int main(int argc, char* argv[]) +{ + Log::level = Log::INFO; + + Config config; + argp_parse(&argp, argc, argv, 0, 0, &config); + + if(config.outputDir.empty()) + { + Log(Log::ERROR)<<"a output path \"-o\" is required"; + return 1; + } + + if(config.imagePaths.empty()) + { + Log(Log::ERROR)<<"at least one input image or directory is required"; + return 1; + } + + std::vector imagePaths; + + for(const std::filesystem::path& path : config.imagePaths) + getImageFiles(path, imagePaths); + + if(imagePaths.empty()) + { + Log(Log::ERROR)<<"no image was found\n"; + return 1; + } + + Yolo yolo(config.modelPath, {640, 480}, config.classesPath, false); + InteligentRoi intRoi(yolo); + + if(!std::filesystem::exists(config.outputDir)) + { + if(!std::filesystem::create_directory(config.outputDir)) + { + Log(Log::ERROR)<<"could not create directory at "< 1024) + { + if(image.cols > image.rows) + { + double ratio = 1024.0/image.cols; + cv::resize(image, image, {1024, static_cast(image.rows*ratio)}, 0, 0, cv::INTER_CUBIC); + } + else + { + double ratio = 1024.0/image.rows; + cv::resize(image, image, {static_cast(image.cols*ratio), 1024}, 0, 0, cv::INTER_CUBIC); + } + } + + std::vector detections = yolo.runInference(image); + + Log(Log::DEBUG)<<"Got "< +#include +#include +#include +#include +#include "log.h" + +const char *argp_program_version = "AIImagePreprocesses"; +const char *argp_program_bug_address = ""; +static char doc[] = "Application that trainsforms images into formats, sizes and aspect ratios required for ai training"; +static char args_doc[] = "[IMAGES]"; + +static struct argp_option options[] = +{ + {"verbose", 'v', 0, 0, "Show debug messages" }, + {"quiet", 'q', 0, 0, "only output data" }, + {"model", 'm', "[FILENAME]", 0, "YoloV8 model to use for detection" }, + {"classes", 'c', "[FILENAME]", 0, "classes text file to use" }, + {"out", 'o', "[DIRECTORY]", 0, "directory whre images are to be saved" }, + {"debug", 'd', 0, 0, "output debug images" }, + {"seam-carving", 's', 0, 0, "model to train: "} +}; + +struct Config +{ + std::vector imagePaths; + std::filesystem::path modelPath; + std::filesystem::path classesPath; + std::filesystem::path outputDir; + bool seamCarving = false; + bool debug = false; +}; + +static error_t parse_opt (int key, char *arg, struct argp_state *state) +{ + Config *config = reinterpret_cast(state->input); + switch (key) + { + case 'q': + Log::level = Log::ERROR; + break; + case 'v': + Log::level = Log::DEBUG; + break; + case 'm': + config->modelPath = arg; + break; + case 'c': + config->classesPath = arg; + break; + case 'd': + config->debug = true; + break; + case 'o': + config->outputDir.assign(arg); + break; + case 's': + config->seamCarving = true; + break; + case ARGP_KEY_ARG: + config->imagePaths.push_back(arg); + break; + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} + +static struct argp argp = {options, parse_opt, args_doc, doc}; diff --git a/readfile.h b/readfile.h new file mode 100644 index 0000000..0196a0a --- /dev/null +++ b/readfile.h @@ -0,0 +1,16 @@ +#pragma once +#include +#include +#include +#include +#include + +inline std::string readFile(const std::filesystem::path& path) +{ + std::ifstream file(path); + if(!file.is_open()) + throw std::runtime_error(std::string("could not open file ") + path.string()); + std::stringstream ss; + ss< +#include +#include +#include +#if __cplusplus >= 201703L +#include +#endif +#include + +SeamCarving::SeamCarving(const cv::Mat &img, int seams, bool grow) : + image(img), seams(seams), grow(grow) {} + +void SeamCarving::init() +{ + cv::Mat newFrame = image.clone(); + + for(int i = 0; i < seams; i++) + { + //Gradient Magnitude for intensity of image. + cv::Mat gradientMagnitude = computeGradientMagnitude(newFrame); + //Use DP to create the real energy map that is used for path calculation. + // Strictly using vertical paths for testing simplicity. + cv::Mat pathIntensityMat = computePathIntensityMat(gradientMagnitude); + + if(pathIntensityMat.rows == 0 && pathIntensityMat.cols == 0) + { + finalImage = image; + break; + } + std::vector seam = getLeastImportantPath(pathIntensityMat); + vecSeams.push_back(seam); + + newFrame = removeLeastImportantPath(newFrame,seam); + + if(newFrame.rows == 0 && newFrame.cols == 0) + { + finalImage = image; + break; + } + } + + if (grow) + { + cv::Mat growMat = image.clone(); + + for (int i = 0; i < vecSeams.size(); i++) + { + growMat = addLeastImportantPath(growMat,vecSeams[i]); + } + finalImage = growMat; + } + else + { + finalImage = newFrame; + } + + sliderPos = seams; + +} + +void SeamCarving::computeNewFinalImage(int sliderPos) +{ + if(sliderPos == 0) + { + finalImage = image; + return; + } + if(sliderPos < 1 || sliderPos >= sliderMax-1) + { + return; + } + if(sliderPos > vecSeams.size()) + { + cv::Mat newFrame = finalImage.clone(); + for(int i = vecSeams.size()-1; i < sliderPos; i++) + { + //Gradient Magnitude for intensity of image. + cv::Mat gradientMagnitude = computeGradientMagnitude(newFrame); + //Use DP to create the real energy map that is used for path calculation. + // Strictly using vertical paths for testing simplicity. + cv::Mat pathIntensityMat = computePathIntensityMat(gradientMagnitude); + + if(pathIntensityMat.rows == 0 && pathIntensityMat.cols == 0) + { + finalImage = image; + break; + } + std::vector seam = getLeastImportantPath(pathIntensityMat); + vecSeams.push_back(seam); + newFrame = removeLeastImportantPath(newFrame,seam); + if(newFrame.rows == 0 && newFrame.cols == 0) + { + finalImage = image; + break; + } + } + if (grow) + { + cv::Mat growMat = image.clone(); + + for (int i = 0; i < vecSeams.size(); i++) + { + growMat = addLeastImportantPath(growMat,vecSeams[i]); + } + + finalImage = growMat; + } + else + { + finalImage = newFrame; + } + } + else if (sliderPos < vecSeams.size()) + { + cv::Mat newFrame = image.clone(); + for(int i = 0; i < sliderPos; i++) // TODO check if it is faster to add seams back (probably not) + { + + if (grow) + { + newFrame = addLeastImportantPath(newFrame,vecSeams[i]); + } + else + { + newFrame = removeLeastImportantPath(newFrame,vecSeams[i]); + } + + if(newFrame.rows == 0 && newFrame.cols == 0) + { + finalImage = image; + break; + } + } + finalImage = newFrame; + } +} + +const cv::Mat& SeamCarving::getFinalImage() +{ + return finalImage; +} + +void SeamCarving::showSeamsImg() +{ + cv::Mat seamsFrame = image.clone(); + //std::cout << "sliderPos: " << sliderPos << std::endl; + for(int i = 0; i < sliderPos; i++) + { + seamsFrame = drawSeam(seamsFrame, vecSeams[i]); + } + cv::imwrite("output/seams_image.jpg", seamsFrame); + cv::imshow( "Image Seams", seamsFrame); +} + +static void onChange( int pos, void* object ) +{ + SeamCarving* sc = (SeamCarving*)(object); + /*if(sc->getBlockUpdateStatus()) { + return; + }*/ + sc->computeNewFinalImage(pos); + imshow("Final Image", sc->getFinalImage()); +#if DEBUG + sc->showSeamsImg(); +#endif +} +static void onMouse( int event, int x, int y, int, void* object) +{ + SeamCarving* sc = (SeamCarving*)(object); + if( event == cv::EVENT_LBUTTONDOWN || + event == cv::EVENT_RBUTTONDOWN || + event == cv::EVENT_MBUTTONDOWN + ) + { + sc->setBlockUpdate(true); + } + else if(event == cv::EVENT_LBUTTONUP || + event == cv::EVENT_RBUTTONUP || + event == cv::EVENT_MBUTTONUP) + { + sc->setBlockUpdate(false); + } +} + +void SeamCarving::setBlockUpdate(bool bUpdate) +{ + blockUpdate = bUpdate; +} + +bool SeamCarving::getBlockUpdateStatus() +{ + return blockUpdate; +} + +void SeamCarving::showImage() +{ +#if __cplusplus >= 201703L + if(!std::filesystem::exists("output")) + { + std::filesystem::create_directory("output"); + } +#endif + if( image.empty() ) + { + std::cout << "Could not open raw image" << std::endl ; + return; + } + namedWindow( "Raw Image", cv::WINDOW_AUTOSIZE ); + cv::imshow( "Raw Image", image ); + + if( finalImage.empty() ) + { + std::cout << "Could not open final image" << std::endl ; + return; + } +#if DEBUG + namedWindow( "gradient Image", cv::WINDOW_AUTOSIZE ); + cv::Mat gradient = computeGradientMagnitude(image); + cv::Mat u8_image; + gradient.convertTo(u8_image, CV_8U); + + cv::imwrite("output/gradient_image.jpg", u8_image); + cv::imshow("gradient Image", u8_image); + + namedWindow( "intensity Image", cv::WINDOW_AUTOSIZE ); + cv::Mat u8_image2; + cv::Mat intensityMat = computePathIntensityMat(gradient); + cv::Mat dst; + cv::normalize(intensityMat, dst, 0, 255, cv::NORM_MINMAX); + dst.convertTo(u8_image2, CV_8U); + cv::imwrite("output/intensity_image.jpg", u8_image2); + cv::imshow( "intensity Image", u8_image2); + + //cv::Mat engImg = GetEnergyImg(image); + //namedWindow("energy Image", cv::WINDOW_AUTOSIZE); + //cv::Mat u8_image3; + //engImg.convertTo(u8_image3, CV_8U); + //cv::imshow( "energy Image", u8_image3); + namedWindow("Image Seams", cv::WINDOW_AUTOSIZE); + showSeamsImg(); + +#endif + + namedWindow( "Final Image", cv::WINDOW_AUTOSIZE ); + cv::createTrackbar("Seams", "Final Image", &sliderPos, sliderMax, onChange, this); + //cv::setMouseCallback("Final Image", onMouse, this ); + cv::imwrite("output/final_image.jpg", finalImage); + cv::imshow("Final Image", finalImage); + cv::waitKey(0); +} + +cv::Mat SeamCarving::GetEnergyImg(const cv::Mat &img) +{ + // find partial derivative of x-axis and y-axis seperately + // sum up the partial derivates + float pd[] = {1, 2, 1, 0, 0, 0, -1, -2 - 1}; + cv::Mat xFilter(3, 3, CV_32FC1, pd); + cv::Mat yFilter = xFilter.t(); + cv::Mat grayImg; + cv::cvtColor(img, grayImg, cv::COLOR_RGBA2GRAY); + cv::Mat dxImg; + cv::Mat dyImg; + + cv::filter2D(grayImg, dxImg, 0, xFilter); + cv::filter2D(grayImg, dyImg, 0, yFilter); + //cv::Mat zeroMat = cv::Mat::zeros(dxImg.rows, dxImg.cols, dxImg.type()); + //cv::Mat absDxImg; + //cv::Mat absDyImg; + //cv::absdiff(dxImg, zeroMat, absDxImg); + //cv::absdiff(dyImg, zeroMat, absDyImg); + cv::Mat absDxImg = cv::abs(dxImg); + cv::Mat absDyImg = cv::abs(dyImg); + + cv::Mat energyImg; + cv::add(absDxImg, absDyImg, energyImg); + return energyImg; +} + +cv::Mat SeamCarving::computeGradientMagnitude(const cv::Mat &frame) +{ + cv::Mat grayScale; + cv::cvtColor(frame, grayScale, cv::COLOR_RGBA2GRAY); + cv::Mat drv = cv::Mat(grayScale.size(), CV_16SC1); + cv::Mat drv32f = cv::Mat(grayScale.size(), CV_32FC1); + cv::Mat mag = cv::Mat::zeros(grayScale.size(), CV_32FC1); + Sobel(grayScale, drv, CV_16SC1, 1, 0); + drv.convertTo(drv32f, CV_32FC1); + cv::accumulateSquare(drv32f, mag); + Sobel(grayScale, drv, CV_16SC1, 0, 1); + drv.convertTo(drv32f, CV_32FC1); + cv::accumulateSquare(drv32f, mag); + cv::sqrt(mag, mag); + return mag; +} + +float SeamCarving::intensity(float currIndex, int start, int end) +{ + if(start < 0 || start >= end) + { + return FLT_MAX; + } + else + { + return currIndex; + } +} + +cv::Mat SeamCarving::computePathIntensityMat(const cv::Mat &rawEnergyMap) +{ + cv::Mat pathIntensityMap = cv::Mat(rawEnergyMap.size(), CV_32FC1); + + if(rawEnergyMap.total() == 0 || pathIntensityMap.total() == 0) + { + return cv::Mat(); + } + + //First row of intensity paths is the same as the energy map + rawEnergyMap.row(0).copyTo(pathIntensityMap.row(0)); + float max = 0; + + //The rest of them use the DP calculation using the minimum of the 3 pixels above them + their own intensity. + for(int row = 1; row < pathIntensityMap.rows; row++) + { + for(int col = 0; col < pathIntensityMap.cols; col++) + { + //The initial intensity of the pixel is its raw intensity + float pixelIntensity = rawEnergyMap.at(row, col); + //The minimum intensity from the current path of the 3 pixels above it is added to its intensity. + float p1 = intensity(pathIntensityMap.at(row-1, col-1), col - 1, pathIntensityMap.cols); + float p2 = intensity(pathIntensityMap.at(row-1, col), col, pathIntensityMap.cols); + float p3 = intensity(pathIntensityMap.at(row-1, col+1), col + 1, pathIntensityMap.cols); + + float minIntensity = std::min(p1, p2); + minIntensity = std::min(minIntensity, p3); + + pixelIntensity += minIntensity; + + max = std::max(max, pixelIntensity); + pathIntensityMap.at(row, col) = pixelIntensity; + } + } + return pathIntensityMap; +} + +std::vector SeamCarving::getLeastImportantPath(const cv::Mat &importanceMap) +{ + if(importanceMap.total() == 0) + { + return std::vector(); + } + + //Find the beginning of the least important path. Trying an averaging approach because absolute min wasn't very reliable. + float minImportance = importanceMap.at(importanceMap.rows - 1, 0); + int minCol = 0; + for (int col = 1; col < importanceMap.cols; col++) + { + float currPixel =importanceMap.at(importanceMap.rows - 1, col); + if(currPixel < minImportance) + { + minCol = col; + minImportance = currPixel; + } + } + + std::vector leastEnergySeam(importanceMap.rows); + leastEnergySeam[importanceMap.rows-1] = minCol; + for(int row = importanceMap.rows - 2; row >= 0; row--) + { + float p1 = intensity(importanceMap.at(row, minCol-1), minCol - 1, importanceMap.cols); + float p2 = intensity(importanceMap.at(row, minCol), minCol, importanceMap.cols); + float p3 = intensity(importanceMap.at(row, minCol+1), minCol + 1, importanceMap.cols); + //Adjust the min column for path following + if(p1 < p2 && p1 < p3) + { + minCol -= 1; + } + else if(p3 < p1 && p3 < p2) + { + minCol += 1; + } + leastEnergySeam[row] = minCol; + } + + return leastEnergySeam; +} + +cv::Mat SeamCarving::removeLeastImportantPath(const cv::Mat &original, const std::vector &seam) +{ + cv::Size orgSize = original.size(); + // new mat needs to shrink by one collumn + cv::Size size = cv::Size(orgSize.width-1, orgSize.height); + cv::Mat newMat = cv::Mat(size, original.type()); + + unsigned char *rawOrig = original.data; + unsigned char *rawOutput = newMat.data; + for(int row = 0; row < seam.size(); row++) + { + removePixel(original, newMat, row, seam[row]); + } + return newMat; +} + +void SeamCarving::removePixel(const cv::Mat &original, cv::Mat &outputMat, int row, int minCol) +{ + int width = original.cols; + int channels = original.channels(); + int originRowStart = row * channels * width; + int newRowStart = row * channels * (width - 1); + int firstNum = minCol * channels; + unsigned char *rawOrig = original.data; + unsigned char *rawOutput = outputMat.data; + + //std::cout << "originRowStart: " << originRowStart << std::endl; + //std::cout << "newRowStart: " << newRowStart << std::endl; + //std::cout << "firstNum: " << firstNum << std::endl; + memcpy(rawOutput + newRowStart, rawOrig + originRowStart, firstNum); + + int originRowMid = originRowStart + (minCol + 1) * channels; + int newRowMid = newRowStart + minCol * channels; + int secondNum = (width - 1) * channels - firstNum; + + //std::cout << "originRowMid: " << originRowMid << std::endl; + //std::cout << "newRowMid: " << newRowMid << std::endl; + //std::cout << "secondNum: " << secondNum << std::endl; + memcpy(rawOutput + newRowMid, rawOrig + originRowMid, secondNum); + + int leftPixel = minCol - 1; + int rightPixel = minCol + 1; + + int byte1 = rawOrig[originRowStart + minCol * channels]; + int byte2 = rawOrig[originRowStart + minCol * channels + 1]; + int byte3 = rawOrig[originRowStart + minCol * channels + 2]; + + if (rightPixel < width) + { + int byte1R = rawOrig[originRowStart + rightPixel * channels]; + int byte2R = rawOrig[originRowStart + rightPixel * channels + 1]; + int byte3R = rawOrig[originRowStart + rightPixel * channels + 2]; + rawOutput[newRowStart + minCol * channels] = (unsigned char)((byte1 + byte1R) / 2); + rawOutput[newRowStart + minCol * channels + 1] = (unsigned char)((byte2 + byte2R) / 2); + rawOutput[newRowStart + minCol * channels + 2] = (unsigned char)((byte3 + byte3R) / 2); + } + + if(leftPixel >= 0) + { + int byte1L = rawOrig[originRowStart + leftPixel*channels]; + int byte2L = rawOrig[originRowStart + leftPixel*channels+1]; + int byte3L = rawOrig[originRowStart + leftPixel*channels+2]; + rawOutput[newRowStart + leftPixel*channels] = (unsigned char) ((byte1 + byte1L)/2); + rawOutput[newRowStart + leftPixel*channels+1] = (unsigned char) ((byte2 + byte2L)/2); + rawOutput[newRowStart + leftPixel*channels+2] = (unsigned char) ((byte3 + byte3L)/2); + } +} + +cv::Mat SeamCarving::addLeastImportantPath(const cv::Mat &original, const std::vector &seam) +{ + cv::Size orgSize = original.size(); + // new mat needs to grow by one column + cv::Size size = cv::Size(orgSize.width+1, orgSize.height); + cv::Mat newMat = cv::Mat(size, original.type()); + + unsigned char *rawOrig = original.data; + unsigned char *rawOutput = newMat.data; + for(int row = 0; row < seam.size(); row++) + { + //std::cout << "row: " << row << ", col: " << seam[row] << std::endl; + addPixel(original, newMat, row, seam[row]); + } + return newMat; +} + +void SeamCarving::addPixel(const cv::Mat &original, cv::Mat &outputMat, int row, int minCol) +{ + int width = original.cols; + int channels = original.channels(); + int originRowStart = row * channels * width; + int newRowStart = row * channels * (width + 1); + int firstNum = (minCol + 1) * channels; + + unsigned char *rawOrig = original.data; + unsigned char *rawOutput = outputMat.data; + + memcpy(rawOutput + newRowStart, rawOrig + originRowStart, firstNum); + + memcpy(rawOutput + newRowStart + firstNum, rawOrig + originRowStart + firstNum, channels); + + int originRowMid = originRowStart + ((minCol + 1) * channels); + int newRowMid = newRowStart + ((minCol + 2) * channels); + int secondNum = (width * channels) - firstNum; + + memcpy(rawOutput + newRowMid, rawOrig + originRowMid, secondNum); + + int leftPixel = minCol - 1; + int rightPixel = minCol + 1; + + int byte1 = rawOrig[originRowStart + minCol * channels]; + int byte2 = rawOrig[originRowStart + minCol * channels + 1]; + int byte3 = rawOrig[originRowStart + minCol * channels + 2]; + + if (rightPixel < width) + { + int byte1R = rawOrig[originRowStart + rightPixel * channels]; + int byte2R = rawOrig[originRowStart + rightPixel * channels + 1]; + int byte3R = rawOrig[originRowStart + rightPixel * channels + 2]; + rawOutput[newRowStart + minCol * channels] = (unsigned char)((byte1 + byte1R) / 2); + rawOutput[newRowStart + minCol * channels + 1] = (unsigned char)((byte2 + byte2R) / 2); + rawOutput[newRowStart + minCol * channels + 2] = (unsigned char)((byte3 + byte3R) / 2); + } + + if(leftPixel >= 0) + { + int byte1L = rawOrig[originRowStart + leftPixel*channels]; + int byte2L = rawOrig[originRowStart + leftPixel*channels+1]; + int byte3L = rawOrig[originRowStart + leftPixel*channels+2]; + rawOutput[newRowStart + leftPixel*channels] = (unsigned char) ((byte1 + byte1L)/2); + rawOutput[newRowStart + leftPixel*channels+1] = (unsigned char) ((byte2 + byte2L)/2); + rawOutput[newRowStart + leftPixel*channels+2] = (unsigned char) ((byte3 + byte3L)/2); + } +} diff --git a/seamcarving.h b/seamcarving.h new file mode 100644 index 0000000..feb133d --- /dev/null +++ b/seamcarving.h @@ -0,0 +1,61 @@ +#ifndef __SEAM__CARVING_HPP__ +#define __SEAM__CARVING_HPP__ + +#include +#define DEBUG 0 + +class SeamCarving { + public: + void showImage(); + const cv::Mat& getFinalImage(); + virtual void computeNewFinalImage(int pos); + void setBlockUpdate(bool bUpdate); + bool getBlockUpdateStatus(); + virtual void showSeamsImg(); + + protected: + SeamCarving(const cv::Mat &img, int seams, bool grow); + void init(); + virtual cv::Mat drawSeam(const cv::Mat &frame, const std::vector &seam) = 0; + cv::Mat image; + cv::Mat finalImage; + int seams; + bool grow; + int sliderMax; + int sliderPos; + std::vector> vecSeams; + + private: + cv::Mat GetEnergyImg(const cv::Mat &img); + cv::Mat computeGradientMagnitude(const cv::Mat &frame); + float intensity(float currIndex, int start, int end); + cv::Mat computePathIntensityMat(const cv::Mat &rawEnergyMap); + std::vector getLeastImportantPath(const cv::Mat &importanceMap); + cv::Mat removeLeastImportantPath(const cv::Mat &original, const std::vector &seam); + void removePixel(const cv::Mat &original, cv::Mat &outputMap, int row, int minCol); + cv::Mat addLeastImportantPath(const cv::Mat &original, const std::vector &seam); + void addPixel(const cv::Mat &original, cv::Mat &outputMat, int row, int minCol); + bool blockUpdate = false; + +}; + +class SeamCarvingHorizontal : public SeamCarving +{ + public: + SeamCarvingHorizontal(char* fileName, int seams=100, bool grow=false); + protected: + virtual cv::Mat drawSeam(const cv::Mat &frame, const std::vector &seam) override; +}; + +class SeamCarvingVertical : public SeamCarving { + public: + SeamCarvingVertical(char* fileName, int seams=100, bool grow=false); + virtual void computeNewFinalImage(int pos) override; +#if DEBUG + virtual void showSeamsImg() override; +#endif + protected: + virtual cv::Mat drawSeam(const cv::Mat &frame, const std::vector &seam) override; +}; + +#endif // __SEAM__CARVING_HPP__ diff --git a/seamcarvinghoriz.cpp b/seamcarvinghoriz.cpp new file mode 100644 index 0000000..98886fc --- /dev/null +++ b/seamcarvinghoriz.cpp @@ -0,0 +1,28 @@ +#include "seamcarving.h" +#include +#include +#include +#include +#include + +cv::Mat SeamCarvingHorizontal::drawSeam(const cv::Mat &frame, const std::vector &seam) +{ + cv::Mat retMat = frame.clone(); + for(int row = 0; row < frame.rows; row++) + { + for(int col = 0; col < frame.cols; col++) + { + retMat.at(row, seam[row])[0] = 0; + retMat.at(row, seam[row])[1] = 255; + retMat.at(row, seam[row])[2] = 0; + } + } + return retMat; +} + +SeamCarvingHorizontal::SeamCarvingHorizontal(char* fileName, int seams, bool grow) : + SeamCarving( cv::imread(fileName, cv::IMREAD_COLOR), seams, grow) +{ + sliderMax = image.cols; + init(); +} diff --git a/seamcarvingvert.cpp b/seamcarvingvert.cpp new file mode 100644 index 0000000..41c045b --- /dev/null +++ b/seamcarvingvert.cpp @@ -0,0 +1,51 @@ +#include "seamcarving.h" +#include +#include +#include +#include +#include + +SeamCarvingVertical::SeamCarvingVertical(char* fileName, int seams, bool grow) : + SeamCarving( cv::imread(fileName, cv::IMREAD_COLOR), seams, grow) +{ + sliderMax = image.rows; + cv::Mat oldImage = image; + image = image.t(); + init(); + image = oldImage; + finalImage = finalImage.t(); +} + +cv::Mat SeamCarvingVertical::drawSeam(const cv::Mat &frame, const std::vector &seam) +{ + cv::Mat retMat = frame.clone(); + for(int col = 0; col < frame.cols; col++) + { + for(int row = 0; row < frame.rows; row++) + { + retMat.at(seam[col], col)[0] = 0; + retMat.at(seam[col], col)[1] = 255; + retMat.at(seam[col], col)[2] = 0; + } + } + return retMat; +} + +void SeamCarvingVertical::computeNewFinalImage(int pos) +{ + cv::Mat oldImage = image; + image = image.t(); + SeamCarving::computeNewFinalImage(pos); + image = oldImage; + finalImage = finalImage.t(); +} + +#if DEBUG +void SeamCarvingVertical::showSeamsImg() +{ + cv::Mat oldImage = this->image; + this->image = this->image.t(); + SeamCarving::showImage(); + this->image = oldImage; +} +#endif diff --git a/tokenize.cpp b/tokenize.cpp new file mode 100644 index 0000000..af8509b --- /dev/null +++ b/tokenize.cpp @@ -0,0 +1,26 @@ +#include "tokenize.h" + + +std::vector tokenizeBinaryIgnore(const std::string& str, const char delim, const char ignoreBraket, const char escapeChar) +{ + std::vector tokens; + std::string token; + bool inBaracket = false; + for(size_t i = 0; i < str.size(); ++i) + { + if(str[i] == delim && !inBaracket && (i == 0 || str[i-1] != escapeChar)) + { + tokens.push_back(token); + token.clear(); + } + else + { + token.push_back(str[i]); + } + if(ignoreBraket == str[i]) + inBaracket = !inBaracket; + } + if(!inBaracket) + tokens.push_back(token); + return tokens; +} diff --git a/tokenize.h b/tokenize.h new file mode 100644 index 0000000..6641e5e --- /dev/null +++ b/tokenize.h @@ -0,0 +1,7 @@ +#pragma once + +#include +#include + +std::vector tokenizeBinaryIgnore(const std::string& str, const char delim, const char ignoreBraket = '\0', + const char escapeChar = '\0'); diff --git a/utils.cpp b/utils.cpp new file mode 100644 index 0000000..65b2fdb --- /dev/null +++ b/utils.cpp @@ -0,0 +1,60 @@ +#include "utils.h" + +#include +#include +#include + +bool isImagePath(const std::filesystem::path& path) +{ + return std::filesystem::is_regular_file(path) && (path.extension() == ".png" || path.extension() == ".jpg" || path.extension() == ".jpeg"); +} + +void getImageFiles(const std::filesystem::path& path, std::vector& paths) +{ + if(isImagePath(path)) + { + paths.push_back(path); + } + else if(std::filesystem::is_directory(path)) + { + for(const std::filesystem::directory_entry& dirent : std::filesystem::directory_iterator(path)) + { + if(std::filesystem::is_directory(dirent.path())) + getImageFiles(dirent.path(), paths); + else if(isImagePath(dirent.path())) + paths.push_back(dirent.path()); + } + } +} + +cv::Rect rectFromPoints(const std::vector>& points) +{ + int left = std::numeric_limits::max(); + int right = std::numeric_limits::min(); + int top = std::numeric_limits::max(); + int bottom = std::numeric_limits::min(); + + for(const std::pair& point : points) + { + left = point.first.x < left ? point.first.x : left; + right = point.first.x > right ? point.first.x : right; + + top = point.first.y < top ? point.first.y : top; + bottom = point.first.y > bottom ? point.first.y : bottom; + } + + return cv::Rect(left, top, right-left, bottom-top); +} + +double pointDist(const cv::Point2i& pointA, const cv::Point2i& pointB) +{ + cv::Vec2i a(pointA.x, pointA.y); + cv::Vec2i b(pointB.x, pointB.y); + return cv::norm(a-b); +} + +bool pointInRect(const cv::Point2i& point, const cv::Rect& rect) +{ + return point.x >= rect.x && point.x <= rect.x+rect.width && + point.y >= rect.y && point.y <= rect.y+rect.height; +} diff --git a/utils.h b/utils.h new file mode 100644 index 0000000..33a88df --- /dev/null +++ b/utils.h @@ -0,0 +1,15 @@ +#pragma once + +#include +#include +#include + +bool isImagePath(const std::filesystem::path& path); + +void getImageFiles(const std::filesystem::path& path, std::vector& paths); + +cv::Rect rectFromPoints(const std::vector>& points); + +double pointDist(const cv::Point2i& pointA, const cv::Point2i& pointB); + +bool pointInRect(const cv::Point2i& point, const cv::Rect& rect); diff --git a/yolo.cpp b/yolo.cpp new file mode 100644 index 0000000..11c9759 --- /dev/null +++ b/yolo.cpp @@ -0,0 +1,236 @@ +#include +#include +#include +#include + +#include "yolo.h" +#include "readfile.h" +#include "tokenize.h" +#include "log.h" + +#define INCBIN_PREFIX r +#include "incbin.h" + +INCTXT(defaultClasses, "../classes.txt"); +INCBIN(defaultModel, "../yolov8x.onnx"); + +Yolo::Yolo(const std::filesystem::path &onnxModelPath, const cv::Size &modelInputShape, + const std::filesystem::path& classesTxtFilePath, bool runWithOCl) +{ + modelPath = onnxModelPath; + modelShape = modelInputShape; + + if(classesTxtFilePath.empty()) + { + loadClasses(rdefaultClassesData); + } + else + { + std::string classesStr = readFile(classesTxtFilePath); + loadClasses(classesStr); + } + + if(!modelPath.empty()) + net = cv::dnn::readNetFromONNX(modelPath); + else + net = cv::dnn::readNetFromONNX((const char*)rdefaultModelData, rdefaultModelSize); + + if(runWithOCl) + { + std::cout << "\nRunning on OCV" << std::endl; + net.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT); + net.setPreferableTarget(cv::dnn::DNN_TARGET_OPENCL); + } + else + { + std::cout << "\nRunning on CPU" << std::endl; + net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV); + net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU); + } +} + +std::vector Yolo::runInference(const cv::Mat &input) +{ + cv::Mat modelInput = input; + if (letterBoxForSquare && modelShape.width == modelShape.height) + modelInput = formatToSquare(modelInput); + + cv::Mat blob; + cv::dnn::blobFromImage(modelInput, blob, 1.0/255.0, modelShape, cv::Scalar(), true, false); + net.setInput(blob); + + std::vector outputs; + net.forward(outputs, net.getUnconnectedOutLayersNames()); + + int rows = outputs[0].size[1]; + int dimensions = outputs[0].size[2]; + + bool yolov8 = false; + // yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c]) + // yolov8 has an output of shape (batchSize, 84, 8400) (Num classes + box[x,y,w,h]) + if (dimensions > rows) // Check if the shape[2] is more than shape[1] (yolov8) + { + yolov8 = true; + rows = outputs[0].size[2]; + dimensions = outputs[0].size[1]; + + outputs[0] = outputs[0].reshape(1, dimensions); + cv::transpose(outputs[0], outputs[0]); + } + float *data = (float *)outputs[0].data; + + float x_factor = modelInput.cols / modelShape.width; + float y_factor = modelInput.rows / modelShape.height; + + std::vector class_ids; + std::vector confidences; + std::vector boxes; + + for (int i = 0; i < rows; ++i) + { + if (yolov8) + { + float *classes_scores = data+4; + + cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores); + cv::Point class_id; + double maxClassScore; + + minMaxLoc(scores, 0, &maxClassScore, 0, &class_id); + + if (maxClassScore > modelScoreThreshold) + { + confidences.push_back(maxClassScore); + class_ids.push_back(class_id.x); + + float x = data[0]; + float y = data[1]; + float w = data[2]; + float h = data[3]; + + int left = int((x - 0.5 * w) * x_factor); + int top = int((y - 0.5 * h) * y_factor); + + int width = int(w * x_factor); + int height = int(h * y_factor); + + boxes.push_back(cv::Rect(left, top, width, height)); + } + } + else // yolov5 + { + float confidence = data[4]; + + if (confidence >= modelConfidenceThreshold) + { + float *classes_scores = data+5; + + cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores); + cv::Point class_id; + double max_class_score; + + minMaxLoc(scores, 0, &max_class_score, 0, &class_id); + + if (max_class_score > modelScoreThreshold) + { + confidences.push_back(confidence); + class_ids.push_back(class_id.x); + + float x = data[0]; + float y = data[1]; + float w = data[2]; + float h = data[3]; + + int left = int((x - 0.5 * w) * x_factor); + int top = int((y - 0.5 * h) * y_factor); + + int width = int(w * x_factor); + int height = int(h * y_factor); + + boxes.push_back(cv::Rect(left, top, width, height)); + } + } + } + + data += dimensions; + } + + std::vector nms_result; + cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nms_result); + + std::vector detections{}; + for(unsigned long i = 0; i < nms_result.size(); ++i) + { + int idx = nms_result[i]; + + Yolo::Detection result; + result.class_id = class_ids[idx]; + result.confidence = confidences[idx]; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dis(100, 255); + result.color = cv::Scalar(dis(gen), + dis(gen), + dis(gen)); + + result.className = classes[result.class_id].first; + result.priority = classes[result.class_id].second; + result.box = boxes[idx]; + + detections.push_back(result); + } + + return detections; +} + +void Yolo::loadClasses(const std::string& classesStr) +{ + std::vector candidateClasses = tokenizeBinaryIgnore(classesStr, '\n', '"', '\\'); + classes.clear(); + for(std::string& instance : candidateClasses) + { + if(instance.size() < 2) + continue; + + std::vector tokens = tokenizeBinaryIgnore(instance, ',', '"', '\\'); + + if(*tokens[0].begin() == '"') + instance.erase(tokens[0].begin()); + if(tokens[0].back() == '"') + tokens[0].pop_back(); + int priority = -1; + if(tokens.size() > 1) + { + try + { + priority = std::stoi(tokens[1]); + } + catch(const std::invalid_argument& err) + { + Log(Log::WARN)<<"unable to get priority for class "< +#include +#include +#include +#include +#include +#include +#include + +class Yolo +{ +public: + struct Detection + { + int class_id = 0; + std::string className; + float confidence = 0.0; + int priority = -1; + cv::Scalar color; + cv::Rect box; + }; + +private: + static constexpr float modelConfidenceThreshold = 0.25; + static constexpr float modelScoreThreshold = 0.45; + static constexpr float modelNMSThreshold = 0.50; + + void loadClasses(const std::string& classes); + void loadOnnxNetwork(const std::filesystem::path& path); + cv::Mat formatToSquare(const cv::Mat &source); + + std::string modelPath; + + std::vector> classes; + + cv::Size2f modelShape; + + bool letterBoxForSquare = true; + + cv::dnn::Net net; + +public: + Yolo(const std::filesystem::path &onnxModelPath = "", const cv::Size& modelInputShape = {640, 480}, + const std::filesystem::path& classesTxtFilePath = "", bool runWithOCl = true); + std::vector runInference(const cv::Mat &input); + int getClassForStr(const std::string& str) const; +};