SmartCrop: allow target image aspect ratio to be different than 1:1

This commit is contained in:
uvos 2024-12-11 22:48:52 +01:00
parent 422debd897
commit 57361af9fe
4 changed files with 43 additions and 28 deletions

View File

@@ -51,12 +51,16 @@ void InteligentRoi::slideRectToPoint(cv::Rect& rect, const cv::Point2i& point)
} }
} }
cv::Rect InteligentRoi::maxRect(bool& incompleate, const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude) cv::Rect InteligentRoi::maxRect(bool& incompleate, const cv::Size2i& imageSize, double targetAspectRatio, std::vector<std::pair<cv::Point2i, int>> mustInclude)
{ {
incompleate = false; incompleate = false;
int diameter = std::min(imageSize.height, imageSize.width);
cv::Point2i point(imageSize.width/2, imageSize.height/2); cv::Point2i point(imageSize.width/2, imageSize.height/2);
cv::Rect candiate(point.x-diameter/2, point.y-diameter/2, diameter, diameter); cv::Rect candiate;
if(imageSize.width/targetAspectRatio > imageSize.height)
candiate = cv::Rect(point.x-(imageSize.height*(targetAspectRatio/2)), 0, imageSize.height*targetAspectRatio, imageSize.height);
else
candiate = cv::Rect(0, point.y-(imageSize.width/targetAspectRatio)/2, imageSize.width, imageSize.width/targetAspectRatio);
std::sort(mustInclude.begin(), mustInclude.end(), std::sort(mustInclude.begin(), mustInclude.end(),
[&point](const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b){return compPointPrio(a, b, point);}); [&point](const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b){return compPointPrio(a, b, point);});
@@ -64,7 +68,7 @@ cv::Rect InteligentRoi::maxRect(bool& incompleate, const cv::Size2i& imageSize,
while(true) while(true)
{ {
cv::Rect includeRect = rectFromPoints(mustInclude); cv::Rect includeRect = rectFromPoints(mustInclude);
if(includeRect.width-2 > diameter || includeRect.height-2 > diameter) if(includeRect.width-2 > candiate.width || includeRect.height-2 > candiate.height)
{ {
incompleate = true; incompleate = true;
slideRectToPoint(candiate, mustInclude.back().first); slideRectToPoint(candiate, mustInclude.back().first);
@@ -99,30 +103,33 @@ InteligentRoi::InteligentRoi(const Yolo& yolo)
personId = yolo.getClassForStr("person"); personId = yolo.getClassForStr("person");
} }
bool InteligentRoi::getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize) bool InteligentRoi::getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize, double targetAspectRatio)
{ {
std::vector<std::pair<cv::Point2i, int>> corners; std::vector<std::pair<cv::Point2i, int>> corners;
for(size_t i = 0; i < detections.size(); ++i) for(size_t i = 0; i < detections.size(); ++i)
{ {
int priority = detections[i].priority; int priority = detections[i].priority;
if(detections[i].class_id == personId) if(priority > 0)
{ {
corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width/2, 0), priority+2}); if(detections[i].class_id == personId)
corners.push_back({detections[i].box.tl(), priority+1}); {
corners.push_back({detections[i].box.br(), priority}); corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width/2, 0), priority+2});
corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width, 0), priority+1}); corners.push_back({detections[i].box.tl(), priority+1});
corners.push_back({detections[i].box.br()+cv::Point2i(0-detections[i].box.width, 0), priority}); corners.push_back({detections[i].box.br(), priority});
} corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width, 0), priority+1});
else corners.push_back({detections[i].box.br()+cv::Point2i(0-detections[i].box.width, 0), priority});
{ }
corners.push_back({detections[i].box.tl(), priority}); else
corners.push_back({detections[i].box.br(), priority}); {
corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width, 0), priority}); corners.push_back({detections[i].box.tl(), priority});
corners.push_back({detections[i].box.br()+cv::Point2i(0-detections[i].box.width, 0), priority}); corners.push_back({detections[i].box.br(), priority});
corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width, 0), priority});
corners.push_back({detections[i].box.br()+cv::Point2i(0-detections[i].box.width, 0), priority});
}
} }
} }
bool incompleate; bool incompleate;
out = maxRect(incompleate, imageSize, corners); out = maxRect(incompleate, imageSize, targetAspectRatio, corners);
return incompleate; return incompleate;
} }

View File

@@ -29,9 +29,9 @@ private:
int personId; int personId;
static bool compPointPrio(const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b, const cv::Point2i& center); static bool compPointPrio(const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b, const cv::Point2i& center);
static void slideRectToPoint(cv::Rect& rect, const cv::Point2i& point); static void slideRectToPoint(cv::Rect& rect, const cv::Point2i& point);
static cv::Rect maxRect(bool& incompleate, const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude = {}); static cv::Rect maxRect(bool& incompleate, const cv::Size2i& imageSize, double targetAspectRatio, std::vector<std::pair<cv::Point2i, int>> mustInclude = {});
public: public:
InteligentRoi(const Yolo& yolo); InteligentRoi(const Yolo& yolo);
bool getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize); bool getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize, double targetAspectRatio);
}; };

View File

@@ -29,6 +29,7 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include <numeric> #include <numeric>
#include <opencv2/highgui.hpp>
#include "yolo.h" #include "yolo.h"
#include "log.h" #include "log.h"
@@ -309,7 +310,7 @@ void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yol
} }
cv::Rect crop; cv::Rect crop;
bool incompleate = intRoi.getCropRectangle(crop, detections, image.size()); bool incompleate = intRoi.getCropRectangle(crop, detections, image.size(), config.targetSize.aspectRatio());
if(config.seamCarving && incompleate) if(config.seamCarving && incompleate)
{ {
@@ -329,7 +330,7 @@ void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yol
else else
{ {
if(incompleate) if(incompleate)
intRoi.getCropRectangle(crop, detections, image.size()); intRoi.getCropRectangle(crop, detections, image.size(), config.targetSize.aspectRatio());
if(config.debug) if(config.debug)
{ {
cv::Mat debugImage = image.clone(); cv::Mat debugImage = image.clone();
@@ -444,7 +445,7 @@ int main(int argc, char* argv[])
} }
std::vector<std::thread> threads; std::vector<std::thread> threads;
std::vector<std::vector<std::filesystem::path>> imagePathParts = splitVector(imagePaths, std::thread::hardware_concurrency()); std::vector<std::vector<std::filesystem::path>> imagePathParts = splitVector(imagePaths, 1/*std::thread::hardware_concurrency()*/);
for(size_t i = 0; i < imagePathParts.size(); ++i) for(size_t i = 0; i < imagePathParts.size(); ++i)
threads.push_back(std::thread(threadFn, imagePathParts[i], std::ref(config), recognizer, std::ref(recognizerMutex), std::ref(debugOutputPath))); threads.push_back(std::thread(threadFn, imagePathParts[i], std::ref(config), recognizer, std::ref(recognizerMutex), std::ref(debugOutputPath)));

View File

@@ -40,7 +40,8 @@ static struct argp_option options[] =
{"out", 'o', "[DIRECTORY]", 0, "directory whre images are to be saved" }, {"out", 'o', "[DIRECTORY]", 0, "directory whre images are to be saved" },
{"debug", 'd', 0, 0, "output debug images" }, {"debug", 'd', 0, 0, "output debug images" },
{"seam-carving", 's', 0, 0, "use seam carving to change image aspect ratio instead of croping"}, {"seam-carving", 's', 0, 0, "use seam carving to change image aspect ratio instead of croping"},
{"size", 'z', "[PIXELS]", 0, "target output size, default: 512"}, {"x-size", 'x', "[PIXELS]", 0, "target output width, default: 1024"},
{"y-size", 'y', "[PIXELS]", 0, "target output height, default: 1024"},
{"focus-person", 'f', "[FILENAME]", 0, "a file name to an image of a person that the crop should focus on"}, {"focus-person", 'f', "[FILENAME]", 0, "a file name to an image of a person that the crop should focus on"},
{"person-threshold", 't', "[NUMBER]", 0, "the threshold at witch to consider a person matched, defaults to 0.363"}, {"person-threshold", 't', "[NUMBER]", 0, "the threshold at witch to consider a person matched, defaults to 0.363"},
{0} {0}
@@ -56,7 +57,7 @@ struct Config
bool seamCarving = false; bool seamCarving = false;
bool debug = false; bool debug = false;
double threshold = 0.363; double threshold = 0.363;
cv::Size targetSize = cv::Size(512, 512); cv::Size targetSize = cv::Size(1024, 1024);
}; };
static error_t parse_opt (int key, char *arg, struct argp_state *state) static error_t parse_opt (int key, char *arg, struct argp_state *state)
@@ -93,10 +94,16 @@ static error_t parse_opt (int key, char *arg, struct argp_state *state)
case 't': case 't':
config->threshold = std::atof(arg); config->threshold = std::atof(arg);
break; break;
case 'z': case 'x':
{ {
int x = std::stoi(arg); int x = std::stoi(arg);
config->targetSize = cv::Size(x, x); config->targetSize = cv::Size(x, config->targetSize.height);
break;
}
case 'y':
{
int y = std::stoi(arg);
config->targetSize = cv::Size(config->targetSize.width, y);
break; break;
} }
case ARGP_KEY_ARG: case ARGP_KEY_ARG: