SmartCrop: allow target image aspect ratio to be different than 1:1

This commit is contained in:
uvos 2024-12-11 22:48:52 +01:00
parent 422debd897
commit 57361af9fe
4 changed files with 43 additions and 28 deletions

View File

@ -51,12 +51,16 @@ void InteligentRoi::slideRectToPoint(cv::Rect& rect, const cv::Point2i& point)
}
}
cv::Rect InteligentRoi::maxRect(bool& incompleate, const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude)
cv::Rect InteligentRoi::maxRect(bool& incompleate, const cv::Size2i& imageSize, double targetAspectRatio, std::vector<std::pair<cv::Point2i, int>> mustInclude)
{
incompleate = false;
int diameter = std::min(imageSize.height, imageSize.width);
cv::Point2i point(imageSize.width/2, imageSize.height/2);
cv::Rect candiate(point.x-diameter/2, point.y-diameter/2, diameter, diameter);
cv::Rect candiate;
if(imageSize.width/targetAspectRatio > imageSize.height)
candiate = cv::Rect(point.x-(imageSize.height*(targetAspectRatio/2)), 0, imageSize.height*targetAspectRatio, imageSize.height);
else
candiate = cv::Rect(0, point.y-(imageSize.width/targetAspectRatio)/2, imageSize.width, imageSize.width/targetAspectRatio);
std::sort(mustInclude.begin(), mustInclude.end(),
[&point](const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b){return compPointPrio(a, b, point);});
@ -64,7 +68,7 @@ cv::Rect InteligentRoi::maxRect(bool& incompleate, const cv::Size2i& imageSize,
while(true)
{
cv::Rect includeRect = rectFromPoints(mustInclude);
if(includeRect.width-2 > diameter || includeRect.height-2 > diameter)
if(includeRect.width-2 > candiate.width || includeRect.height-2 > candiate.height)
{
incompleate = true;
slideRectToPoint(candiate, mustInclude.back().first);
@ -99,30 +103,33 @@ InteligentRoi::InteligentRoi(const Yolo& yolo)
personId = yolo.getClassForStr("person");
}
// Builds the list of points the crop should try to contain from the given
// detections and asks maxRect() for the resulting crop rectangle.
//
// NOTE(review): this span is a rendered diff hunk (-99,30 +103,33) without
// +/- markers, so the pre-commit and post-commit variants of changed lines
// appear back to back. The variants that mention targetAspectRatio or the
// `priority > 0` guard appear to be the post-commit ones - confirm against
// the repository.
//
// out               - receives the rectangle returned by maxRect().
// detections        - every detection contributes corner points of its box,
//                     each tagged with a priority derived from
//                     Yolo::Detection::priority.
// imageSize         - forwarded unchanged to maxRect().
// targetAspectRatio - (new signature only) forwarded unchanged to maxRect(),
//                     which uses it as width divided by height.
// Returns the `incompleate` flag as set by maxRect().
bool InteligentRoi::getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize)
bool InteligentRoi::getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize, double targetAspectRatio)
{
// (point, priority) pairs handed to maxRect() as its mustInclude list.
std::vector<std::pair<cv::Point2i, int>> corners;
for(size_t i = 0; i < detections.size(); ++i)
{
int priority = detections[i].priority;
// Old variant: branch directly on whether the detection is a person.
if(detections[i].class_id == personId)
// New variant: detections whose priority is not positive add no points.
if(priority > 0)
{
// Old variant of the person / non-person branches (see the new variant below
// for the meaning of the individual points).
corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width/2, 0), priority+2});
corners.push_back({detections[i].box.tl(), priority+1});
corners.push_back({detections[i].box.br(), priority});
corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width, 0), priority+1});
corners.push_back({detections[i].box.br()+cv::Point2i(0-detections[i].box.width, 0), priority});
}
else
{
corners.push_back({detections[i].box.tl(), priority});
corners.push_back({detections[i].box.br(), priority});
corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width, 0), priority});
corners.push_back({detections[i].box.br()+cv::Point2i(0-detections[i].box.width, 0), priority});
// New variant: the same person / non-person branches, nested inside the
// `priority > 0` guard.
if(detections[i].class_id == personId)
{
// Person boxes add five points: the midpoint of the top edge (priority+2),
// both top corners (priority+1) and both bottom corners (base priority).
// Presumably this favors keeping the upper part of a person in frame - confirm.
corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width/2, 0), priority+2});
corners.push_back({detections[i].box.tl(), priority+1});
corners.push_back({detections[i].box.br(), priority});
corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width, 0), priority+1});
corners.push_back({detections[i].box.br()+cv::Point2i(0-detections[i].box.width, 0), priority});
}
else
{
// All other classes add their four box corners at the base priority.
corners.push_back({detections[i].box.tl(), priority});
corners.push_back({detections[i].box.br(), priority});
corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width, 0), priority});
corners.push_back({detections[i].box.br()+cv::Point2i(0-detections[i].box.width, 0), priority});
}
}
}
// maxRect() writes the flag through its first (reference) parameter.
bool incompleate;
// Old call (without aspect ratio) followed by the new call.
out = maxRect(incompleate, imageSize, corners);
out = maxRect(incompleate, imageSize, targetAspectRatio, corners);
return incompleate;
}

View File

@ -29,9 +29,9 @@ private:
int personId;
// Comparison function over (point, priority) pairs; maxRect() passes it to
// std::sort with the image center as `center`. The ordering it defines is
// implemented elsewhere and is not visible in this view.
static bool compPointPrio(const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b, const cv::Point2i& center);
// Modifies `rect` in place based on `point`; maxRect() calls it with the
// candidate crop and the last must-include point when the must-include
// bounding box does not fit. Presumably it shifts the rectangle towards the
// point - the definition is not visible here, confirm.
static void slideRectToPoint(cv::Rect& rect, const cv::Point2i& point);
// Computes a crop rectangle for an image of size imageSize.
// NOTE(review): rendered diff - the first declaration appears to be the
// pre-commit one, the second (with targetAspectRatio) the post-commit one.
// In the new version the starting candidate is the largest rectangle with
// the requested width/height ratio that fits the image, centered on it.
// `incompleate` is reset to false on entry and set to true when the bounding
// box of the mustInclude points is larger than the candidate.
// mustInclude holds (point, priority) pairs and defaults to empty.
static cv::Rect maxRect(bool& incompleate, const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude = {});
static cv::Rect maxRect(bool& incompleate, const cv::Size2i& imageSize, double targetAspectRatio, std::vector<std::pair<cv::Point2i, int>> mustInclude = {});
public:
// Constructs the helper from a Yolo instance; the visible part of the
// definition stores yolo.getClassForStr("person") in personId.
InteligentRoi(const Yolo& yolo);
// Writes the crop rectangle chosen for `detections` into `out` and returns
// the `incompleate` flag reported by maxRect().
// NOTE(review): rendered diff - the first declaration appears to be the
// pre-commit one, the second adds targetAspectRatio (width divided by height,
// forwarded to maxRect()).
bool getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize);
bool getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize, double targetAspectRatio);
};

View File

@ -29,6 +29,7 @@
#include <string>
#include <vector>
#include <numeric>
#include <opencv2/highgui.hpp>
#include "yolo.h"
#include "log.h"
@ -309,7 +310,7 @@ void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yol
}
cv::Rect crop;
bool incompleate = intRoi.getCropRectangle(crop, detections, image.size());
bool incompleate = intRoi.getCropRectangle(crop, detections, image.size(), config.targetSize.aspectRatio());
if(config.seamCarving && incompleate)
{
@ -329,7 +330,7 @@ void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yol
else
{
if(incompleate)
intRoi.getCropRectangle(crop, detections, image.size());
intRoi.getCropRectangle(crop, detections, image.size(), config.targetSize.aspectRatio());
if(config.debug)
{
cv::Mat debugImage = image.clone();
@ -444,7 +445,7 @@ int main(int argc, char* argv[])
}
std::vector<std::thread> threads;
std::vector<std::vector<std::filesystem::path>> imagePathParts = splitVector(imagePaths, std::thread::hardware_concurrency());
std::vector<std::vector<std::filesystem::path>> imagePathParts = splitVector(imagePaths, 1/*std::thread::hardware_concurrency()*/);
for(size_t i = 0; i < imagePathParts.size(); ++i)
threads.push_back(std::thread(threadFn, imagePathParts[i], std::ref(config), recognizer, std::ref(recognizerMutex), std::ref(debugOutputPath)));

View File

@ -40,7 +40,8 @@ static struct argp_option options[] =
{"out", 'o', "[DIRECTORY]", 0, "directory whre images are to be saved" },
{"debug", 'd', 0, 0, "output debug images" },
{"seam-carving", 's', 0, 0, "use seam carving to change image aspect ratio instead of croping"},
{"size", 'z', "[PIXELS]", 0, "target output size, default: 512"},
{"x-size", 'x', "[PIXELS]", 0, "target output width, default: 1024"},
{"y-size", 'y', "[PIXELS]", 0, "target output height, default: 1024"},
{"focus-person", 'f', "[FILENAME]", 0, "a file name to an image of a person that the crop should focus on"},
{"person-threshold", 't', "[NUMBER]", 0, "the threshold at witch to consider a person matched, defaults to 0.363"},
{0}
@ -56,7 +57,7 @@ struct Config
bool seamCarving = false;
bool debug = false;
double threshold = 0.363;
cv::Size targetSize = cv::Size(512, 512);
cv::Size targetSize = cv::Size(1024, 1024);
};
static error_t parse_opt (int key, char *arg, struct argp_state *state)
@ -93,10 +94,16 @@ static error_t parse_opt (int key, char *arg, struct argp_state *state)
case 't':
config->threshold = std::atof(arg);
break;
case 'z':
case 'x':
{
int x = std::stoi(arg);
config->targetSize = cv::Size(x, x);
config->targetSize = cv::Size(x, config->targetSize.height);
break;
}
case 'y':
{
int y = std::stoi(arg);
config->targetSize = cv::Size(config->targetSize.width, y);
break;
}
case ARGP_KEY_ARG: