SmartCrop: allow the target image aspect ratio to differ from 1:1

2024-12-11 22:48:52 +01:00 · 2024-12-11 22:48:52 +01:00 · 57361af9fe
commit 57361af9fe
parent 422debd897
4 changed files with 43 additions and 28 deletions
--- a/SmartCrop/intelligentroi.cpp
+++ b/SmartCrop/intelligentroi.cpp
@ -51,12 +51,16 @@ void InteligentRoi::slideRectToPoint(cv::Rect& rect, const cv::Point2i& point)
 	}
 }

-cv::Rect InteligentRoi::maxRect(bool& incompleate, const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude)
+cv::Rect InteligentRoi::maxRect(bool& incompleate, const cv::Size2i& imageSize, double targetAspectRatio, std::vector<std::pair<cv::Point2i, int>> mustInclude)
 {
 	incompleate = false;
-	int diameter = std::min(imageSize.height, imageSize.width);
+
 	cv::Point2i point(imageSize.width/2, imageSize.height/2);
-	cv::Rect candiate(point.x-diameter/2, point.y-diameter/2, diameter, diameter);
+	cv::Rect candiate;
+	if(imageSize.width/targetAspectRatio > imageSize.height)
+		candiate = cv::Rect(point.x-(imageSize.height*(targetAspectRatio/2)), 0, imageSize.height*targetAspectRatio, imageSize.height);
+	else
+		candiate = cv::Rect(0, point.y-(imageSize.width/targetAspectRatio)/2, imageSize.width, imageSize.width/targetAspectRatio);

 	std::sort(mustInclude.begin(), mustInclude.end(),
 		[&point](const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b){return compPointPrio(a, b, point);});
@ -64,7 +68,7 @@ cv::Rect InteligentRoi::maxRect(bool& incompleate, const cv::Size2i& imageSize,
 	while(true)
 	{
 		cv::Rect includeRect = rectFromPoints(mustInclude);
-		if(includeRect.width-2 > diameter || includeRect.height-2 > diameter)
+		if(includeRect.width-2 > candiate.width || includeRect.height-2 > candiate.height)
 		{
 			incompleate = true;
 			slideRectToPoint(candiate, mustInclude.back().first);
@ -99,30 +103,33 @@ InteligentRoi::InteligentRoi(const Yolo& yolo)
 	personId = yolo.getClassForStr("person");
 }

-bool InteligentRoi::getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize)
+bool InteligentRoi::getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize, double targetAspectRatio)
 {
 	std::vector<std::pair<cv::Point2i, int>> corners;
 	for(size_t i = 0; i < detections.size(); ++i)
 	{
 		int priority = detections[i].priority;
-		if(detections[i].class_id == personId)
+		if(priority > 0)
 		{
-			corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width/2, 0), priority+2});
-			corners.push_back({detections[i].box.tl(), priority+1});
-			corners.push_back({detections[i].box.br(), priority});
-			corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width, 0), priority+1});
-			corners.push_back({detections[i].box.br()+cv::Point2i(0-detections[i].box.width, 0), priority});
-		}
-		else
-		{
-			corners.push_back({detections[i].box.tl(), priority});
-			corners.push_back({detections[i].box.br(), priority});
-			corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width, 0), priority});
-			corners.push_back({detections[i].box.br()+cv::Point2i(0-detections[i].box.width, 0), priority});
+			if(detections[i].class_id == personId)
+			{
+				corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width/2, 0), priority+2});
+				corners.push_back({detections[i].box.tl(), priority+1});
+				corners.push_back({detections[i].box.br(), priority});
+				corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width, 0), priority+1});
+				corners.push_back({detections[i].box.br()+cv::Point2i(0-detections[i].box.width, 0), priority});
+			}
+			else
+			{
+				corners.push_back({detections[i].box.tl(), priority});
+				corners.push_back({detections[i].box.br(), priority});
+				corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width, 0), priority});
+				corners.push_back({detections[i].box.br()+cv::Point2i(0-detections[i].box.width, 0), priority});
+			}
 		}
 	}

 	bool incompleate;
-	out = maxRect(incompleate, imageSize, corners);
+	out = maxRect(incompleate, imageSize, targetAspectRatio, corners);
 	return incompleate;
 }
--- a/SmartCrop/intelligentroi.h
+++ b/SmartCrop/intelligentroi.h
@ -29,9 +29,9 @@ private:
 	int personId;
 	static bool compPointPrio(const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b, const cv::Point2i& center);
 	static void slideRectToPoint(cv::Rect& rect, const cv::Point2i& point);
-	static cv::Rect maxRect(bool& incompleate, const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude = {});
+	static cv::Rect maxRect(bool& incompleate, const cv::Size2i& imageSize, double targetAspectRatio, std::vector<std::pair<cv::Point2i, int>> mustInclude = {});

 public:
 	InteligentRoi(const Yolo& yolo);
-	bool getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize);
+	bool getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize, double targetAspectRatio);
 };
--- a/SmartCrop/main.cpp
+++ b/SmartCrop/main.cpp
@ -29,6 +29,7 @@
 #include <string>
 #include <vector>
 #include <numeric>
+#include <opencv2/highgui.hpp>

 #include "yolo.h"
 #include "log.h"
@ -309,7 +310,7 @@ void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yol
 	}

 	cv::Rect crop;
-	bool incompleate = intRoi.getCropRectangle(crop, detections, image.size());
+	bool incompleate = intRoi.getCropRectangle(crop, detections, image.size(), config.targetSize.aspectRatio());

 	if(config.seamCarving && incompleate)
 	{
@ -329,7 +330,7 @@ void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yol
 	else
 	{
 		if(incompleate)
-			intRoi.getCropRectangle(crop, detections, image.size());
+			intRoi.getCropRectangle(crop, detections, image.size(), config.targetSize.aspectRatio());
 		if(config.debug)
 		{
 			cv::Mat debugImage = image.clone();
@ -444,7 +445,7 @@ int main(int argc, char* argv[])
 	}

 	std::vector<std::thread> threads;
-	std::vector<std::vector<std::filesystem::path>> imagePathParts = splitVector(imagePaths, std::thread::hardware_concurrency());
+	std::vector<std::vector<std::filesystem::path>> imagePathParts = splitVector(imagePaths, 1/*std::thread::hardware_concurrency()*/);

 	for(size_t i = 0; i < imagePathParts.size(); ++i)
 		threads.push_back(std::thread(threadFn, imagePathParts[i], std::ref(config),  recognizer, std::ref(recognizerMutex), std::ref(debugOutputPath)));
--- a/SmartCrop/options.h
+++ b/SmartCrop/options.h
@ -40,7 +40,8 @@ static struct argp_option options[] =
  {"out",	 		'o', "[DIRECTORY]",	0,	"directory whre images are to be saved" },
  {"debug", 		'd', 0,				0,	"output debug images" },
  {"seam-carving", 	's', 0,				0,	"use seam carving to change image aspect ratio instead of croping"},
-  {"size", 			'z', "[PIXELS]",	0,	"target output size, default: 512"},
+  {"x-size", 		'x', "[PIXELS]",	0,	"target output width, default: 1024"},
+  {"y-size", 		'y', "[PIXELS]",	0,	"target output height, default: 1024"},
  {"focus-person",	'f', "[FILENAME]",	0,	"a file name to an image of a person that the crop should focus on"},
  {"person-threshold",	't', "[NUMBER]",	0,	"the threshold at witch to consider a person matched, defaults to 0.363"},
  {0}
@ -56,7 +57,7 @@ struct Config
 	bool seamCarving = false;
 	bool debug = false;
 	double threshold = 0.363;
-	cv::Size targetSize = cv::Size(512, 512);
+	cv::Size targetSize = cv::Size(1024, 1024);
 };

 static error_t parse_opt (int key, char *arg, struct argp_state *state)
@ -93,10 +94,16 @@ static error_t parse_opt (int key, char *arg, struct argp_state *state)
 		case 't':
 			config->threshold = std::atof(arg);
 			break;
-		case 'z':
+		case 'x':
 		{
 			int x = std::stoi(arg);
-			config->targetSize = cv::Size(x, x);
+			config->targetSize = cv::Size(x, config->targetSize.height);
+			break;
+		}
+		case 'y':
+		{
+			int y = std::stoi(arg);
+			config->targetSize = cv::Size(config->targetSize.width, y);
 			break;
 		}
 		case ARGP_KEY_ARG: