Compare commits

...

10 commits

34 changed files with 1505 additions and 760 deletions

View file

@@ -1,15 +1,7 @@
 cmake_minimum_required(VERSION 3.6)
-project(AIImagePrepross)
+project(ImageAiUtils)
-find_package(OpenCV REQUIRED)
 set(CMAKE_CXX_STANDARD 17)
+set(WEIGHT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/Weights)
-set(SRC_FILES main.cpp yolo.cpp tokenize.cpp log.cpp seamcarvingvert.cpp seamcarvinghoriz.cpp seamcarving.cpp utils.cpp intelligentroi.cpp)
+add_subdirectory(SmartCrop)
-add_executable(${PROJECT_NAME} ${SRC_FILES})
-target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} -ltbb)
-target_include_directories(${PROJECT_NAME} PRIVATE ${OpenCV_INCLUDE_DIRS})
-target_compile_options(${PROJECT_NAME} PRIVATE -s -g -Wall)
-install(TARGETS ${PROJECT_NAME} RUNTIME DESTINATION bin)

View file

@@ -0,0 +1,141 @@
import warnings
import argparse
import cv2
import torch
import os
import numpy
import json
from typing import Iterator
from torch.multiprocessing import Process, Queue
from tqdm import tqdm
from transformers import AutoProcessor, BitsAndBytesConfig, LlavaForConditionalGeneration, logging
image_ext_ocv = [".bmp", ".jpeg", ".jpg", ".png"]
def find_image_files(path: str) -> list[str]:
paths = list()
for root, dirs, files in os.walk(path):
for filename in files:
name, extension = os.path.splitext(filename)
if extension.lower() in image_ext_ocv:
paths.append(os.path.join(root, filename))
return paths
def image_loader(paths: list[str]) -> Iterator[numpy.ndarray]:
for path in paths:
name, extension = os.path.splitext(path)
extension = extension.lower()
        imagebgr = cv2.imread(path)
        if imagebgr is None:
            print(f"Warning: could not load {path}")
            continue
        # OpenCV loads BGR; convert to the RGB layout the processor expects
        yield cv2.cvtColor(imagebgr, cv2.COLOR_BGR2RGB), path
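# Worker process, one per GPU: loads a 4-bit quantized LLaVA instance on its
# device and captions its share of the images in batches, pushing one
# {"file_name", "text"} record per image onto the shared queue. Assumes a
# bitsandbytes and flash-attention-2 capable install.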
def pipeline(queue: Queue, image_paths: list[str], prompt: str, device: torch.device, model_name_or_path: str, batch_size: int):
model = LlavaForConditionalGeneration.from_pretrained(model_name_or_path, torch_dtype=torch.float16, low_cpu_mem_usage=None,
quantization_config=BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_compute_dtype=torch.float16,
bnb_4bit_use_double_quant=False,
bnb_4bit_quant_type='nf4',
), device_map=device, attn_implementation="flash_attention_2")
processor = AutoProcessor.from_pretrained(model_name_or_path)
image_generator = image_loader(image_paths)
stop = False
finished_count = 0
while not stop:
prompts = list()
images = list()
filenames = list()
for i in range(0, batch_size):
image, filename = next(image_generator, (None, None))
if image is None:
stop = True
break
filenames.append(filename)
images.append(image)
prompts.append(prompt)
if len(images) == 0:
break
inputs = processor(text=prompts, images=images, return_tensors="pt").to(model.device)
generate_ids = model.generate(**inputs, max_new_tokens=100, min_new_tokens=3, length_penalty=1.0, do_sample=False, temperature=1.0, top_k=50, top_p=1.0)
decodes = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
finished_count += len(images)
for i, decoded in enumerate(decodes):
trim = len(prompt) - len("<image>")
queue.put({"file_name": filenames[i], "text": decoded[trim:].strip()})
def split_list(input_list, count):
target_length = int(len(input_list) / count)
for i in range(0, count - 1):
yield input_list[i * target_length: (i + 1) * target_length]
yield input_list[(count - 1) * target_length: len(input_list)]
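# e.g. split_list(list(range(5)), 2) yields [0, 1] and [2, 3, 4]; the final
# chunk absorbs the remainder.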
def save_meta(meta_file, meta, reldir, common_description):
meta["file_name"] = os.path.relpath(meta["file_name"], reldir)
if common_description is not None:
meta["text"] = common_description + meta["text"]
meta_file.write(json.dumps(meta) + '\n')
if __name__ == "__main__":
parser = argparse.ArgumentParser("A script to tag images via llava")
parser.add_argument('--model', '-m', default="llava-hf/llava-1.5-13b-hf", help="model to use")
parser.add_argument('--quantize', '-q', action='store_true', help="load quantized")
    parser.add_argument('--prompt', '-p', default="Please describe this image in 10 to 20 words.", help="Prompt to use on each image")
    parser.add_argument('--batch', '-b', default=4, type=int, help="Batch size to use for inference")
    parser.add_argument('--common_description', '-c', help="An optional description that will be prepended to the ai generated one")
    parser.add_argument('--image_dir', '-i', help="A directory containing the images to tag")
args = parser.parse_args()
prompt = "USER: <image>\n" + args.prompt + "\nASSISTANT: "
os.environ["BITSANDBYTES_NOWELCOME"] = "1"
image_paths = find_image_files(args.image_dir)
image_path_chunks = list(split_list(image_paths, torch.cuda.device_count()))
print(f"Will use {torch.cuda.device_count()} processies to create tags")
logging.set_verbosity_error()
warnings.filterwarnings("ignore")
torch.multiprocessing.set_start_method('spawn')
queue = Queue()
    processes = list()
    for i in range(0, torch.cuda.device_count()):
        processes.append(Process(target=pipeline, args=(queue, image_path_chunks[i], prompt, torch.device(i), args.model, args.batch)))
        processes[-1].start()
    progress = tqdm(desc="Generating tags", total=len(image_paths))
    done = False
    with open(os.path.join(args.image_dir, "metadata.jsonl"), mode='w') as output_file:
        while not done:
            if not queue.empty():
                meta = queue.get()
                save_meta(output_file, meta, args.image_dir, args.common_description)
                progress.update()
            done = True
            for process in processes:
                if process.is_alive():
                    done = False
                    break
        # drain results queued after the final liveness check
        while not queue.empty():
            meta = queue.get()
            save_meta(output_file, meta, args.image_dir, args.common_description)
            progress.update()
    for process in processes:
        process.join()

View file

@@ -0,0 +1,3 @@
from .deepdanbooru_onnx import DeepDanbooru
from .deepdanbooru_onnx import process_image
__version__ = '0.0.8'

View file

@@ -0,0 +1,244 @@
import onnxruntime as ort
from PIL import Image
import numpy as np
import os
from tqdm import tqdm
import requests
import hashlib
from typing import List, Union
import shutil
from pathlib import Path
def process_image(image: Image.Image) -> np.ndarray:
"""
Convert an image to a numpy array.
:param image: the image to convert
:return: the numpy array
"""
image = image.convert("RGB").resize((512, 512))
image = np.array(image).astype(np.float32) / 255
image = image.transpose((2, 0, 1)).reshape(1, 3, 512, 512).transpose((0, 2, 3, 1))
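    # The transpose/reshape round trip is equivalent to image[np.newaxis, ...]:
    # the final layout is NHWC, shape (1, 512, 512, 3), float32 in [0, 1]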
return image
def download(url: str, save_path: str, md5: str, length: int) -> bool:
    """
    Download a file from url to save_path.
    If the file already exists, check its md5.
    If the md5 matches, return True; if it doesn't, return False.

    :param url: the url of the file to download
    :param save_path: the path to save the file
    :param md5: the expected md5 of the file
    :param length: the length of the file in bytes (used for the progress bar)
    :return: True if the file is downloaded successfully, False otherwise
    """
try:
response = requests.get(url=url, stream=True)
with open(save_path, "wb") as f:
with tqdm.wrapattr(
response.raw, "read", total=length, desc="Downloading"
) as r_raw:
shutil.copyfileobj(r_raw, f)
        return hashlib.md5(open(save_path, "rb").read()).hexdigest() == md5
except Exception as e:
print(e)
return False
def download_model():
"""
Download the model and tags file from the server.
:return: the path to the model and tags file
"""
model_url = (
"https://huggingface.co/chinoll/deepdanbooru/resolve/main/deepdanbooru.onnx"
)
tags_url = "https://huggingface.co/chinoll/deepdanbooru/resolve/main/tags.txt"
model_md5 = "16be4e40ebcc0b1d1915bbf31f00969f"
tags_md5 = "a3f764de985cdeba89f1d232a4204402"
model_length = 643993025
tags_length = 133810
home = str(Path.home()) + "/.deepdanbooru_onnx/"
if not os.path.exists(home):
os.mkdir(home)
model_name = "deepdanbooru.onnx"
tags_name = "tags.txt"
model_path = home + model_name
tags_path = home + tags_name
if os.path.exists(model_path):
if hashlib.md5(open(model_path, "rb").read()).hexdigest() != model_md5:
os.remove(model_path)
if not download(model_url, model_path, model_md5, model_length):
raise ValueError("Model download failed")
else:
if not download(model_url, model_path, model_md5, model_length):
raise ValueError("Model download failed")
if os.path.exists(tags_path):
if hashlib.md5(open(tags_path, "rb").read()).hexdigest() != tags_md5:
os.remove(tags_path)
if not download(tags_url, tags_path, tags_md5, tags_length):
raise ValueError("Tags download failed")
else:
if not download(tags_url, tags_path, tags_md5, tags_length):
raise ValueError("Tags download failed")
return model_path, tags_path
class DeepDanbooru:
def __init__(
self,
mode: str = "auto",
model_path: Union[str, None] = None,
tags_path: Union[str, None] = None,
threshold: Union[float, int] = 0.6,
pin_memory: bool = False,
batch_size: int = 1,
):
"""
Initialize the DeepDanbooru class.
:param mode: the mode of the model, "cpu" or "gpu" or "auto"
:param model_path: the path to the model file
:param tags_path: the path to the tags file
:param threshold: the threshold of the model
:param pin_memory: whether to use pin memory
:param batch_size: the batch size of the model
"""
providers = {
"cpu": "CPUExecutionProvider",
"gpu": "CUDAExecutionProvider",
"tensorrt": "TensorrtExecutionProvider",
"auto": (
"CUDAExecutionProvider"
if "CUDAExecutionProvider" in ort.get_available_providers()
else "CPUExecutionProvider"
),
}
if not (isinstance(threshold, float) or isinstance(threshold, int)):
raise TypeError("threshold must be float or int")
if threshold < 0 or threshold > 1:
raise ValueError("threshold must be between 0 and 1")
        if mode not in providers:
            raise ValueError(
                "Mode not supported. Please choose from: cpu, gpu, tensorrt, auto"
            )
        if providers[mode] not in ort.get_available_providers():
            raise ValueError(
                f"Your device does not support {mode}. Please choose from: cpu"
            )
if model_path is not None and not os.path.exists(model_path):
raise FileNotFoundError("Model file not found")
if tags_path is not None and not os.path.exists(tags_path):
raise FileNotFoundError("Tags file not found")
if model_path is None or tags_path is None:
model_path, tags_path = download_model()
self.session = ort.InferenceSession(model_path, providers=[providers[mode]])
self.tags = [i.replace("\n", "") for i in open(tags_path, "r").readlines()]
self.input_name = self.session.get_inputs()[0].name
self.output_name = [output.name for output in self.session.get_outputs()]
self.threshold = threshold
self.pin_memory = pin_memory
self.batch_size = batch_size
self.mode = mode
self.cache = {}
def __str__(self) -> str:
return f"DeepDanbooru(mode={self.mode}, threshold={self.threshold}, pin_memory={self.pin_memory}, batch_size={self.batch_size})"
def __repr__(self) -> str:
return self.__str__()
def from_image_inference(self, image: Image.Image) -> dict:
image = process_image(image)
return self.predict(image)
def from_ndarray_inferece(self, image: np.ndarray) -> dict:
if image.shape != (1, 512, 512, 3):
raise ValueError(f"Image must be {(1, 512, 512, 3)}")
return self.predict(image)
def from_file_inference(self, image: str) -> dict:
return self.from_image_inference(Image.open(image))
def from_list_inference(self, image: Union[list, tuple]) -> List[dict]:
if self.pin_memory:
image = [process_image(Image.open(i)) for i in image]
for i in [
image[i : i + self.batch_size]
for i in range(0, len(image), self.batch_size)
]:
imagelist = i
bs = len(i)
_imagelist, idx, hashlist = [], [], []
for j in range(len(i)):
img = Image.open(i[j]) if not self.pin_memory else imagelist[j]
image_hash = hashlib.md5(np.array(img).astype(np.uint8)).hexdigest()
hashlist.append(image_hash)
if image_hash in self.cache:
continue
if not self.pin_memory:
_imagelist.append(process_image(img))
else:
_imagelist.append(imagelist[j])
idx.append(j)
imagelist = _imagelist
if len(imagelist) != 0:
_image = np.vstack(imagelist)
results = self.inference(_image)
results_idx = 0
else:
results = []
for i in range(bs):
image_tag = {}
if i in idx:
hash = hashlist[i]
for tag, score in zip(self.tags, results[results_idx]):
if score >= self.threshold:
image_tag[tag] = score
results_idx += 1
self.cache[hash] = image_tag
yield image_tag
else:
yield self.cache[hashlist[i]]
def inference(self, image):
return self.session.run(self.output_name, {self.input_name: image})[0]
def predict(self, image):
result = self.inference(image)
image_tag = {}
for tag, score in zip(self.tags, result[0]):
if score >= self.threshold:
image_tag[tag] = score
return image_tag
def __call__(self, image) -> Union[dict, List[dict]]:
if isinstance(image, str):
return self.from_file_inference(image)
elif isinstance(image, np.ndarray):
return self.from_ndarray_inferece(image)
elif isinstance(image, list) or isinstance(image, tuple):
return self.from_list_inference(image)
elif isinstance(image, Image.Image):
return self.from_image_inference(image)
else:
raise ValueError("Image must be a file path or a numpy array or list/tuple")

View file

@@ -0,0 +1,3 @@
from deepdanbooru_onnx import DeepDanbooru
danbooru = DeepDanbooru()
print(danbooru("/run/media/philipp/20404acc-312c-44f2-b2d1-3a0a14257cc6/.Media/porn/00244-3145022840.png"))

View file

@@ -0,0 +1,154 @@
#!/bin/python3
import argparse
import os
from typing import Iterator
import cv2
import numpy
from tqdm import tqdm
from wand.exceptions import BlobError
from wand.image import Image
image_ext_ocv = [".bmp", ".jpeg", ".jpg", ".png"]
image_ext_wand = [".dng", ".arw"]
class LoadException(Exception):
pass
def find_image_files(path: str) -> list[str]:
paths = list()
for root, dirs, files in os.walk(path):
for filename in files:
name, extension = os.path.splitext(filename)
            if extension.lower() in image_ext_ocv or extension.lower() in image_ext_wand:
paths.append(os.path.join(root, filename))
return paths
def image_loader(paths: list[str]) -> Iterator[numpy.ndarray]:
for path in paths:
name, extension = os.path.splitext(path)
extension = extension.lower()
if extension in image_ext_ocv:
image = cv2.imread(path)
if image is None:
print(f"Warning: could not load {path}")
else:
yield image
        elif extension in image_ext_wand:
            try:
                image = Image(filename=path)
            except BlobError as e:
                print(f"Warning: could not load {path}, {e}")
                continue
            # wand decodes to RGB; convert to BGR so downstream OpenCV code
            # sees the same layout as cv2.imread
            yield cv2.cvtColor(numpy.array(image), cv2.COLOR_RGB2BGR)
def extract_video_images(video: cv2.VideoCapture, interval: int = 0):
ret = True
frame_counter = 0
while ret:
video.set(cv2.CAP_PROP_POS_FRAMES, frame_counter)
ret, frame = video.read()
if ret:
yield frame
frame_counter += interval
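# For each detected face, the match scores against all reference features are
# averaged; the first face whose mean cosine score exceeds thresh counts as a
# match. Returns a (score, matched) tuple, with score 0 when no face is found.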
def contains_face_match(detector: cv2.FaceDetectorYN, recognizer: cv2.FaceRecognizerSF, image: numpy.ndarray, referance_features: list, thresh: float) -> tuple[float, bool]:
detector.setInputSize([image.shape[1], image.shape[0]])
faces = detector.detect(image)[1]
if faces is None:
return 0, False
for face in faces:
cropped_image = recognizer.alignCrop(image, face)
features = recognizer.feature(cropped_image)
score_accum = 0.0
for referance in referance_features:
score_accum += recognizer.match(referance, features, 0)
score = score_accum / len(referance_features)
if score > thresh:
return score, True
return 0, False
def process_referance(detector: cv2.FaceDetectorYN, recognizer: cv2.FaceRecognizerSF, referance_path: str) -> list:
images = list()
out = list()
if os.path.isfile(referance_path):
image = cv2.imread(referance_path)
if image is None:
print(f"Could not load image from {referance_path}")
else:
images.append(image)
elif os.path.isdir(referance_path):
filenames = find_image_files(referance_path)
images = list(image_loader(filenames))
for image in images:
detector.setInputSize([image.shape[1], image.shape[0]])
faces = detector.detect(image)[1]
if faces is None:
print("unable to find face in referance image")
exit(1)
image = recognizer.alignCrop(image, faces[0])
features = recognizer.feature(image)
out.append(features)
return out
if __name__ == "__main__":
parser = argparse.ArgumentParser("Script to assemble a dataset of images of a specific person")
parser.add_argument('--out', '-o', default="out", help="place to put dataset")
parser.add_argument('--input', '-i', required=True, help="directory or video file to get images from")
parser.add_argument('--skip', '-s', default=0, type=int, help="skip n frames between samples when grabbing from a video file")
    parser.add_argument('--referance', '-r', required=True, help="reference image or directory of images of the person to be found")
parser.add_argument('--match_model', '-m', required=True, help="Path to the onnx recognition model to be used")
parser.add_argument('--detect_model', '-d', required=True, help="Path to the onnx detection model to be used")
parser.add_argument('--threshold', '-t', default=0.362, type=float, help="match threshold to use")
    parser.add_argument('--invert', '-n', action='store_true', help="output files that DO NOT match")
args = parser.parse_args()
recognizer = cv2.FaceRecognizerSF.create(model=args.match_model, config="", backend_id=cv2.dnn.DNN_BACKEND_DEFAULT , target_id=cv2.dnn.DNN_TARGET_CPU)
detector = cv2.FaceDetectorYN.create(model=args.detect_model, config="", input_size=[320, 320],
score_threshold=0.6, nms_threshold=0.3, top_k=5000, backend_id=cv2.dnn.DNN_BACKEND_DEFAULT, target_id=cv2.dnn.DNN_TARGET_CPU)
referance_features = process_referance(detector, recognizer, args.referance)
if len(referance_features) < 1:
print(f"Could not load any referance image(s) from {args.referance}")
exit(1)
if os.path.isfile(args.input):
video = cv2.VideoCapture(args.input)
if not video.isOpened():
print(f"Unable to open {args.input} as a video file")
exit(1)
image_generator = extract_video_images(video, args.skip + 1)
total_images = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) / (args.skip + 1)
    elif os.path.isdir(args.input):
        image_filenames = find_image_files(args.input)
        image_generator = image_loader(image_filenames)
        total_images = len(image_filenames)
else:
print(f"{args.input} is not a video file nor is it a directory")
exit(1)
os.makedirs(args.out, exist_ok=True)
progress = tqdm(total=int(total_images), desc="0.00")
counter = 0
for image in image_generator:
if image.shape[0] > 512:
aspect = image.shape[0] / image.shape[1]
resized = cv2.resize(image, (int(512 / aspect), 512), 0, 0, cv2.INTER_AREA)
else:
resized = image
score, match = contains_face_match(detector, recognizer, resized, referance_features, args.threshold)
        if match != args.invert:  # equivalent to (match XOR invert)
filename = f"{counter:04}.png"
cv2.imwrite(os.path.join(args.out, filename), image)
counter += 1
progress.set_description(f"{score:1.2f}")
progress.update()

16
SmartCrop/CMakeLists.txt Normal file
View file

@@ -0,0 +1,16 @@
cmake_minimum_required(VERSION 3.6)
find_package(OpenCV REQUIRED)
set(CMAKE_CXX_STANDARD 17)
set(SRC_FILES main.cpp yolo.cpp tokenize.cpp log.cpp seamcarving.cpp utils.cpp intelligentroi.cpp facerecognizer.cpp)
add_executable(smartcrop ${SRC_FILES})
target_link_libraries(smartcrop ${OpenCV_LIBS} -ltbb)
target_include_directories(smartcrop PRIVATE ${OpenCV_INCLUDE_DIRS})
target_compile_options(smartcrop PRIVATE -s -g -Wall)
message(WARNING ${WEIGHT_DIR})
target_compile_definitions(smartcrop PUBLIC WEIGHT_DIR="${WEIGHT_DIR}")
install(TARGETS smartcrop RUNTIME DESTINATION bin)

View file

@@ -0,0 +1,143 @@
#include "facerecognizer.h"
#include <filesystem>
#define INCBIN_PREFIX r
#include "incbin.h"
INCBIN(defaultRecognizer, WEIGHT_DIR "/face_recognition_sface_2021dec.onnx");
INCBIN(defaultDetector, WEIGHT_DIR "/face_detection_yunet_2023mar.onnx");
#include <opencv2/dnn/dnn.hpp>
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <fstream>
#include "log.h"
static const std::vector<unsigned char> onnx((unsigned char*)rdefaultDetectorData, ((unsigned char*)rdefaultDetectorData)+rdefaultDetectorSize);
FaceRecognizer::FaceRecognizer(std::filesystem::path recognizerPath, const std::filesystem::path& detectorPath, const std::vector<cv::Mat>& referances)
{
if(detectorPath.empty())
{
Log(Log::INFO)<<"Using builtin face detection model";
detector = cv::FaceDetectorYN::create("onnx", onnx, std::vector<unsigned char>(), {320, 320}, 0.6, 0.3, 5000, cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
if(!detector)
throw LoadException("Unable to load detector network from built in file");
}
else
{
detector = cv::FaceDetectorYN::create(detectorPath, "", {320, 320}, 0.6, 0.3, 5000, cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
if(!detector)
throw LoadException("Unable to load detector network from "+detectorPath.string());
}
bool defaultNetwork = recognizerPath.empty();
if(defaultNetwork)
{
Log(Log::INFO)<<"Using builtin face recognition model";
recognizerPath = cv::tempfile("onnx");
std::ofstream file(recognizerPath);
if(!file.is_open())
throw LoadException("Unable open temporary file at "+recognizerPath.string());
Log(Log::DEBUG)<<"Using "<<recognizerPath<<" as temporary file for onnx recongnition network";
file.write(reinterpret_cast<const char*>(rdefaultRecognizerData), rdefaultRecognizerSize);
file.close();
}
recognizer = cv::FaceRecognizerSF::create(recognizerPath.string(), "", cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
if(defaultNetwork)
std::filesystem::remove(recognizerPath);
if(!recognizer)
throw LoadException("Unable to load recognizer network from "+recognizerPath.string());
addReferances(referances);
}
cv::Mat FaceRecognizer::detectFaces(const cv::Mat& input)
{
detector->setInputSize(input.size());
cv::Mat faces;
detector->detect(input, faces);
return faces;
}
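// FaceDetectorYN returns one row per detected face: 4 box values, 5 landmark
// x/y pairs and a confidence score, 15 columns in total.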
bool FaceRecognizer::addReferances(const std::vector<cv::Mat>& referances)
{
bool ret = false;
for(const cv::Mat& image : referances)
{
		cv::Mat faces = detectFaces(image);
		if(faces.empty())
		{
			Log(Log::WARN)<<"A reference image provided does not contain any face";
			continue;
		}
		assert(faces.cols == 15);
		if(faces.rows > 1)
			Log(Log::WARN)<<"A reference image provided contains more than one face, only the first detected face will be considered";
cv::Mat cropedImage;
recognizer->alignCrop(image, faces.row(0), cropedImage);
cv::Mat features;
recognizer->feature(cropedImage, features);
referanceFeatures.push_back(features.clone());
ret = true;
}
return ret;
}
void FaceRecognizer::setThreshold(double threasholdIn)
{
threshold = threasholdIn;
}
double FaceRecognizer::getThreshold()
{
return threshold;
}
void FaceRecognizer::clearReferances()
{
referanceFeatures.clear();
}
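// Returns the best match over all faces found in the input. Detection::person
// holds the index of the matched reference feature, -1 if nothing clears the
// threshold, or -2 if alone is set and more than one face is present.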
FaceRecognizer::Detection FaceRecognizer::isMatch(const cv::Mat& input, bool alone)
{
cv::Mat faces = detectFaces(input);
Detection bestMatch;
bestMatch.confidence = 0;
bestMatch.person = -1;
if(alone && faces.rows > 1)
{
bestMatch.person = -2;
return bestMatch;
}
for(int i = 0; i < faces.rows; ++i)
{
cv::Mat face;
recognizer->alignCrop(input, faces.row(i), face);
cv::Mat features;
recognizer->feature(face, features);
features = features.clone();
for(size_t referanceIndex = 0; referanceIndex < referanceFeatures.size(); ++referanceIndex)
{
double score = recognizer->match(referanceFeatures[referanceIndex], features, cv::FaceRecognizerSF::FR_COSINE);
if(score > threshold && score > bestMatch.confidence)
{
bestMatch.confidence = score;
bestMatch.person = referanceIndex;
bestMatch.rect = cv::Rect(faces.at<int>(i, 0), faces.at<int>(i, 1), faces.at<int>(i, 2), faces.at<int>(i, 3));
}
}
}
return bestMatch;
}

View file

@@ -0,0 +1,48 @@
#pragma once
#include <exception>
#include <opencv2/core/mat.hpp>
#include <opencv2/objdetect/face.hpp>
#include <opencv2/core.hpp>
#include <vector>
#include <memory>
#include <filesystem>
class FaceRecognizer
{
public:
struct Detection
{
int person;
float confidence;
cv::Rect rect;
};
class LoadException : public std::exception
{
private:
std::string message;
public:
LoadException(const std::string& msg): std::exception(), message(msg) {}
virtual const char* what() const throw() override
{
return message.c_str();
}
};
private:
std::vector<cv::Mat> referanceFeatures;
std::shared_ptr<cv::FaceRecognizerSF> recognizer;
std::shared_ptr<cv::FaceDetectorYN> detector;
double threshold = 0.363;
public:
FaceRecognizer(std::filesystem::path recognizerPath = "", const std::filesystem::path& detectorPath = "", const std::vector<cv::Mat>& referances = std::vector<cv::Mat>());
cv::Mat detectFaces(const cv::Mat& input);
Detection isMatch(const cv::Mat& input, bool alone = false);
bool addReferances(const std::vector<cv::Mat>& referances);
void setThreshold(double threashold);
double getThreshold();
void clearReferances();
};

View file

@@ -31,11 +31,12 @@ void InteligentRoi::slideRectToPoint(cv::Rect& rect, const cv::Point2i& point)
 	}
 }
 
-cv::Rect InteligentRoi::maxRect(const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude)
+cv::Rect InteligentRoi::maxRect(bool& incompleate, const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude)
 {
-	int radius = std::min(imageSize.height, imageSize.width)/2;
+	incompleate = false;
+	int diameter = std::min(imageSize.height, imageSize.width);
 	cv::Point2i point(imageSize.width/2, imageSize.height/2);
-	cv::Rect candiate(point.x-radius, point.y-radius, radius*2, radius*2);
+	cv::Rect candiate(point.x-diameter/2, point.y-diameter/2, diameter, diameter);
 	std::sort(mustInclude.begin(), mustInclude.end(),
 		[&point](const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b){return compPointPrio(a, b, point);});
@@ -43,8 +44,9 @@ cv::Rect InteligentRoi::maxRect(const cv::Size2i& imageSize, std::vector<std::pa
 	while(true)
 	{
 		cv::Rect includeRect = rectFromPoints(mustInclude);
-		if(includeRect.width-2 > radius || includeRect.height-2 > radius)
+		if(includeRect.width-2 > diameter || includeRect.height-2 > diameter)
 		{
+			incompleate = true;
 			slideRectToPoint(candiate, mustInclude.back().first);
 			mustInclude.pop_back();
 			Log(Log::DEBUG)<<"cant fill";
@@ -52,7 +54,9 @@ cv::Rect InteligentRoi::maxRect(const cv::Size2i& imageSize, std::vector<std::pa
 			Log(Log::DEBUG)<<mipoint.first<<' '<<pointDist(mipoint.first, point)<<' '<<mipoint.second;
 		}
 		else
+		{
 			break;
+		}
 	}
 
 	for(const std::pair<cv::Point2i, int>& includePoint : mustInclude)
@@ -75,25 +79,30 @@ InteligentRoi::InteligentRoi(const Yolo& yolo)
 	personId = yolo.getClassForStr("person");
 }
 
-cv::Rect InteligentRoi::getCropRectangle(const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize)
+bool InteligentRoi::getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize)
 {
-	if(!detections.empty())
+	std::vector<std::pair<cv::Point2i, int>> corners;
+	for(size_t i = 0; i < detections.size(); ++i)
 	{
-		std::vector<std::pair<cv::Point2i, int>> corners;
-		for(size_t i = 0; i < detections.size(); ++i)
+		int priority = detections[i].priority;
+		if(detections[i].class_id == personId)
 		{
-			int priority = detections[i].priority;
-			if(detections[i].class_id == personId)
-				corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width/2, 0), priority+1});
+			corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width/2, 0), priority+2});
+			corners.push_back({detections[i].box.tl(), priority+1});
+			corners.push_back({detections[i].box.br(), priority});
+			corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width, 0), priority+1});
+			corners.push_back({detections[i].box.br()+cv::Point2i(0-detections[i].box.width, 0), priority});
+		}
+		else
+		{
 			corners.push_back({detections[i].box.tl(), priority});
 			corners.push_back({detections[i].box.br(), priority});
 			corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width, 0), priority});
 			corners.push_back({detections[i].box.br()+cv::Point2i(0-detections[i].box.width, 0), priority});
 		}
-		return maxRect(imageSize, corners);
 	}
-	Log(Log::DEBUG)<<"Using center crop as there are no detections";
-	return maxRect(imageSize);
+
+	bool incompleate;
+	out = maxRect(incompleate, imageSize, corners);
+	return incompleate;
 }

View file

@@ -10,9 +10,9 @@ private:
 	int personId;
 	static bool compPointPrio(const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b, const cv::Point2i& center);
 	static void slideRectToPoint(cv::Rect& rect, const cv::Point2i& point);
-	static cv::Rect maxRect(const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude = {});
+	static cv::Rect maxRect(bool& incompleate, const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude = {});
 
 public:
 	InteligentRoi(const Yolo& yolo);
-	cv::Rect getCropRectangle(const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize);
+	bool getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize);
 };

440
SmartCrop/main.cpp Normal file
View file

@@ -0,0 +1,440 @@
#include <filesystem>
#include <iostream>
#include <opencv2/core.hpp>
#include <opencv2/core/types.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <algorithm>
#include <execution>
#include <string>
#include <vector>
#include <numeric>
#include "yolo.h"
#include "log.h"
#include "options.h"
#include "utils.h"
#include "intelligentroi.h"
#include "seamcarving.h"
#include "facerecognizer.h"
const Yolo::Detection* pointInDetectionHoriz(int x, const std::vector<Yolo::Detection>& detections, const Yolo::Detection* ignore = nullptr)
{
const Yolo::Detection* inDetection = nullptr;
for(const Yolo::Detection& detection : detections)
{
if(ignore && ignore == &detection)
continue;
if(detection.box.x <= x && detection.box.x+detection.box.width >= x)
{
if(!inDetection || detection.box.br().x > inDetection->box.br().x)
inDetection = &detection;
}
}
return inDetection;
}
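// Advances x to the right edge of the detection region it currently falls in,
// or to the start of the next detection if it is in none. The bool return
// marks whether the region just passed is covered by a detection ("frozen").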
bool findRegionEndpointHoriz(int& x, const std::vector<Yolo::Detection>& detections, int imgSizeX)
{
const Yolo::Detection* inDetection = pointInDetectionHoriz(x, detections);
Log(Log::DEBUG, false)<<__func__<<" point "<<x;
if(!inDetection)
{
const Yolo::Detection* closest = nullptr;
for(const Yolo::Detection& detection : detections)
{
if(detection.box.x > x)
{
			if(closest == nullptr || detection.box.x-x < closest->box.x-x) // pick the nearest box to the right
closest = &detection;
}
}
if(closest)
x = closest->box.x;
else
x = imgSizeX;
Log(Log::DEBUG)<<" is not in any box and will be moved to "<<x<<" where the closest box ("<<(closest ? closest->className : "null")<<") is";
return false;
}
else
{
x = inDetection->box.br().x;
Log(Log::DEBUG, false)<<" is in a box and will be moved to its end "<<x<<" where ";
const Yolo::Detection* candidateDetection = pointInDetectionHoriz(x, detections, inDetection);
if(candidateDetection && candidateDetection->box.br().x > x)
{
Log(Log::DEBUG)<<"it is again in a box";
return findRegionEndpointHoriz(x, detections, imgSizeX);
}
else
{
Log(Log::DEBUG)<<"it is not in a box";
return true;
}
}
}
std::vector<std::pair<cv::Mat, bool>> cutImageIntoHorzRegions(cv::Mat& image, const std::vector<Yolo::Detection>& detections)
{
std::vector<std::pair<cv::Mat, bool>> out;
std::cout<<__func__<<' '<<image.cols<<'x'<<image.rows<<std::endl;
for(int x = 0; x < image.cols; ++x)
{
int start = x;
bool frozen = findRegionEndpointHoriz(x, detections, image.cols);
int width = x-start;
if(x < image.cols)
++width;
cv::Rect rect(start, 0, width, image.rows);
Log(Log::DEBUG)<<__func__<<" region\t"<<rect;
cv::Mat slice = image(rect);
out.push_back({slice, frozen});
}
return out;
}
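// Adjacent slices overlap by one column (see the ++width above), so the
// assembly below advances by cols-1 per slice when stitching them back.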
cv::Mat assembleFromSlicesHoriz(const std::vector<std::pair<cv::Mat, bool>>& slices)
{
assert(!slices.empty());
int cols = 0;
for(const std::pair<cv::Mat, bool>& slice : slices)
cols += slice.first.cols;
	cv::Mat image(slices[0].first.rows, cols, slices[0].first.type()); // cv::Mat takes (rows, cols, type)
Log(Log::DEBUG)<<__func__<<' '<<image.size()<<' '<<cols<<' '<<slices[0].first.rows;
int col = 0;
for(const std::pair<cv::Mat, bool>& slice : slices)
{
cv::Rect rect(col, 0, slice.first.cols, slice.first.rows);
Log(Log::DEBUG)<<__func__<<' '<<rect;
slice.first.copyTo(image(rect));
col += slice.first.cols-1;
}
return image;
}
void transposeRect(cv::Rect& rect)
{
int x = rect.x;
rect.x = rect.y;
rect.y = x;
int width = rect.width;
rect.width = rect.height;
rect.height = width;
}
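// Strategy: slices covered by high priority detections are frozen and left
// untouched; the remaining slices absorb the required seams proportionally to
// their width. Vertical carving reuses the horizontal code path by transposing
// the image (and the detection boxes) first.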
bool seamCarveResize(cv::Mat& image, std::vector<Yolo::Detection> detections, double targetAspectRatio = 1.0)
{
detections.erase(std::remove_if(detections.begin(), detections.end(), [](const Yolo::Detection& detection){return detection.priority < 3;}), detections.end());
double aspectRatio = image.cols/static_cast<double>(image.rows);
Log(Log::DEBUG)<<"Image size "<<image.size()<<" aspect ratio "<<aspectRatio<<" target aspect ratio "<<targetAspectRatio;
bool vertical = false;
if(aspectRatio > targetAspectRatio)
vertical = true;
int requiredLines = 0;
if(!vertical)
requiredLines = image.rows*targetAspectRatio - image.cols;
else
requiredLines = image.cols/targetAspectRatio - image.rows;
Log(Log::DEBUG)<<__func__<<' '<<requiredLines<<" lines are required in "<<(vertical ? "vertical" : "horizontal")<<" direction";
if(vertical)
{
cv::transpose(image, image);
for(Yolo::Detection& detection : detections)
transposeRect(detection.box);
}
std::vector<std::pair<cv::Mat, bool>> slices = cutImageIntoHorzRegions(image, detections);
Log(Log::DEBUG)<<"Image has "<<slices.size()<<" slices:";
int totalResizableSize = 0;
for(const std::pair<cv::Mat, bool>& slice : slices)
{
Log(Log::DEBUG)<<"a "<<(slice.second ? "frozen" : "unfrozen")<<" slice of size "<<slice.first.cols;
if(!slice.second)
totalResizableSize += slice.first.cols;
}
if(totalResizableSize < requiredLines+1)
{
Log(Log::WARN)<<"Unable to seam carve as there are only "<<totalResizableSize<<" unfrozen cols";
if(vertical)
cv::transpose(image, image);
return false;
}
std::vector<int> seamsForSlice(slices.size(), 0);
for(size_t i = 0; i < slices.size(); ++i)
{
if(!slices[i].second)
seamsForSlice[i] = (static_cast<double>(slices[i].first.cols)/totalResizableSize)*requiredLines;
}
	int residual = requiredLines - std::accumulate(seamsForSlice.begin(), seamsForSlice.end(), decltype(seamsForSlice)::value_type(0));
for(ssize_t i = slices.size()-1; i >= 0; --i)
{
if(!slices[i].second)
{
seamsForSlice[i] += residual;
break;
}
}
for(size_t i = 0; i < slices.size(); ++i)
{
if(seamsForSlice[i] != 0)
{
bool ret = SeamCarving::strechImage(slices[i].first, seamsForSlice[i], true);
if(!ret)
{
if(vertical)
transpose(image, image);
return false;
}
}
}
image = assembleFromSlicesHoriz(slices);
if(vertical)
cv::transpose(image, image);
return true;
}
void drawDebugInfo(cv::Mat &image, const cv::Rect& rect, const std::vector<Yolo::Detection>& detections)
{
for(const Yolo::Detection& detection : detections)
{
cv::rectangle(image, detection.box, detection.color, 3);
std::string label = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4) + ' ' + std::to_string(detection.priority);
cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_DUPLEX, 1, 1, 0);
cv::Rect textBox(detection.box.x, detection.box.y - 40, labelSize.width + 10, labelSize.height + 20);
cv::rectangle(image, textBox, detection.color, cv::FILLED);
cv::putText(image, label, cv::Point(detection.box.x + 5, detection.box.y - 10), cv::FONT_HERSHEY_DUPLEX, 1, cv::Scalar(0, 0, 0), 1, 0);
}
cv::rectangle(image, rect, cv::Scalar(0, 0, 255), 8);
}
static void reduceSize(cv::Mat& image, const cv::Size& targetSize)
{
int longTargetSize = std::max(targetSize.width, targetSize.height)*2;
if(std::max(image.cols, image.rows) > longTargetSize)
{
if(image.cols > image.rows)
{
double ratio = static_cast<double>(longTargetSize)/image.cols;
cv::resize(image, image, {longTargetSize, static_cast<int>(image.rows*ratio)}, 0, 0, ratio < 1 ? cv::INTER_AREA : cv::INTER_CUBIC);
}
else
{
double ratio = static_cast<double>(longTargetSize)/image.rows;
cv::resize(image, image, {static_cast<int>(image.cols*ratio), longTargetSize}, 0, 0, ratio < 1 ? cv::INTER_AREA : cv::INTER_CUBIC);
}
}
}
void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yolo, FaceRecognizer* recognizer,
std::mutex& reconizerMutex, const std::filesystem::path& debugOutputPath)
{
InteligentRoi intRoi(yolo);
cv::Mat image = cv::imread(path);
if(!image.data)
{
Log(Log::WARN)<<"could not load image "<<path<<" skipping";
return;
}
reduceSize(image, config.targetSize);
std::vector<Yolo::Detection> detections = yolo.runInference(image);
Log(Log::DEBUG)<<"Got "<<detections.size()<<" detections for "<<path;
for(Yolo::Detection& detection : detections)
{
bool hasmatch = false;
if(recognizer && detection.className == "person")
{
cv::Mat person = image(detection.box);
reconizerMutex.lock();
FaceRecognizer::Detection match = recognizer->isMatch(person);
reconizerMutex.unlock();
if(match.person >= 0)
{
detection.priority += 10;
hasmatch = true;
detections.push_back({0, "Face", match.confidence, 20, {255, 0, 0}, match.rect});
}
}
Log(Log::DEBUG)<<detection.class_id<<": "<<detection.className<<" at "<<detection.box<<" with prio "<<detection.priority<<(hasmatch ? " has match" : "");
}
cv::Rect crop;
bool incompleate = intRoi.getCropRectangle(crop, detections, image.size());
if(config.seamCarving && incompleate)
{
bool ret = seamCarveResize(image, detections, config.targetSize.aspectRatio());
if(ret && image.size().aspectRatio() != config.targetSize.aspectRatio())
{
detections = yolo.runInference(image);
}
}
cv::Mat croppedImage;
if(image.size().aspectRatio() != config.targetSize.aspectRatio() && incompleate)
{
intRoi.getCropRectangle(crop, detections, image.size());
if(config.debug)
{
cv::Mat debugImage = image.clone();
drawDebugInfo(debugImage, crop, detections);
bool ret = cv::imwrite(debugOutputPath/path.filename(), debugImage);
if(!ret)
Log(Log::WARN)<<"could not save debug image to "<<debugOutputPath/path.filename()<<" skipping";
}
croppedImage = image(crop);
}
else if(!incompleate)
{
croppedImage = image(crop);
}
else
{
croppedImage = image;
}
cv::Mat resizedImage;
cv::resize(croppedImage, resizedImage, config.targetSize, 0, 0, cv::INTER_CUBIC);
bool ret = cv::imwrite(config.outputDir/path.filename(), resizedImage);
if(!ret)
Log(Log::WARN)<<"could not save image to "<<config.outputDir/path.filename()<<" skipping";
}
void threadFn(const std::vector<std::filesystem::path>& images, const Config& config, FaceRecognizer* recognizer,
std::mutex& reconizerMutex, const std::filesystem::path& debugOutputPath)
{
Yolo yolo(config.modelPath, {640, 480}, config.classesPath, false);
for(std::filesystem::path path : images)
pipeline(path, config, yolo, recognizer, reconizerMutex, debugOutputPath);
}
template<typename T>
std::vector<std::vector<T>> splitVector(const std::vector<T>& vec, size_t parts)
{
std::vector<std::vector<T>> out;
size_t length = vec.size()/parts;
size_t remain = vec.size() % parts;
size_t begin = 0;
size_t end = 0;
for (size_t i = 0; i < std::min(parts, vec.size()); ++i)
{
end += (remain > 0) ? (length + !!(remain--)) : length;
out.push_back(std::vector<T>(vec.begin() + begin, vec.begin() + end));
begin = end;
}
return out;
}
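// e.g. splitVector with 7 elements and 3 parts yields sizes 3, 2, 2; it never
// returns more parts than there are elements.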
int main(int argc, char* argv[])
{
Log::level = Log::INFO;
Config config;
argp_parse(&argp, argc, argv, 0, 0, &config);
if(config.outputDir.empty())
{
Log(Log::ERROR)<<"a output path \"-o\" is required";
return 1;
}
if(config.imagePaths.empty())
{
Log(Log::ERROR)<<"at least one input image or directory is required";
return 1;
}
std::vector<std::filesystem::path> imagePaths;
for(const std::filesystem::path& path : config.imagePaths)
getImageFiles(path, imagePaths);
Log(Log::DEBUG)<<"Images:";
	for(const std::filesystem::path& path : imagePaths)
Log(Log::DEBUG)<<path;
if(imagePaths.empty())
{
Log(Log::ERROR)<<"no image was found\n";
return 1;
}
if(!std::filesystem::exists(config.outputDir))
{
if(!std::filesystem::create_directory(config.outputDir))
{
Log(Log::ERROR)<<"could not create directory at "<<config.outputDir;
return 1;
}
}
std::filesystem::path debugOutputPath(config.outputDir/"debug");
if(config.debug)
{
if(!std::filesystem::exists(debugOutputPath))
std::filesystem::create_directory(debugOutputPath);
}
FaceRecognizer* recognizer = nullptr;
std::mutex recognizerMutex;
if(!config.focusPersonImage.empty())
{
cv::Mat personImage = cv::imread(config.focusPersonImage);
if(personImage.empty())
{
Log(Log::ERROR)<<"Could not load image from "<<config.focusPersonImage;
return 1;
}
recognizer = new FaceRecognizer();
recognizer->addReferances({personImage});
recognizer->setThreshold(config.threshold);
}
std::vector<std::thread> threads;
std::vector<std::vector<std::filesystem::path>> imagePathParts = splitVector(imagePaths, std::thread::hardware_concurrency());
for(size_t i = 0; i < std::thread::hardware_concurrency(); ++i)
threads.push_back(std::thread(threadFn, imagePathParts[i], std::ref(config), recognizer, std::ref(recognizerMutex), std::ref(debugOutputPath)));
for(std::thread& thread : threads)
thread.join();
return 0;
}

98
SmartCrop/options.h Normal file
View file

@@ -0,0 +1,98 @@
#pragma once
#include <string>
#include <vector>
#include <argp.h>
#include <iostream>
#include <filesystem>
#include <opencv2/core/types.hpp>
#include "log.h"
const char *argp_program_version = "AIImagePreprocesses";
const char *argp_program_bug_address = "<carl@uvos.xyz>";
static char doc[] = "Application that transforms images into formats, sizes and aspect ratios required for ai training";
static char args_doc[] = "FILE(S)";
static struct argp_option options[] =
{
{"verbose", 'v', 0, 0, "Show debug messages" },
{"quiet", 'q', 0, 0, "only output data" },
{"model", 'm', "[FILENAME]", 0, "YoloV8 model to use for detection" },
{"classes", 'c', "[FILENAME]", 0, "classes text file to use" },
{"out", 'o', "[DIRECTORY]", 0, "directory whre images are to be saved" },
{"debug", 'd', 0, 0, "output debug images" },
{"seam-carving", 's', 0, 0, "use seam carving to change image aspect ratio instead of croping"},
{"size", 'z', "[PIXELS]", 0, "target output size, default: 512"},
{"focus-person", 'f', "[FILENAME]", 0, "a file name to an image of a person that the crop should focus on"},
{"person-threshold", 't', "[NUMBER]", 0, "the threshold at witch to consider a person matched, defaults to 0.363"},
{0}
};
struct Config
{
std::vector<std::filesystem::path> imagePaths;
std::filesystem::path modelPath;
std::filesystem::path classesPath;
std::filesystem::path outputDir;
std::filesystem::path focusPersonImage;
bool seamCarving = false;
bool debug = false;
double threshold = 0.363;
cv::Size targetSize = cv::Size(512, 512);
};
static error_t parse_opt (int key, char *arg, struct argp_state *state)
{
Config *config = reinterpret_cast<Config*>(state->input);
try
{
switch (key)
{
case 'q':
Log::level = Log::ERROR;
break;
case 'v':
Log::level = Log::DEBUG;
break;
case 'm':
config->modelPath = arg;
break;
case 'c':
config->classesPath = arg;
break;
case 'd':
config->debug = true;
break;
case 'o':
config->outputDir.assign(arg);
break;
case 's':
config->seamCarving = true;
break;
case 'f':
config->focusPersonImage = arg;
break;
case 't':
config->threshold = std::atof(arg);
break;
case 'z':
{
int x = std::stoi(arg);
config->targetSize = cv::Size(x, x);
break;
}
case ARGP_KEY_ARG:
config->imagePaths.push_back(arg);
break;
default:
return ARGP_ERR_UNKNOWN;
}
}
catch(const std::invalid_argument& ex)
{
std::cout<<arg<<" passed for argument -"<<static_cast<char>(key)<<" is not a valid number.\n";
return ARGP_KEY_ERROR;
}
return 0;
}
static struct argp argp = {options, parse_opt, args_doc, doc};

View file

@@ -1,19 +1,19 @@
 #include "seamcarving.h"
 #include <opencv2/imgcodecs.hpp>
 #include <opencv2/highgui/highgui.hpp>
 #include <opencv2/imgproc.hpp>
 #include <iostream>
-#if __cplusplus >= 201703L
 #include <filesystem>
-#endif
 #include <cfloat>
+#include <vector>
+#include "log.h"
 
-SeamCarving::SeamCarving(const cv::Mat &img, int seams, bool grow) :
-	image(img), seams(seams), grow(grow) {}
-
-void SeamCarving::init()
+bool SeamCarving::strechImage(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect)
 {
 	cv::Mat newFrame = image.clone();
+	assert(!newFrame.empty());
+	std::vector<std::vector<int>> vecSeams;
 	for(int i = 0; i < seams; i++)
 	{
@@ -24,230 +24,55 @@ void SeamCarving::init()
 		cv::Mat pathIntensityMat = computePathIntensityMat(gradientMagnitude);
 		if(pathIntensityMat.rows == 0 && pathIntensityMat.cols == 0)
-		{
-			finalImage = image;
-			break;
-		}
+			return false;
 		std::vector<int> seam = getLeastImportantPath(pathIntensityMat);
 		vecSeams.push_back(seam);
+		if(seamsVect)
+			seamsVect->push_back(seam);
-		newFrame = removeLeastImportantPath(newFrame,seam);
-		if(newFrame.rows == 0 && newFrame.cols == 0)
-		{
-			finalImage = image;
-			break;
-		}
+		newFrame = removeLeastImportantPath(newFrame, seam);
+		if(newFrame.rows == 0 || newFrame.cols == 0)
+			return false;
 	}
 	if (grow)
 	{
 		cv::Mat growMat = image.clone();
-		for (int i = 0; i < vecSeams.size(); i++)
+		for(size_t i = 0; i < vecSeams.size(); i++)
 		{
 			growMat = addLeastImportantPath(growMat,vecSeams[i]);
 		}
-		finalImage = growMat;
+		image = growMat;
 	}
 	else
 	{
-		finalImage = newFrame;
+		image = newFrame;
 	}
-
-	sliderPos = seams;
+	return true;
 }
 
-void SeamCarving::computeNewFinalImage(int sliderPos)
+bool SeamCarving::strechImageVert(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect)
 {
-	if(sliderPos == 0)
-	{
-		finalImage = image;
-		return;
-	}
-	if(sliderPos < 1 || sliderPos >= sliderMax-1)
-	{
-		return;
-	}
-	if(sliderPos > vecSeams.size())
-	{
-		cv::Mat newFrame = finalImage.clone();
-		for(int i = vecSeams.size()-1; i < sliderPos; i++)
-		{
-			//Gradient Magnitude for intensity of image.
-			cv::Mat gradientMagnitude = computeGradientMagnitude(newFrame);
-			//Use DP to create the real energy map that is used for path calculation.
-			// Strictly using vertical paths for testing simplicity.
-			cv::Mat pathIntensityMat = computePathIntensityMat(gradientMagnitude);
-			if(pathIntensityMat.rows == 0 && pathIntensityMat.cols == 0)
-			{
-				finalImage = image;
-				break;
-			}
-			std::vector<int> seam = getLeastImportantPath(pathIntensityMat);
-			vecSeams.push_back(seam);
-			newFrame = removeLeastImportantPath(newFrame,seam);
-			if(newFrame.rows == 0 && newFrame.cols == 0)
-			{
-				finalImage = image;
-				break;
-			}
-		}
-		if (grow)
-		{
-			cv::Mat growMat = image.clone();
-			for (int i = 0; i < vecSeams.size(); i++)
-			{
-				growMat = addLeastImportantPath(growMat,vecSeams[i]);
-			}
-			finalImage = growMat;
-		}
-		else
-		{
-			finalImage = newFrame;
-		}
-	}
-	else if (sliderPos < vecSeams.size())
-	{
-		cv::Mat newFrame = image.clone();
-		for(int i = 0; i < sliderPos; i++) // TODO check if it is faster to add seams back (probably not)
-		{
-			if (grow)
-			{
-				newFrame = addLeastImportantPath(newFrame,vecSeams[i]);
-			}
-			else
-			{
-				newFrame = removeLeastImportantPath(newFrame,vecSeams[i]);
-			}
-			if(newFrame.rows == 0 && newFrame.cols == 0)
-			{
-				finalImage = image;
-				break;
-			}
-		}
-		finalImage = newFrame;
-	}
+	cv::transpose(image, image);
+	bool ret = strechImage(image, seams, grow, seamsVect);
+	cv::transpose(image, image);
+	return ret;
 }
 
-const cv::Mat& SeamCarving::getFinalImage()
+bool SeamCarving::strechImageWithSeamsImage(cv::Mat& image, cv::Mat& seamsImage, int seams, bool grow)
 {
-	return finalImage;
+	std::vector<std::vector<int>> seamsVect;
+	seamsImage = image.clone();
+	bool ret = SeamCarving::strechImage(image, seams, grow, &seamsVect);
+	if(!ret)
+		return false;
+	for(size_t i = 0; i < seamsVect.size(); ++i)
+		seamsImage = drawSeam(seamsImage, seamsVect[i]);
+	return true;
 }
-void SeamCarving::showSeamsImg()
-{
-	cv::Mat seamsFrame = image.clone();
-	//std::cout << "sliderPos: " << sliderPos << std::endl;
-	for(int i = 0; i < sliderPos; i++)
-	{
-		seamsFrame = drawSeam(seamsFrame, vecSeams[i]);
-	}
-	cv::imwrite("output/seams_image.jpg", seamsFrame);
-	cv::imshow( "Image Seams", seamsFrame);
-}
-static void onChange( int pos, void* object )
-{
-	SeamCarving* sc = (SeamCarving*)(object);
-	/*if(sc->getBlockUpdateStatus()) {
-		return;
-	}*/
-	sc->computeNewFinalImage(pos);
-	imshow("Final Image", sc->getFinalImage());
-#if DEBUG
-	sc->showSeamsImg();
-#endif
-}
-static void onMouse( int event, int x, int y, int, void* object)
-{
-	SeamCarving* sc = (SeamCarving*)(object);
-	if( event == cv::EVENT_LBUTTONDOWN ||
-		event == cv::EVENT_RBUTTONDOWN ||
-		event == cv::EVENT_MBUTTONDOWN
-		)
-	{
-		sc->setBlockUpdate(true);
-	}
-	else if(event == cv::EVENT_LBUTTONUP ||
-		event == cv::EVENT_RBUTTONUP ||
-		event == cv::EVENT_MBUTTONUP)
-	{
-		sc->setBlockUpdate(false);
-	}
-}
-void SeamCarving::setBlockUpdate(bool bUpdate)
-{
-	blockUpdate = bUpdate;
-}
-bool SeamCarving::getBlockUpdateStatus()
-{
-	return blockUpdate;
-}
-void SeamCarving::showImage()
-{
-#if __cplusplus >= 201703L
-	if(!std::filesystem::exists("output"))
-	{
-		std::filesystem::create_directory("output");
-	}
-#endif
-	if( image.empty() )
-	{
-		std::cout << "Could not open raw image" << std::endl ;
-		return;
-	}
-	namedWindow( "Raw Image", cv::WINDOW_AUTOSIZE );
-	cv::imshow( "Raw Image", image );
-	if( finalImage.empty() )
-	{
-		std::cout << "Could not open final image" << std::endl ;
-		return;
-	}
-#if DEBUG
-	namedWindow( "gradient Image", cv::WINDOW_AUTOSIZE );
-	cv::Mat gradient = computeGradientMagnitude(image);
-	cv::Mat u8_image;
-	gradient.convertTo(u8_image, CV_8U);
-	cv::imwrite("output/gradient_image.jpg", u8_image);
-	cv::imshow("gradient Image", u8_image);
-	namedWindow( "intensity Image", cv::WINDOW_AUTOSIZE );
-	cv::Mat u8_image2;
-	cv::Mat intensityMat = computePathIntensityMat(gradient);
-	cv::Mat dst;
-	cv::normalize(intensityMat, dst, 0, 255, cv::NORM_MINMAX);
-	dst.convertTo(u8_image2, CV_8U);
-	cv::imwrite("output/intensity_image.jpg", u8_image2);
-	cv::imshow( "intensity Image", u8_image2);
-	//cv::Mat engImg = GetEnergyImg(image);
-	//namedWindow("energy Image", cv::WINDOW_AUTOSIZE);
-	//cv::Mat u8_image3;
-	//engImg.convertTo(u8_image3, CV_8U);
-	//cv::imshow( "energy Image", u8_image3);
-	namedWindow("Image Seams", cv::WINDOW_AUTOSIZE);
-	showSeamsImg();
-#endif
-	namedWindow( "Final Image", cv::WINDOW_AUTOSIZE );
-	cv::createTrackbar("Seams", "Final Image", &sliderPos, sliderMax, onChange, this);
-	//cv::setMouseCallback("Final Image", onMouse, this );
-	cv::imwrite("output/final_image.jpg", finalImage);
-	cv::imshow("Final Image", finalImage);
-	cv::waitKey(0);
-}
 
 cv::Mat SeamCarving::GetEnergyImg(const cv::Mat &img)
@@ -392,9 +217,7 @@ cv::Mat SeamCarving::removeLeastImportantPath(const cv::Mat &original, const std
 	cv::Size size = cv::Size(orgSize.width-1, orgSize.height);
 	cv::Mat newMat = cv::Mat(size, original.type());
-	unsigned char *rawOrig = original.data;
-	unsigned char *rawOutput = newMat.data;
-	for(int row = 0; row < seam.size(); row++)
+	for(size_t row = 0; row < seam.size(); row++)
 	{
 		removePixel(original, newMat, row, seam[row]);
 	}
@@ -460,9 +283,7 @@ cv::Mat SeamCarving::addLeastImportantPath(const cv::Mat &original, const std::v
 	cv::Size size = cv::Size(orgSize.width+1, orgSize.height);
 	cv::Mat newMat = cv::Mat(size, original.type());
-	unsigned char *rawOrig = original.data;
-	unsigned char *rawOutput = newMat.data;
-	for(int row = 0; row < seam.size(); row++)
+	for(size_t row = 0; row < seam.size(); row++)
 	{
 		//std::cout << "row: " << row << ", col: " << seam[row] << std::endl;
 		addPixel(original, newMat, row, seam[row]);
@@ -518,3 +339,18 @@ void SeamCarving::addPixel(const cv::Mat &original, cv::Mat &outputMat, int row,
 		rawOutput[newRowStart + leftPixel*channels+2] = (unsigned char) ((byte3 + byte3L)/2);
 	}
 }
+
+cv::Mat SeamCarving::drawSeam(const cv::Mat &frame, const std::vector<int> &seam)
+{
+	cv::Mat retMat = frame.clone();
+	for(int row = 0; row < frame.rows; row++)
+	{
+		for(int col = 0; col < frame.cols; col++)
+		{
+			retMat.at<cv::Vec3b>(row, seam[row])[0] = 0;
+			retMat.at<cv::Vec3b>(row, seam[row])[1] = 255;
+			retMat.at<cv::Vec3b>(row, seam[row])[2] = 0;
+		}
+	}
+	return retMat;
+}

24
SmartCrop/seamcarving.h Normal file
View file

@@ -0,0 +1,24 @@
#pragma once
#include <opencv2/core/core.hpp>
#include <vector>
class SeamCarving
{
private:
static cv::Mat GetEnergyImg(const cv::Mat &img);
static cv::Mat computeGradientMagnitude(const cv::Mat &frame);
static float intensity(float currIndex, int start, int end);
static cv::Mat computePathIntensityMat(const cv::Mat &rawEnergyMap);
static std::vector<int> getLeastImportantPath(const cv::Mat &importanceMap);
static cv::Mat removeLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam);
static void removePixel(const cv::Mat &original, cv::Mat &outputMap, int row, int minCol);
static cv::Mat addLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam);
static void addPixel(const cv::Mat &original, cv::Mat &outputMat, int row, int minCol);
static cv::Mat drawSeam(const cv::Mat &frame, const std::vector<int> &seam);
public:
static bool strechImage(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect = nullptr);
static bool strechImageVert(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect = nullptr);
static bool strechImageWithSeamsImage(cv::Mat& image, cv::Mat& seamsImage, int seams, bool grow);
};

SmartCrop/yolo.cpp
View file

@@ -11,8 +11,8 @@
 #define INCBIN_PREFIX r
 #include "incbin.h"
 
-INCTXT(defaultClasses, "../classes.txt");
-INCBIN(defaultModel, "../yolov8x.onnx");
+INCTXT(defaultClasses, WEIGHT_DIR "/classes.txt");
+INCBIN(defaultModel, WEIGHT_DIR "/yolov8x.onnx");
 
 Yolo::Yolo(const std::filesystem::path &onnxModelPath, const cv::Size &modelInputShape,
 	const std::filesystem::path& classesTxtFilePath, bool runWithOCl)
@@ -22,6 +22,7 @@ Yolo::Yolo(const std::filesystem::path &onnxModelPath, const cv::Size &modelInpu
 	if(classesTxtFilePath.empty())
 	{
+		Log(Log::INFO)<<"Using builtin classes";
 		loadClasses(rdefaultClassesData);
 	}
 	else
@@ -31,19 +32,21 @@ Yolo::Yolo(const std::filesystem::path &onnxModelPath, const cv::Size &modelInpu
 	}
 
 	if(!modelPath.empty())
+	{
 		net = cv::dnn::readNetFromONNX(modelPath);
+	}
 	else
+	{
+		Log(Log::INFO)<<"Using builtin yolo model";
 		net = cv::dnn::readNetFromONNX((const char*)rdefaultModelData, rdefaultModelSize);
+	}
 
 	if(runWithOCl)
 	{
-		std::cout << "\nRunning on OCV" << std::endl;
 		net.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT);
 		net.setPreferableTarget(cv::dnn::DNN_TARGET_OPENCL);
 	}
 	else
 	{
-		std::cout << "\nRunning on CPU" << std::endl;
 		net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
 		net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
 	}
@@ -176,14 +179,33 @@ std::vector<Yolo::Detection> Yolo::runInference(const cv::Mat &input)
 		result.className = classes[result.class_id].first;
 		result.priority = classes[result.class_id].second;
 
+		clampBox(boxes[idx], input.size());
 		result.box = boxes[idx];
 		detections.push_back(result);
 	}
 
 	return detections;
 }
 
+void Yolo::clampBox(cv::Rect& box, const cv::Size& size)
+{
+	if(box.x < 0)
+	{
+		box.width += box.x;
+		box.x = 0;
+	}
+	if(box.y < 0)
+	{
+		box.height += box.y;
+		box.y = 0;
+	}
+	if(box.x+box.width > size.width)
+		box.width = size.width - box.x;
+	if(box.y+box.height > size.height)
+		box.height = size.height - box.y;
+}
+
 void Yolo::loadClasses(const std::string& classesStr)
 {
 	std::vector<std::string> candidateClasses = tokenizeBinaryIgnore(classesStr, '\n', '"', '\\');
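
The newly added clampBox clips detection boxes that extend past the image borders, which would otherwise make the crop rectangle taken from the image invalid. A minimal sketch (not from the patch): for boxes that overlap the image at all, the same clipping can be written as a rectangle intersection.

#include <opencv2/core/types.hpp>
#include <cassert>

int main()
{
	cv::Size size(640, 480);
	cv::Rect box(-10, 20, 100, 500); // sticks out to the left and bottom

	// Intersect with the image bounds; matches clampBox for overlapping boxes.
	cv::Rect clamped = box & cv::Rect(cv::Point(0, 0), size);
	assert(clamped == cv::Rect(0, 20, 90, 460));
	return 0;
}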

SmartCrop/yolo.h
View file

@@ -27,19 +27,16 @@ private:
 	static constexpr float modelScoreThreshold = 0.45;
 	static constexpr float modelNMSThreshold = 0.50;
 
-	std::string modelPath;
-	std::vector<std::pair<std::string, int>> classes;
-	cv::Size2f modelShape;
-	bool letterBoxForSquare = true;
-	cv::dnn::Net net;
-
 	void loadClasses(const std::string& classes);
 	void loadOnnxNetwork(const std::filesystem::path& path);
 	cv::Mat formatToSquare(const cv::Mat &source);
+	static void clampBox(cv::Rect& box, const cv::Size& size);
+
+	std::string modelPath;
+	std::vector<std::pair<std::string, int>> classes;
+	cv::Size2f modelShape;
+	bool letterBoxForSquare = true;
+	cv::dnn::Net net;
 
 public:
 	Yolo(const std::filesystem::path &onnxModelPath = "", const cv::Size& modelInputShape = {640, 480},
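
With the class list and model now embedded via INCBIN, a Yolo instance needs no external files. A hedged sketch mirroring the call site in the removed main.cpp; the image path is a placeholder:

#include <opencv2/imgcodecs.hpp>
#include "yolo.h"
#include "log.h"

int main()
{
	Yolo yolo("", {640, 480}, "", false); // empty paths select the builtin weights and classes
	cv::Mat image = cv::imread("photo.jpg");
	if(image.empty())
		return 1;

	for(const Yolo::Detection& detection : yolo.runInference(image))
		Log(Log::INFO)<<detection.className<<" prio "<<detection.priority<<" at "<<detection.box;
	return 0;
}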

80
Weights/classes.txt Normal file
View file

@@ -0,0 +1,80 @@
person, 10
bicycle, 4
car, 3
motorcycle, 4
airplane, 4
bus, 4
train, 4
truck, 3
boat, 4
traffic light, 1
fire hydrant, 1
stop sign, 1
parking meter, 1
bench, 2
bird, 5
cat, 6
dog, 5
horse, 4
sheep, 5
cow, 4
elephant, 5
bear, 5
zebra, 5
giraffe, 5
backpack, 3
umbrella, 3
handbag, 3
tie, 3
suitcase, 2
frisbee, 3
skis, 3
snowboard, 3
sports ball, 3
kite, 4
baseball bat, 3
baseball glove, 3
skateboard, 3
surfboard, 3
tennis racket, 3
bottle, 2
wine glass, 2
cup, 2
fork, 1
knife, 1
spoon, 1
bowl, 1
banana, 1
apple, 1
sandwich, 1
orange, 1
broccoli, 1
carrot, 1
hot dog, 1
pizza, 1
donut, 2
cake, 2
chair, 1
couch, 1
potted plant, 1
bed, 1
dining table, 1
toilet, 1
tv, 1
laptop, 1
mouse, 1
remote, 1
keyboard, 1
cell phone, 1
microwave, 1
oven, 1
toaster, 1
sink, 1
refrigerator, 1
book, 1
clock, 1
vase, 1
scissors, 1
teddy bear, 1
hair drier, 1
toothbrush, 1
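
Each line of Weights/classes.txt pairs a COCO class name with an integer cropping priority; person ranks highest at 10 and clutter classes bottom out at 1. The project parses this with tokenizeBinaryIgnore; the sketch below is only an assumption of the format's shape and ignores the quoting and escaping that function supports.

#include <sstream>
#include <string>
#include <utility>
#include <vector>

std::vector<std::pair<std::string, int>> parseClasses(const std::string& text)
{
	std::vector<std::pair<std::string, int>> classes;
	std::istringstream stream(text);
	for(std::string line; std::getline(stream, line);)
	{
		size_t comma = line.rfind(',');
		if(comma == std::string::npos)
			continue;
		// "person, 10" -> {"person", 10}; stoi skips the leading space.
		classes.push_back({line.substr(0, comma), std::stoi(line.substr(comma + 1))});
	}
	return classes;
}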

Binary file not shown.

Binary file not shown.

BIN
Weights/yolov8x.onnx Normal file

Binary file not shown.

295
main.cpp
View file

@@ -1,295 +0,0 @@
#include <filesystem>
#include <iostream>
#include <opencv2/core/types.hpp>
#include <opencv2/imgproc.hpp>
#include <algorithm>
#include <vector>
#include "yolo.h"
#include "log.h"
#include "options.h"
#include "utils.h"
#include "intelligentroi.h"
const Yolo::Detection* pointInDetectionHoriz(int x, const std::vector<Yolo::Detection>& detections, const Yolo::Detection* ignore = nullptr)
{
const Yolo::Detection* inDetection = nullptr;
for(const Yolo::Detection& detection : detections)
{
if(!ignore || ignore != &detection)
continue;
if(detection.box.x <= x && detection.box.x+detection.box.width <= x)
{
if(!inDetection || detection.box.br().x > inDetection->box.br().x)
inDetection = &detection;
}
}
return inDetection;
}
bool findRegionEndpointHoriz(int& x, const std::vector<Yolo::Detection>& detections, int imgSizeX)
{
const Yolo::Detection* inDetection = pointInDetectionHoriz(x, detections);
if(!inDetection)
{
const Yolo::Detection* closest = nullptr;
for(const Yolo::Detection& detection : detections)
{
if(detection.box.x > x)
{
if(closest == nullptr || detection.box.x-x > closest->box.x-x)
closest = &detection;
}
}
if(closest)
x = closest->box.x;
else
x = imgSizeX;
return false;
}
else
{
x = inDetection->box.br().x;
const Yolo::Detection* candidateDetection = pointInDetectionHoriz(x, detections, inDetection);
if(candidateDetection && candidateDetection->box.br().x > x)
return findRegionEndpointHoriz(x, detections, imgSizeX);
else
return true;
}
}
std::vector<std::pair<cv::Mat, bool>> cutImageIntoHorzRegions(cv::Mat& image, const std::vector<Yolo::Detection>& detections)
{
std::vector<std::pair<cv::Mat, bool>> out;
for(int x = 0; x < image.cols; ++x)
{
int start = x;
bool frozen = findRegionEndpointHoriz(x, detections, image.cols);
cv::Mat slice = image(cv::Rect(start, 0, x-start, image.rows));
out.push_back({slice, frozen});
}
return out;
}
const Yolo::Detection* pointInDetectionVert(int y, const std::vector<Yolo::Detection>& detections, const Yolo::Detection* ignore = nullptr)
{
const Yolo::Detection* inDetection = nullptr;
for(const Yolo::Detection& detection : detections)
{
if(!ignore || ignore != &detection)
continue;
if(detection.box.y <= y && detection.box.y+detection.box.height <= y)
{
if(!inDetection || detection.box.br().y > inDetection->box.br().y)
inDetection = &detection;
}
}
return inDetection;
}
bool findRegionEndpointVert(int& y, const std::vector<Yolo::Detection>& detections, int imgSizeY)
{
const Yolo::Detection* inDetection = pointInDetectionVert(y, detections);
if(!inDetection)
{
const Yolo::Detection* closest = nullptr;
for(const Yolo::Detection& detection : detections)
{
if(detection.box.y > y)
{
if(closest == nullptr || detection.box.y-y > closest->box.y-y)
closest = &detection;
}
}
if(closest)
y = closest->box.y;
else
y = imgSizeY;
return false;
}
else
{
y = inDetection->box.br().y;
const Yolo::Detection* candidateDetection = pointInDetectionVert(y, detections, inDetection);
if(candidateDetection && candidateDetection->box.br().y > y)
return findRegionEndpointVert(y, detections, imgSizeY);
else
return true;
}
}
std::vector<std::pair<cv::Mat, bool>> cutImageIntoVertRegions(cv::Mat& image, const std::vector<Yolo::Detection>& detections)
{
std::vector<std::pair<cv::Mat, bool>> out;
for(int y = 0; y < image.rows; ++y)
{
int start = y;
bool frozen = findRegionEndpointVert(y, detections, image.rows);
cv::Mat slice = image(cv::Rect(0, start, image.cols, y-start));
out.push_back({slice, frozen});
}
return out;
}
bool seamCarveResize(cv::Mat& image, const std::vector<Yolo::Detection>& detections, double targetAspectRatio = 1.0)
{
double aspectRatio = image.cols/static_cast<double>(image.rows);
bool vertical = false;
cv::Mat workImage;
if(aspectRatio > targetAspectRatio)
vertical = true;
int requiredLines = 0;
if(!vertical)
requiredLines = workImage.rows*targetAspectRatio - workImage.cols;
else
requiredLines = workImage.cols/targetAspectRatio - workImage.rows;
Log(Log::DEBUG)<<__func__<<' '<<requiredLines<<" lines are required in "<<(vertical ? "vertical" : "horizontal")<<" direction";
if(!vertical)
{
std::vector<std::pair<cv::Mat, bool>> slices = cutImageIntoHorzRegions(image, detections);
int totalResizableSize = 0;
for(const std::pair<cv::Mat, bool>& slice : slices)
{
if(slice.second)
totalResizableSize += slice.first.cols;
}
std::vector<int> seamsForSlice(slices.size());
for(size_t i = 0; i < slices.size(); ++i)
{
seamsForSlice[i] = (static_cast<double>(slices[i].first.cols)/totalResizableSize)*requiredLines;
}
}
else
{
int totalResizableSize = 0;
std::vector<std::pair<cv::Mat, bool>> slices = cutImageIntoVertRegions(image, detections);
}
}
void drawDebugInfo(cv::Mat &image, const cv::Rect& rect, const std::vector<Yolo::Detection>& detections)
{
for(const Yolo::Detection& detection : detections)
{
cv::rectangle(image, detection.box, detection.color, 4);
std::string label = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4);
cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_DUPLEX, 3, 2, 0);
cv::Rect textBox(detection.box.x, detection.box.y - 80, labelSize.width + 10, labelSize.height + 20);
cv::rectangle(image, textBox, detection.color, cv::FILLED);
cv::putText(image, label, cv::Point(detection.box.x + 5, detection.box.y - 10), cv::FONT_HERSHEY_DUPLEX, 3, cv::Scalar(0, 0, 0), 2, 0);
}
cv::rectangle(image, rect, cv::Scalar(0, 0, 255), 8);
}
int main(int argc, char* argv[])
{
Log::level = Log::INFO;
Config config;
argp_parse(&argp, argc, argv, 0, 0, &config);
if(config.outputDir.empty())
{
Log(Log::ERROR)<<"a output path \"-o\" is required";
return 1;
}
if(config.imagePaths.empty())
{
Log(Log::ERROR)<<"at least one input image or directory is required";
return 1;
}
std::vector<std::filesystem::path> imagePaths;
for(const std::filesystem::path& path : config.imagePaths)
getImageFiles(path, imagePaths);
if(imagePaths.empty())
{
Log(Log::ERROR)<<"no image was found\n";
return 1;
}
Yolo yolo(config.modelPath, {640, 480}, config.classesPath, false);
InteligentRoi intRoi(yolo);
if(!std::filesystem::exists(config.outputDir))
{
if(!std::filesystem::create_directory(config.outputDir))
{
Log(Log::ERROR)<<"could not create directory at "<<config.outputDir;
return 1;
}
}
std::filesystem::path debugOutputPath(config.outputDir/"debug");
if(config.debug)
{
if(!std::filesystem::exists(debugOutputPath))
std::filesystem::create_directory(debugOutputPath);
}
for(const std::filesystem::path& path : imagePaths)
{
cv::Mat image = cv::imread(path);
if(!image.data)
{
Log(Log::WARN)<<"could not load image "<<path<<" skipping";
continue;
}
if(std::max(image.cols, image.rows) > 1024)
{
if(image.cols > image.rows)
{
double ratio = 1024.0/image.cols;
cv::resize(image, image, {1024, static_cast<int>(image.rows*ratio)}, 0, 0, cv::INTER_CUBIC);
}
else
{
double ratio = 1024.0/image.rows;
cv::resize(image, image, {static_cast<int>(image.cols*ratio), 1024}, 0, 0, cv::INTER_CUBIC);
}
}
std::vector<Yolo::Detection> detections = yolo.runInference(image);
Log(Log::DEBUG)<<"Got "<<detections.size()<<" detections for "<<path;
for(const Yolo::Detection& detection : detections)
Log(Log::DEBUG)<<detection.class_id<<": "<<detection.className<<" at "<<detection.box<<" with prio "<<detection.priority;
cv::Rect crop = intRoi.getCropRectangle(detections, image.size());
cv::Mat debugImage = image.clone();
drawDebugInfo(debugImage, crop, detections);
bool ret = cv::imwrite(debugOutputPath/path.filename(), debugImage);
if(!ret)
Log(Log::WARN)<<"could not save debug image to "<<debugOutputPath/path.filename()<<" skipping";
cv::Mat croppedImage = image(crop);
cv::Mat resizedImage;
cv::resize(croppedImage, resizedImage, {512, 512}, 0, 0, cv::INTER_CUBIC);
ret = cv::imwrite(config.outputDir/path.filename(), resizedImage);
if(!ret)
Log(Log::WARN)<<"could not save image to "<<config.outputDir/path.filename()<<" skipping";
}
return 0;
}
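
The removed seamCarveResize above was left unfinished: it reads its dimensions from the never-assigned workImage, sums the widths of frozen rather than resizable slices, and returns nothing on either branch. A hedged sketch of the seam-budget idea it started, with those issues corrected; allocateSeams is a hypothetical name and the slice representation mirrors cutImageIntoHorzRegions:

#include <opencv2/core.hpp>
#include <utility>
#include <vector>

// Distribute requiredLines seams across the non-frozen slices,
// proportional to each slice's width.
std::vector<int> allocateSeams(const std::vector<std::pair<cv::Mat, bool>>& slices, int requiredLines)
{
	int totalResizableSize = 0;
	for(const auto& [slice, frozen] : slices)
	{
		if(!frozen)
			totalResizableSize += slice.cols;
	}

	std::vector<int> seamsForSlice(slices.size(), 0);
	if(totalResizableSize == 0)
		return seamsForSlice;

	for(size_t i = 0; i < slices.size(); ++i)
	{
		if(!slices[i].second)
			seamsForSlice[i] = (slices[i].first.cols * requiredLines) / totalResizableSize;
	}
	return seamsForSlice;
}

Integer division leaves a remainder of up to slices.size() - 1 seams; a caller would hand the leftovers to the widest resizable slice.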

options.h
View file

@@ -1,70 +0,0 @@
#pragma once
#include <string>
#include <vector>
#include <argp.h>
#include <iostream>
#include <filesystem>
#include "log.h"
const char *argp_program_version = "AIImagePreprocesses";
const char *argp_program_bug_address = "<carl@uvos.xyz>";
static char doc[] = "Application that transforms images into the formats, sizes and aspect ratios required for AI training";
static char args_doc[] = "[IMAGES]";
static struct argp_option options[] =
{
{"verbose", 'v', 0, 0, "Show debug messages" },
{"quiet", 'q', 0, 0, "only output data" },
{"model", 'm', "[FILENAME]", 0, "YoloV8 model to use for detection" },
{"classes", 'c', "[FILENAME]", 0, "classes text file to use" },
{"out", 'o', "[DIRECTORY]", 0, "directory whre images are to be saved" },
{"debug", 'd', 0, 0, "output debug images" },
{"seam-carving", 's', 0, 0, "model to train: "}
};
struct Config
{
std::vector<std::filesystem::path> imagePaths;
std::filesystem::path modelPath;
std::filesystem::path classesPath;
std::filesystem::path outputDir;
bool seamCarving = false;
bool debug = false;
};
static error_t parse_opt (int key, char *arg, struct argp_state *state)
{
Config *config = reinterpret_cast<Config*>(state->input);
switch (key)
{
case 'q':
Log::level = Log::ERROR;
break;
case 'v':
Log::level = Log::DEBUG;
break;
case 'm':
config->modelPath = arg;
break;
case 'c':
config->classesPath = arg;
break;
case 'd':
config->debug = true;
break;
case 'o':
config->outputDir.assign(arg);
break;
case 's':
config->seamCarving = true;
break;
case ARGP_KEY_ARG:
config->imagePaths.push_back(arg);
break;
default:
return ARGP_ERR_UNKNOWN;
}
return 0;
}
static struct argp argp = {options, parse_opt, args_doc, doc};

seamcarving.h
View file

@@ -1,61 +0,0 @@
#ifndef __SEAM__CARVING_HPP__
#define __SEAM__CARVING_HPP__
#include <opencv2/core/core.hpp>
#define DEBUG 0
class SeamCarving {
public:
void showImage();
const cv::Mat& getFinalImage();
virtual void computeNewFinalImage(int pos);
void setBlockUpdate(bool bUpdate);
bool getBlockUpdateStatus();
virtual void showSeamsImg();
protected:
SeamCarving(const cv::Mat &img, int seams, bool grow);
void init();
virtual cv::Mat drawSeam(const cv::Mat &frame, const std::vector<int> &seam) = 0;
cv::Mat image;
cv::Mat finalImage;
int seams;
bool grow;
int sliderMax;
int sliderPos;
std::vector<std::vector<int>> vecSeams;
private:
cv::Mat GetEnergyImg(const cv::Mat &img);
cv::Mat computeGradientMagnitude(const cv::Mat &frame);
float intensity(float currIndex, int start, int end);
cv::Mat computePathIntensityMat(const cv::Mat &rawEnergyMap);
std::vector<int> getLeastImportantPath(const cv::Mat &importanceMap);
cv::Mat removeLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam);
void removePixel(const cv::Mat &original, cv::Mat &outputMap, int row, int minCol);
cv::Mat addLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam);
void addPixel(const cv::Mat &original, cv::Mat &outputMat, int row, int minCol);
bool blockUpdate = false;
};
class SeamCarvingHorizontal : public SeamCarving
{
public:
SeamCarvingHorizontal(char* fileName, int seams=100, bool grow=false);
protected:
virtual cv::Mat drawSeam(const cv::Mat &frame, const std::vector<int> &seam) override;
};
class SeamCarvingVertical : public SeamCarving {
public:
SeamCarvingVertical(char* fileName, int seams=100, bool grow=false);
virtual void computeNewFinalImage(int pos) override;
#if DEBUG
virtual void showSeamsImg() override;
#endif
protected:
virtual cv::Mat drawSeam(const cv::Mat &frame, const std::vector<int> &seam) override;
};
#endif // __SEAM__CARVING_HPP__

seamcarvinghoriz.cpp
View file

@@ -1,28 +0,0 @@
#include "seamcarving.h"
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <iostream>
#include <cfloat>
cv::Mat SeamCarvingHorizontal::drawSeam(const cv::Mat &frame, const std::vector<int> &seam)
{
cv::Mat retMat = frame.clone();
for(int row = 0; row < frame.rows; row++)
{
for(int col = 0; col < frame.cols; col++)
{
retMat.at<cv::Vec3b>(row, seam[row])[0] = 0;
retMat.at<cv::Vec3b>(row, seam[row])[1] = 255;
retMat.at<cv::Vec3b>(row, seam[row])[2] = 0;
}
}
return retMat;
}
SeamCarvingHorizontal::SeamCarvingHorizontal(char* fileName, int seams, bool grow) :
SeamCarving( cv::imread(fileName, cv::IMREAD_COLOR), seams, grow)
{
sliderMax = image.cols;
init();
}
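
The removed drawSeam above writes the same seam pixel frame.cols times per row; the inner column loop does no additional work. An equivalent sketch without it:

#include <opencv2/core.hpp>
#include <vector>

cv::Mat drawSeamGreen(const cv::Mat& frame, const std::vector<int>& seam)
{
	cv::Mat retMat = frame.clone();
	for(int row = 0; row < retMat.rows; row++)
		retMat.at<cv::Vec3b>(row, seam[row]) = cv::Vec3b(0, 255, 0); // mark the seam green
	return retMat;
}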

seamcarvingvert.cpp
View file

@@ -1,51 +0,0 @@
#include "seamcarving.h"
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <iostream>
#include <cfloat>
SeamCarvingVertical::SeamCarvingVertical(char* fileName, int seams, bool grow) :
SeamCarving( cv::imread(fileName, cv::IMREAD_COLOR), seams, grow)
{
sliderMax = image.rows;
cv::Mat oldImage = image;
image = image.t();
init();
image = oldImage;
finalImage = finalImage.t();
}
cv::Mat SeamCarvingVertical::drawSeam(const cv::Mat &frame, const std::vector<int> &seam)
{
cv::Mat retMat = frame.clone();
for(int col = 0; col < frame.cols; col++)
{
for(int row = 0; row < frame.rows; row++)
{
retMat.at<cv::Vec3b>(seam[col], col)[0] = 0;
retMat.at<cv::Vec3b>(seam[col], col)[1] = 255;
retMat.at<cv::Vec3b>(seam[col], col)[2] = 0;
}
}
return retMat;
}
void SeamCarvingVertical::computeNewFinalImage(int pos)
{
cv::Mat oldImage = image;
image = image.t();
SeamCarving::computeNewFinalImage(pos);
image = oldImage;
finalImage = finalImage.t();
}
#if DEBUG
void SeamCarvingVertical::showSeamsImg()
{
cv::Mat oldImage = this->image;
this->image = this->image.t();
SeamCarving::showImage();
this->image = oldImage;
}
#endif
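
SeamCarvingVertical reuses the horizontal machinery throughout by transposing the image, running the operation, and transposing the result back. A generic sketch of that round-trip pattern; applyVertically is a hypothetical helper, not part of the removed code:

#include <opencv2/core.hpp>

template <typename RowWiseOp>
cv::Mat applyVertically(const cv::Mat& image, RowWiseOp op)
{
	cv::Mat transposed = image.t(); // columns become rows
	cv::Mat result = op(transposed); // any horizontal (row-wise) seam operation
	return result.t();               // restore the original orientation
}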