Compare commits

...

10 commits

34 changed files with 1505 additions and 760 deletions

View file

@@ -1,15 +1,7 @@
 cmake_minimum_required(VERSION 3.6)
-project(AIImagePrepross)
+project(ImageAiUtils)
-find_package(OpenCV REQUIRED)
 set(CMAKE_CXX_STANDARD 17)
+set(WEIGHT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/Weights)
-set(SRC_FILES main.cpp yolo.cpp tokenize.cpp log.cpp seamcarvingvert.cpp seamcarvinghoriz.cpp seamcarving.cpp utils.cpp intelligentroi.cpp)
+add_subdirectory(SmartCrop)
-add_executable(${PROJECT_NAME} ${SRC_FILES})
-target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} -ltbb)
-target_include_directories(${PROJECT_NAME} PRIVATE ${OpenCV_INCLUDE_DIRS})
-target_compile_options(${PROJECT_NAME} PRIVATE -s -g -Wall)
-install(TARGETS ${PROJECT_NAME} RUNTIME DESTINATION bin)

View file

@@ -0,0 +1,141 @@
import warnings
import argparse
import cv2
import torch
import os
import numpy
import json
from typing import Iterator
from torch.multiprocessing import Process, Queue
from tqdm import tqdm
from transformers import AutoProcessor, BitsAndBytesConfig, LlavaForConditionalGeneration, logging
image_ext_ocv = [".bmp", ".jpeg", ".jpg", ".png"]
def find_image_files(path: str) -> list[str]:
paths = list()
for root, dirs, files in os.walk(path):
for filename in files:
name, extension = os.path.splitext(filename)
if extension.lower() in image_ext_ocv:
paths.append(os.path.join(root, filename))
return paths
def image_loader(paths: list[str]) -> Iterator[numpy.ndarray]:
for path in paths:
name, extension = os.path.splitext(path)
extension = extension.lower()
        imagebgr = cv2.imread(path)
        if imagebgr is None:
            print(f"Warning: could not load {path}")
            continue
        # OpenCV loads BGR; convert to the RGB layout the processor expects
        yield cv2.cvtColor(imagebgr, cv2.COLOR_BGR2RGB), path
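# Worker process, one per GPU: loads a 4-bit quantized LLaVA instance on its
# device and captions its share of the images in batches, pushing one
# {"file_name", "text"} record per image onto the shared queue. Assumes a
# bitsandbytes and flash-attention-2 capable install.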
def pipeline(queue: Queue, image_paths: list[str], prompt: str, device: torch.device, model_name_or_path: str, batch_size: int):
model = LlavaForConditionalGeneration.from_pretrained(model_name_or_path, torch_dtype=torch.float16, low_cpu_mem_usage=None,
quantization_config=BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_compute_dtype=torch.float16,
bnb_4bit_use_double_quant=False,
bnb_4bit_quant_type='nf4',
), device_map=device, attn_implementation="flash_attention_2")
processor = AutoProcessor.from_pretrained(model_name_or_path)
image_generator = image_loader(image_paths)
stop = False
finished_count = 0
while not stop:
prompts = list()
images = list()
filenames = list()
for i in range(0, batch_size):
image, filename = next(image_generator, (None, None))
if image is None:
stop = True
break
filenames.append(filename)
images.append(image)
prompts.append(prompt)
if len(images) == 0:
break
inputs = processor(text=prompts, images=images, return_tensors="pt").to(model.device)
generate_ids = model.generate(**inputs, max_new_tokens=100, min_new_tokens=3, length_penalty=1.0, do_sample=False, temperature=1.0, top_k=50, top_p=1.0)
decodes = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
finished_count += len(images)
for i, decoded in enumerate(decodes):
trim = len(prompt) - len("<image>")
queue.put({"file_name": filenames[i], "text": decoded[trim:].strip()})
def split_list(input_list, count):
target_length = int(len(input_list) / count)
for i in range(0, count - 1):
yield input_list[i * target_length: (i + 1) * target_length]
yield input_list[(count - 1) * target_length: len(input_list)]
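# e.g. split_list(list(range(5)), 2) yields [0, 1] and [2, 3, 4]; the final
# chunk absorbs the remainder.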
def save_meta(meta_file, meta, reldir, common_description):
meta["file_name"] = os.path.relpath(meta["file_name"], reldir)
if common_description is not None:
meta["text"] = common_description + meta["text"]
meta_file.write(json.dumps(meta) + '\n')
if __name__ == "__main__":
parser = argparse.ArgumentParser("A script to tag images via llava")
parser.add_argument('--model', '-m', default="llava-hf/llava-1.5-13b-hf", help="model to use")
parser.add_argument('--quantize', '-q', action='store_true', help="load quantized")
    parser.add_argument('--prompt', '-p', default="Please describe this image in 10 to 20 words.", help="Prompt to use on each image")
    parser.add_argument('--batch', '-b', default=4, type=int, help="Batch size to use for inference")
    parser.add_argument('--common_description', '-c', help="An optional description that will be prepended to the ai generated one")
    parser.add_argument('--image_dir', '-i', help="A directory containing the images to tag")
args = parser.parse_args()
prompt = "USER: <image>\n" + args.prompt + "\nASSISTANT: "
os.environ["BITSANDBYTES_NOWELCOME"] = "1"
image_paths = find_image_files(args.image_dir)
image_path_chunks = list(split_list(image_paths, torch.cuda.device_count()))
print(f"Will use {torch.cuda.device_count()} processies to create tags")
logging.set_verbosity_error()
warnings.filterwarnings("ignore")
torch.multiprocessing.set_start_method('spawn')
queue = Queue()
    processes = list()
    for i in range(0, torch.cuda.device_count()):
        processes.append(Process(target=pipeline, args=(queue, image_path_chunks[i], prompt, torch.device(i), args.model, args.batch)))
        processes[-1].start()
    progress = tqdm(desc="Generating tags", total=len(image_paths))
    done = False
    with open(os.path.join(args.image_dir, "metadata.jsonl"), mode='w') as output_file:
        while not done:
            if not queue.empty():
                meta = queue.get()
                save_meta(output_file, meta, args.image_dir, args.common_description)
                progress.update()
            done = True
            for process in processes:
                if process.is_alive():
                    done = False
                    break
        # drain results queued after the final liveness check
        while not queue.empty():
            meta = queue.get()
            save_meta(output_file, meta, args.image_dir, args.common_description)
            progress.update()
    for process in processes:
        process.join()

View file

@@ -0,0 +1,3 @@
from .deepdanbooru_onnx import DeepDanbooru
from .deepdanbooru_onnx import process_image
__version__ = '0.0.8'

View file

@@ -0,0 +1,244 @@
import onnxruntime as ort
from PIL import Image
import numpy as np
import os
from tqdm import tqdm
import requests
import hashlib
from typing import List, Union
import shutil
from pathlib import Path
def process_image(image: Image.Image) -> np.ndarray:
"""
Convert an image to a numpy array.
:param image: the image to convert
:return: the numpy array
"""
image = image.convert("RGB").resize((512, 512))
image = np.array(image).astype(np.float32) / 255
image = image.transpose((2, 0, 1)).reshape(1, 3, 512, 512).transpose((0, 2, 3, 1))
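    # The transpose/reshape round trip is equivalent to image[np.newaxis, ...]:
    # the final layout is NHWC, shape (1, 512, 512, 3), float32 in [0, 1]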
return image
def download(url: str, save_path: str, md5: str, length: int) -> bool:
    """
    Download a file from url to save_path.
    If the file already exists, check its md5.
    If the md5 matches, return True; if it doesn't, return False.

    :param url: the url of the file to download
    :param save_path: the path to save the file
    :param md5: the expected md5 of the file
    :param length: the length of the file in bytes (used for the progress bar)
    :return: True if the file is downloaded successfully, False otherwise
    """
try:
response = requests.get(url=url, stream=True)
with open(save_path, "wb") as f:
with tqdm.wrapattr(
response.raw, "read", total=length, desc="Downloading"
) as r_raw:
shutil.copyfileobj(r_raw, f)
        return hashlib.md5(open(save_path, "rb").read()).hexdigest() == md5
except Exception as e:
print(e)
return False
def download_model():
"""
Download the model and tags file from the server.
:return: the path to the model and tags file
"""
model_url = (
"https://huggingface.co/chinoll/deepdanbooru/resolve/main/deepdanbooru.onnx"
)
tags_url = "https://huggingface.co/chinoll/deepdanbooru/resolve/main/tags.txt"
model_md5 = "16be4e40ebcc0b1d1915bbf31f00969f"
tags_md5 = "a3f764de985cdeba89f1d232a4204402"
model_length = 643993025
tags_length = 133810
home = str(Path.home()) + "/.deepdanbooru_onnx/"
if not os.path.exists(home):
os.mkdir(home)
model_name = "deepdanbooru.onnx"
tags_name = "tags.txt"
model_path = home + model_name
tags_path = home + tags_name
if os.path.exists(model_path):
if hashlib.md5(open(model_path, "rb").read()).hexdigest() != model_md5:
os.remove(model_path)
if not download(model_url, model_path, model_md5, model_length):
raise ValueError("Model download failed")
else:
if not download(model_url, model_path, model_md5, model_length):
raise ValueError("Model download failed")
if os.path.exists(tags_path):
if hashlib.md5(open(tags_path, "rb").read()).hexdigest() != tags_md5:
os.remove(tags_path)
if not download(tags_url, tags_path, tags_md5, tags_length):
raise ValueError("Tags download failed")
else:
if not download(tags_url, tags_path, tags_md5, tags_length):
raise ValueError("Tags download failed")
return model_path, tags_path
class DeepDanbooru:
def __init__(
self,
mode: str = "auto",
model_path: Union[str, None] = None,
tags_path: Union[str, None] = None,
threshold: Union[float, int] = 0.6,
pin_memory: bool = False,
batch_size: int = 1,
):
"""
Initialize the DeepDanbooru class.
:param mode: the mode of the model, "cpu" or "gpu" or "auto"
:param model_path: the path to the model file
:param tags_path: the path to the tags file
:param threshold: the threshold of the model
:param pin_memory: whether to use pin memory
:param batch_size: the batch size of the model
"""
providers = {
"cpu": "CPUExecutionProvider",
"gpu": "CUDAExecutionProvider",
"tensorrt": "TensorrtExecutionProvider",
"auto": (
"CUDAExecutionProvider"
if "CUDAExecutionProvider" in ort.get_available_providers()
else "CPUExecutionProvider"
),
}
if not (isinstance(threshold, float) or isinstance(threshold, int)):
raise TypeError("threshold must be float or int")
if threshold < 0 or threshold > 1:
raise ValueError("threshold must be between 0 and 1")
        if mode not in providers:
            raise ValueError(
                "Mode not supported. Please choose from: cpu, gpu, tensorrt, auto"
            )
        if providers[mode] not in ort.get_available_providers():
            raise ValueError(
                f"Your device does not support {mode}. Please choose from: cpu"
            )
if model_path is not None and not os.path.exists(model_path):
raise FileNotFoundError("Model file not found")
if tags_path is not None and not os.path.exists(tags_path):
raise FileNotFoundError("Tags file not found")
if model_path is None or tags_path is None:
model_path, tags_path = download_model()
self.session = ort.InferenceSession(model_path, providers=[providers[mode]])
self.tags = [i.replace("\n", "") for i in open(tags_path, "r").readlines()]
self.input_name = self.session.get_inputs()[0].name
self.output_name = [output.name for output in self.session.get_outputs()]
self.threshold = threshold
self.pin_memory = pin_memory
self.batch_size = batch_size
self.mode = mode
self.cache = {}
def __str__(self) -> str:
return f"DeepDanbooru(mode={self.mode}, threshold={self.threshold}, pin_memory={self.pin_memory}, batch_size={self.batch_size})"
def __repr__(self) -> str:
return self.__str__()
def from_image_inference(self, image: Image.Image) -> dict:
image = process_image(image)
return self.predict(image)
def from_ndarray_inferece(self, image: np.ndarray) -> dict:
if image.shape != (1, 512, 512, 3):
raise ValueError(f"Image must be {(1, 512, 512, 3)}")
return self.predict(image)
def from_file_inference(self, image: str) -> dict:
return self.from_image_inference(Image.open(image))
def from_list_inference(self, image: Union[list, tuple]) -> List[dict]:
if self.pin_memory:
image = [process_image(Image.open(i)) for i in image]
for i in [
image[i : i + self.batch_size]
for i in range(0, len(image), self.batch_size)
]:
imagelist = i
bs = len(i)
_imagelist, idx, hashlist = [], [], []
for j in range(len(i)):
img = Image.open(i[j]) if not self.pin_memory else imagelist[j]
image_hash = hashlib.md5(np.array(img).astype(np.uint8)).hexdigest()
hashlist.append(image_hash)
if image_hash in self.cache:
continue
if not self.pin_memory:
_imagelist.append(process_image(img))
else:
_imagelist.append(imagelist[j])
idx.append(j)
imagelist = _imagelist
if len(imagelist) != 0:
_image = np.vstack(imagelist)
results = self.inference(_image)
results_idx = 0
else:
results = []
for i in range(bs):
image_tag = {}
if i in idx:
hash = hashlist[i]
for tag, score in zip(self.tags, results[results_idx]):
if score >= self.threshold:
image_tag[tag] = score
results_idx += 1
self.cache[hash] = image_tag
yield image_tag
else:
yield self.cache[hashlist[i]]
def inference(self, image):
return self.session.run(self.output_name, {self.input_name: image})[0]
def predict(self, image):
result = self.inference(image)
image_tag = {}
for tag, score in zip(self.tags, result[0]):
if score >= self.threshold:
image_tag[tag] = score
return image_tag
def __call__(self, image) -> Union[dict, List[dict]]:
if isinstance(image, str):
return self.from_file_inference(image)
elif isinstance(image, np.ndarray):
return self.from_ndarray_inferece(image)
elif isinstance(image, list) or isinstance(image, tuple):
return self.from_list_inference(image)
elif isinstance(image, Image.Image):
return self.from_image_inference(image)
else:
raise ValueError("Image must be a file path or a numpy array or list/tuple")

View file

@@ -0,0 +1,3 @@
from deepdanbooru_onnx import DeepDanbooru
danbooru = DeepDanbooru()
print(danbooru("/run/media/philipp/20404acc-312c-44f2-b2d1-3a0a14257cc6/.Media/porn/00244-3145022840.png"))

View file

@@ -0,0 +1,154 @@
#!/bin/python3
import argparse
import os
from typing import Iterator
import cv2
import numpy
from tqdm import tqdm
from wand.exceptions import BlobError
from wand.image import Image
image_ext_ocv = [".bmp", ".jpeg", ".jpg", ".png"]
image_ext_wand = [".dng", ".arw"]
class LoadException(Exception):
pass
def find_image_files(path: str) -> list[str]:
paths = list()
for root, dirs, files in os.walk(path):
for filename in files:
name, extension = os.path.splitext(filename)
            if extension.lower() in image_ext_ocv or extension.lower() in image_ext_wand:
paths.append(os.path.join(root, filename))
return paths
def image_loader(paths: list[str]) -> Iterator[numpy.ndarray]:
for path in paths:
name, extension = os.path.splitext(path)
extension = extension.lower()
if extension in image_ext_ocv:
image = cv2.imread(path)
if image is None:
print(f"Warning: could not load {path}")
else:
yield image
        elif extension in image_ext_wand:
            try:
                image = Image(filename=path)
            except BlobError as e:
                print(f"Warning: could not load {path}, {e}")
                continue
            # wand decodes to RGB; convert to BGR so downstream OpenCV code
            # sees the same layout as cv2.imread
            yield cv2.cvtColor(numpy.array(image), cv2.COLOR_RGB2BGR)
def extract_video_images(video: cv2.VideoCapture, interval: int = 0):
ret = True
frame_counter = 0
while ret:
video.set(cv2.CAP_PROP_POS_FRAMES, frame_counter)
ret, frame = video.read()
if ret:
yield frame
frame_counter += interval
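# For each detected face, the match scores against all reference features are
# averaged; the first face whose mean cosine score exceeds thresh counts as a
# match. Returns a (score, matched) tuple, with score 0 when no face is found.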
def contains_face_match(detector: cv2.FaceDetectorYN, recognizer: cv2.FaceRecognizerSF, image: numpy.ndarray, referance_features: list, thresh: float) -> tuple[float, bool]:
detector.setInputSize([image.shape[1], image.shape[0]])
faces = detector.detect(image)[1]
if faces is None:
return 0, False
for face in faces:
cropped_image = recognizer.alignCrop(image, face)
features = recognizer.feature(cropped_image)
score_accum = 0.0
for referance in referance_features:
score_accum += recognizer.match(referance, features, 0)
score = score_accum / len(referance_features)
if score > thresh:
return score, True
return 0, False
def process_referance(detector: cv2.FaceDetectorYN, recognizer: cv2.FaceRecognizerSF, referance_path: str) -> list:
images = list()
out = list()
if os.path.isfile(referance_path):
image = cv2.imread(referance_path)
if image is None:
print(f"Could not load image from {referance_path}")
else:
images.append(image)
elif os.path.isdir(referance_path):
filenames = find_image_files(referance_path)
images = list(image_loader(filenames))
for image in images:
detector.setInputSize([image.shape[1], image.shape[0]])
faces = detector.detect(image)[1]
if faces is None:
print("unable to find face in referance image")
exit(1)
image = recognizer.alignCrop(image, faces[0])
features = recognizer.feature(image)
out.append(features)
return out
if __name__ == "__main__":
parser = argparse.ArgumentParser("Script to assemble a dataset of images of a specific person")
parser.add_argument('--out', '-o', default="out", help="place to put dataset")
parser.add_argument('--input', '-i', required=True, help="directory or video file to get images from")
parser.add_argument('--skip', '-s', default=0, type=int, help="skip n frames between samples when grabbing from a video file")
    parser.add_argument('--referance', '-r', required=True, help="reference image or directory of images of the person to be found")
parser.add_argument('--match_model', '-m', required=True, help="Path to the onnx recognition model to be used")
parser.add_argument('--detect_model', '-d', required=True, help="Path to the onnx detection model to be used")
parser.add_argument('--threshold', '-t', default=0.362, type=float, help="match threshold to use")
    parser.add_argument('--invert', '-n', action='store_true', help="output files that DO NOT match")
args = parser.parse_args()
recognizer = cv2.FaceRecognizerSF.create(model=args.match_model, config="", backend_id=cv2.dnn.DNN_BACKEND_DEFAULT , target_id=cv2.dnn.DNN_TARGET_CPU)
detector = cv2.FaceDetectorYN.create(model=args.detect_model, config="", input_size=[320, 320],
score_threshold=0.6, nms_threshold=0.3, top_k=5000, backend_id=cv2.dnn.DNN_BACKEND_DEFAULT, target_id=cv2.dnn.DNN_TARGET_CPU)
referance_features = process_referance(detector, recognizer, args.referance)
if len(referance_features) < 1:
print(f"Could not load any referance image(s) from {args.referance}")
exit(1)
if os.path.isfile(args.input):
video = cv2.VideoCapture(args.input)
if not video.isOpened():
print(f"Unable to open {args.input} as a video file")
exit(1)
image_generator = extract_video_images(video, args.skip + 1)
total_images = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) / (args.skip + 1)
    elif os.path.isdir(args.input):
        image_filenames = find_image_files(args.input)
        image_generator = image_loader(image_filenames)
        total_images = len(image_filenames)
else:
print(f"{args.input} is not a video file nor is it a directory")
exit(1)
os.makedirs(args.out, exist_ok=True)
progress = tqdm(total=int(total_images), desc="0.00")
counter = 0
for image in image_generator:
if image.shape[0] > 512:
aspect = image.shape[0] / image.shape[1]
resized = cv2.resize(image, (int(512 / aspect), 512), 0, 0, cv2.INTER_AREA)
else:
resized = image
score, match = contains_face_match(detector, recognizer, resized, referance_features, args.threshold)
        if match != args.invert:  # equivalent to (match XOR invert)
filename = f"{counter:04}.png"
cv2.imwrite(os.path.join(args.out, filename), image)
counter += 1
progress.set_description(f"{score:1.2f}")
progress.update()

16
SmartCrop/CMakeLists.txt Normal file
View file

@@ -0,0 +1,16 @@
cmake_minimum_required(VERSION 3.6)
find_package(OpenCV REQUIRED)
set(CMAKE_CXX_STANDARD 17)
set(SRC_FILES main.cpp yolo.cpp tokenize.cpp log.cpp seamcarving.cpp utils.cpp intelligentroi.cpp facerecognizer.cpp)
add_executable(smartcrop ${SRC_FILES})
target_link_libraries(smartcrop ${OpenCV_LIBS} -ltbb)
target_include_directories(smartcrop PRIVATE ${OpenCV_INCLUDE_DIRS})
target_compile_options(smartcrop PRIVATE -s -g -Wall)
message(WARNING ${WEIGHT_DIR})
target_compile_definitions(smartcrop PUBLIC WEIGHT_DIR="${WEIGHT_DIR}")
install(TARGETS smartcrop RUNTIME DESTINATION bin)

View file

@@ -0,0 +1,143 @@
#include "facerecognizer.h"
#include <filesystem>
#define INCBIN_PREFIX r
#include "incbin.h"
INCBIN(defaultRecognizer, WEIGHT_DIR "/face_recognition_sface_2021dec.onnx");
INCBIN(defaultDetector, WEIGHT_DIR "/face_detection_yunet_2023mar.onnx");
#include <opencv2/dnn/dnn.hpp>
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <fstream>
#include "log.h"
static const std::vector<unsigned char> onnx((unsigned char*)rdefaultDetectorData, ((unsigned char*)rdefaultDetectorData)+rdefaultDetectorSize);
FaceRecognizer::FaceRecognizer(std::filesystem::path recognizerPath, const std::filesystem::path& detectorPath, const std::vector<cv::Mat>& referances)
{
if(detectorPath.empty())
{
Log(Log::INFO)<<"Using builtin face detection model";
detector = cv::FaceDetectorYN::create("onnx", onnx, std::vector<unsigned char>(), {320, 320}, 0.6, 0.3, 5000, cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
if(!detector)
throw LoadException("Unable to load detector network from built in file");
}
else
{
detector = cv::FaceDetectorYN::create(detectorPath, "", {320, 320}, 0.6, 0.3, 5000, cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
if(!detector)
throw LoadException("Unable to load detector network from "+detectorPath.string());
}
bool defaultNetwork = recognizerPath.empty();
if(defaultNetwork)
{
Log(Log::INFO)<<"Using builtin face recognition model";
recognizerPath = cv::tempfile("onnx");
std::ofstream file(recognizerPath);
if(!file.is_open())
throw LoadException("Unable open temporary file at "+recognizerPath.string());
Log(Log::DEBUG)<<"Using "<<recognizerPath<<" as temporary file for onnx recongnition network";
file.write(reinterpret_cast<const char*>(rdefaultRecognizerData), rdefaultRecognizerSize);
file.close();
}
recognizer = cv::FaceRecognizerSF::create(recognizerPath.string(), "", cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
if(defaultNetwork)
std::filesystem::remove(recognizerPath);
if(!recognizer)
throw LoadException("Unable to load recognizer network from "+recognizerPath.string());
addReferances(referances);
}
cv::Mat FaceRecognizer::detectFaces(const cv::Mat& input)
{
detector->setInputSize(input.size());
cv::Mat faces;
detector->detect(input, faces);
return faces;
}
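// FaceDetectorYN returns one row per detected face: 4 box values, 5 landmark
// x/y pairs and a confidence score, 15 columns in total.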
bool FaceRecognizer::addReferances(const std::vector<cv::Mat>& referances)
{
bool ret = false;
for(const cv::Mat& image : referances)
{
		cv::Mat faces = detectFaces(image);
		if(faces.empty())
		{
			Log(Log::WARN)<<"A reference image provided does not contain any face";
			continue;
		}
		assert(faces.cols == 15);
		if(faces.rows > 1)
			Log(Log::WARN)<<"A reference image provided contains more than one face, only the first detected face will be considered";
cv::Mat cropedImage;
recognizer->alignCrop(image, faces.row(0), cropedImage);
cv::Mat features;
recognizer->feature(cropedImage, features);
referanceFeatures.push_back(features.clone());
ret = true;
}
return ret;
}
void FaceRecognizer::setThreshold(double threasholdIn)
{
threshold = threasholdIn;
}
double FaceRecognizer::getThreshold()
{
return threshold;
}
void FaceRecognizer::clearReferances()
{
referanceFeatures.clear();
}
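// Returns the best match over all faces found in the input. Detection::person
// holds the index of the matched reference feature, -1 if nothing clears the
// threshold, or -2 if alone is set and more than one face is present.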
FaceRecognizer::Detection FaceRecognizer::isMatch(const cv::Mat& input, bool alone)
{
cv::Mat faces = detectFaces(input);
Detection bestMatch;
bestMatch.confidence = 0;
bestMatch.person = -1;
if(alone && faces.rows > 1)
{
bestMatch.person = -2;
return bestMatch;
}
for(int i = 0; i < faces.rows; ++i)
{
cv::Mat face;
recognizer->alignCrop(input, faces.row(i), face);
cv::Mat features;
recognizer->feature(face, features);
features = features.clone();
for(size_t referanceIndex = 0; referanceIndex < referanceFeatures.size(); ++referanceIndex)
{
double score = recognizer->match(referanceFeatures[referanceIndex], features, cv::FaceRecognizerSF::FR_COSINE);
if(score > threshold && score > bestMatch.confidence)
{
bestMatch.confidence = score;
bestMatch.person = referanceIndex;
bestMatch.rect = cv::Rect(faces.at<int>(i, 0), faces.at<int>(i, 1), faces.at<int>(i, 2), faces.at<int>(i, 3));
}
}
}
return bestMatch;
}

View file

@@ -0,0 +1,48 @@
#pragma once
#include <exception>
#include <opencv2/core/mat.hpp>
#include <opencv2/objdetect/face.hpp>
#include <opencv2/core.hpp>
#include <vector>
#include <memory>
#include <filesystem>
class FaceRecognizer
{
public:
struct Detection
{
int person;
float confidence;
cv::Rect rect;
};
class LoadException : public std::exception
{
private:
std::string message;
public:
LoadException(const std::string& msg): std::exception(), message(msg) {}
virtual const char* what() const throw() override
{
return message.c_str();
}
};
private:
std::vector<cv::Mat> referanceFeatures;
std::shared_ptr<cv::FaceRecognizerSF> recognizer;
std::shared_ptr<cv::FaceDetectorYN> detector;
double threshold = 0.363;
public:
FaceRecognizer(std::filesystem::path recognizerPath = "", const std::filesystem::path& detectorPath = "", const std::vector<cv::Mat>& referances = std::vector<cv::Mat>());
cv::Mat detectFaces(const cv::Mat& input);
Detection isMatch(const cv::Mat& input, bool alone = false);
bool addReferances(const std::vector<cv::Mat>& referances);
void setThreshold(double threashold);
double getThreshold();
void clearReferances();
};

View file

@@ -31,11 +31,12 @@ void InteligentRoi::slideRectToPoint(cv::Rect& rect, const cv::Point2i& point)
 	}
 }
 
-cv::Rect InteligentRoi::maxRect(const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude)
+cv::Rect InteligentRoi::maxRect(bool& incompleate, const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude)
 {
-	int radius = std::min(imageSize.height, imageSize.width)/2;
+	incompleate = false;
+	int diameter = std::min(imageSize.height, imageSize.width);
 	cv::Point2i point(imageSize.width/2, imageSize.height/2);
-	cv::Rect candiate(point.x-radius, point.y-radius, radius*2, radius*2);
+	cv::Rect candiate(point.x-diameter/2, point.y-diameter/2, diameter, diameter);
 	std::sort(mustInclude.begin(), mustInclude.end(),
 		[&point](const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b){return compPointPrio(a, b, point);});
@@ -43,8 +44,9 @@ cv::Rect InteligentRoi::maxRect(const cv::Size2i& imageSize, std::vector<std::pa
 	while(true)
 	{
 		cv::Rect includeRect = rectFromPoints(mustInclude);
-		if(includeRect.width-2 > radius || includeRect.height-2 > radius)
+		if(includeRect.width-2 > diameter || includeRect.height-2 > diameter)
 		{
+			incompleate = true;
 			slideRectToPoint(candiate, mustInclude.back().first);
 			mustInclude.pop_back();
 			Log(Log::DEBUG)<<"cant fill";
@@ -52,7 +54,9 @@ cv::Rect InteligentRoi::maxRect(const cv::Size2i& imageSize, std::vector<std::pa
 			Log(Log::DEBUG)<<mipoint.first<<' '<<pointDist(mipoint.first, point)<<' '<<mipoint.second;
 		}
 		else
+		{
 			break;
+		}
 	}
 
 	for(const std::pair<cv::Point2i, int>& includePoint : mustInclude)
@@ -75,25 +79,30 @@ InteligentRoi::InteligentRoi(const Yolo& yolo)
 	personId = yolo.getClassForStr("person");
 }
 
-cv::Rect InteligentRoi::getCropRectangle(const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize)
+bool InteligentRoi::getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize)
 {
-	if(!detections.empty())
+	std::vector<std::pair<cv::Point2i, int>> corners;
+	for(size_t i = 0; i < detections.size(); ++i)
 	{
-		std::vector<std::pair<cv::Point2i, int>> corners;
-		for(size_t i = 0; i < detections.size(); ++i)
+		int priority = detections[i].priority;
+		if(detections[i].class_id == personId)
 		{
-			int priority = detections[i].priority;
-			if(detections[i].class_id == personId)
-				corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width/2, 0), priority+1});
+			corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width/2, 0), priority+2});
+			corners.push_back({detections[i].box.tl(), priority+1});
+			corners.push_back({detections[i].box.br(), priority});
+			corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width, 0), priority+1});
+			corners.push_back({detections[i].box.br()+cv::Point2i(0-detections[i].box.width, 0), priority});
+		}
+		else
+		{
 			corners.push_back({detections[i].box.tl(), priority});
 			corners.push_back({detections[i].box.br(), priority});
 			corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width, 0), priority});
 			corners.push_back({detections[i].box.br()+cv::Point2i(0-detections[i].box.width, 0), priority});
 		}
-		return maxRect(imageSize, corners);
 	}
-	Log(Log::DEBUG)<<"Using center crop as there are no detections";
-	return maxRect(imageSize);
+
+	bool incompleate;
+	out = maxRect(incompleate, imageSize, corners);
+	return incompleate;
 }

View file

@@ -10,9 +10,9 @@ private:
 	int personId;
 	static bool compPointPrio(const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b, const cv::Point2i& center);
 	static void slideRectToPoint(cv::Rect& rect, const cv::Point2i& point);
-	static cv::Rect maxRect(const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude = {});
+	static cv::Rect maxRect(bool& incompleate, const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude = {});
 
 public:
 	InteligentRoi(const Yolo& yolo);
-	cv::Rect getCropRectangle(const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize);
+	bool getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize);
 };

440
SmartCrop/main.cpp Normal file
View file

@@ -0,0 +1,440 @@
#include <filesystem>
#include <iostream>
#include <opencv2/core.hpp>
#include <opencv2/core/types.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <algorithm>
#include <execution>
#include <string>
#include <vector>
#include <numeric>
#include "yolo.h"
#include "log.h"
#include "options.h"
#include "utils.h"
#include "intelligentroi.h"
#include "seamcarving.h"
#include "facerecognizer.h"
const Yolo::Detection* pointInDetectionHoriz(int x, const std::vector<Yolo::Detection>& detections, const Yolo::Detection* ignore = nullptr)
{
const Yolo::Detection* inDetection = nullptr;
for(const Yolo::Detection& detection : detections)
{
if(ignore && ignore == &detection)
continue;
if(detection.box.x <= x && detection.box.x+detection.box.width >= x)
{
if(!inDetection || detection.box.br().x > inDetection->box.br().x)
inDetection = &detection;
}
}
return inDetection;
}
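// Advances x to the right edge of the detection region it currently falls in,
// or to the start of the next detection if it is in none. The bool return
// marks whether the region just passed is covered by a detection ("frozen").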
bool findRegionEndpointHoriz(int& x, const std::vector<Yolo::Detection>& detections, int imgSizeX)
{
const Yolo::Detection* inDetection = pointInDetectionHoriz(x, detections);
Log(Log::DEBUG, false)<<__func__<<" point "<<x;
if(!inDetection)
{
const Yolo::Detection* closest = nullptr;
for(const Yolo::Detection& detection : detections)
{
if(detection.box.x > x)
{
			if(closest == nullptr || detection.box.x-x < closest->box.x-x) // pick the nearest box to the right
closest = &detection;
}
}
if(closest)
x = closest->box.x;
else
x = imgSizeX;
Log(Log::DEBUG)<<" is not in any box and will be moved to "<<x<<" where the closest box ("<<(closest ? closest->className : "null")<<") is";
return false;
}
else
{
x = inDetection->box.br().x;
Log(Log::DEBUG, false)<<" is in a box and will be moved to its end "<<x<<" where ";
const Yolo::Detection* candidateDetection = pointInDetectionHoriz(x, detections, inDetection);
if(candidateDetection && candidateDetection->box.br().x > x)
{
Log(Log::DEBUG)<<"it is again in a box";
return findRegionEndpointHoriz(x, detections, imgSizeX);
}
else
{
Log(Log::DEBUG)<<"it is not in a box";
return true;
}
}
}
std::vector<std::pair<cv::Mat, bool>> cutImageIntoHorzRegions(cv::Mat& image, const std::vector<Yolo::Detection>& detections)
{
std::vector<std::pair<cv::Mat, bool>> out;
std::cout<<__func__<<' '<<image.cols<<'x'<<image.rows<<std::endl;
for(int x = 0; x < image.cols; ++x)
{
int start = x;
bool frozen = findRegionEndpointHoriz(x, detections, image.cols);
int width = x-start;
if(x < image.cols)
++width;
cv::Rect rect(start, 0, width, image.rows);
Log(Log::DEBUG)<<__func__<<" region\t"<<rect;
cv::Mat slice = image(rect);
out.push_back({slice, frozen});
}
return out;
}
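// Adjacent slices overlap by one column (see the ++width above), so the
// assembly below advances by cols-1 per slice when stitching them back.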
cv::Mat assembleFromSlicesHoriz(const std::vector<std::pair<cv::Mat, bool>>& slices)
{
assert(!slices.empty());
int cols = 0;
for(const std::pair<cv::Mat, bool>& slice : slices)
cols += slice.first.cols;
	cv::Mat image(slices[0].first.rows, cols, slices[0].first.type()); // cv::Mat takes (rows, cols, type)
Log(Log::DEBUG)<<__func__<<' '<<image.size()<<' '<<cols<<' '<<slices[0].first.rows;
int col = 0;
for(const std::pair<cv::Mat, bool>& slice : slices)
{
cv::Rect rect(col, 0, slice.first.cols, slice.first.rows);
Log(Log::DEBUG)<<__func__<<' '<<rect;
slice.first.copyTo(image(rect));
col += slice.first.cols-1;
}
return image;
}
void transposeRect(cv::Rect& rect)
{
int x = rect.x;
rect.x = rect.y;
rect.y = x;
int width = rect.width;
rect.width = rect.height;
rect.height = width;
}
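// Strategy: slices covered by high priority detections are frozen and left
// untouched; the remaining slices absorb the required seams proportionally to
// their width. Vertical carving reuses the horizontal code path by transposing
// the image (and the detection boxes) first.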
bool seamCarveResize(cv::Mat& image, std::vector<Yolo::Detection> detections, double targetAspectRatio = 1.0)
{
detections.erase(std::remove_if(detections.begin(), detections.end(), [](const Yolo::Detection& detection){return detection.priority < 3;}), detections.end());
double aspectRatio = image.cols/static_cast<double>(image.rows);
Log(Log::DEBUG)<<"Image size "<<image.size()<<" aspect ratio "<<aspectRatio<<" target aspect ratio "<<targetAspectRatio;
bool vertical = false;
if(aspectRatio > targetAspectRatio)
vertical = true;
int requiredLines = 0;
if(!vertical)
requiredLines = image.rows*targetAspectRatio - image.cols;
else
requiredLines = image.cols/targetAspectRatio - image.rows;
Log(Log::DEBUG)<<__func__<<' '<<requiredLines<<" lines are required in "<<(vertical ? "vertical" : "horizontal")<<" direction";
if(vertical)
{
cv::transpose(image, image);
for(Yolo::Detection& detection : detections)
transposeRect(detection.box);
}
std::vector<std::pair<cv::Mat, bool>> slices = cutImageIntoHorzRegions(image, detections);
Log(Log::DEBUG)<<"Image has "<<slices.size()<<" slices:";
int totalResizableSize = 0;
for(const std::pair<cv::Mat, bool>& slice : slices)
{
Log(Log::DEBUG)<<"a "<<(slice.second ? "frozen" : "unfrozen")<<" slice of size "<<slice.first.cols;
if(!slice.second)
totalResizableSize += slice.first.cols;
}
if(totalResizableSize < requiredLines+1)
{
Log(Log::WARN)<<"Unable to seam carve as there are only "<<totalResizableSize<<" unfrozen cols";
if(vertical)
cv::transpose(image, image);
return false;
}
std::vector<int> seamsForSlice(slices.size(), 0);
for(size_t i = 0; i < slices.size(); ++i)
{
if(!slices[i].second)
seamsForSlice[i] = (static_cast<double>(slices[i].first.cols)/totalResizableSize)*requiredLines;
}
	int residual = requiredLines - std::accumulate(seamsForSlice.begin(), seamsForSlice.end(), decltype(seamsForSlice)::value_type(0));
for(ssize_t i = slices.size()-1; i >= 0; --i)
{
if(!slices[i].second)
{
seamsForSlice[i] += residual;
break;
}
}
for(size_t i = 0; i < slices.size(); ++i)
{
if(seamsForSlice[i] != 0)
{
bool ret = SeamCarving::strechImage(slices[i].first, seamsForSlice[i], true);
if(!ret)
{
if(vertical)
transpose(image, image);
return false;
}
}
}
image = assembleFromSlicesHoriz(slices);
if(vertical)
cv::transpose(image, image);
return true;
}
void drawDebugInfo(cv::Mat &image, const cv::Rect& rect, const std::vector<Yolo::Detection>& detections)
{
for(const Yolo::Detection& detection : detections)
{
cv::rectangle(image, detection.box, detection.color, 3);
std::string label = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4) + ' ' + std::to_string(detection.priority);
cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_DUPLEX, 1, 1, 0);
cv::Rect textBox(detection.box.x, detection.box.y - 40, labelSize.width + 10, labelSize.height + 20);
cv::rectangle(image, textBox, detection.color, cv::FILLED);
cv::putText(image, label, cv::Point(detection.box.x + 5, detection.box.y - 10), cv::FONT_HERSHEY_DUPLEX, 1, cv::Scalar(0, 0, 0), 1, 0);
}
cv::rectangle(image, rect, cv::Scalar(0, 0, 255), 8);
}
static void reduceSize(cv::Mat& image, const cv::Size& targetSize)
{
int longTargetSize = std::max(targetSize.width, targetSize.height)*2;
if(std::max(image.cols, image.rows) > longTargetSize)
{
if(image.cols > image.rows)
{
double ratio = static_cast<double>(longTargetSize)/image.cols;
cv::resize(image, image, {longTargetSize, static_cast<int>(image.rows*ratio)}, 0, 0, ratio < 1 ? cv::INTER_AREA : cv::INTER_CUBIC);
}
else
{
double ratio = static_cast<double>(longTargetSize)/image.rows;
cv::resize(image, image, {static_cast<int>(image.cols*ratio), longTargetSize}, 0, 0, ratio < 1 ? cv::INTER_AREA : cv::INTER_CUBIC);
}
}
}
void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yolo, FaceRecognizer* recognizer,
std::mutex& reconizerMutex, const std::filesystem::path& debugOutputPath)
{
InteligentRoi intRoi(yolo);
cv::Mat image = cv::imread(path);
if(!image.data)
{
Log(Log::WARN)<<"could not load image "<<path<<" skipping";
return;
}
reduceSize(image, config.targetSize);
std::vector<Yolo::Detection> detections = yolo.runInference(image);
Log(Log::DEBUG)<<"Got "<<detections.size()<<" detections for "<<path;
for(Yolo::Detection& detection : detections)
{
bool hasmatch = false;
if(recognizer && detection.className == "person")
{
cv::Mat person = image(detection.box);
reconizerMutex.lock();
FaceRecognizer::Detection match = recognizer->isMatch(person);
reconizerMutex.unlock();
if(match.person >= 0)
{
detection.priority += 10;
hasmatch = true;
detections.push_back({0, "Face", match.confidence, 20, {255, 0, 0}, match.rect});
}
}
Log(Log::DEBUG)<<detection.class_id<<": "<<detection.className<<" at "<<detection.box<<" with prio "<<detection.priority<<(hasmatch ? " has match" : "");
}
cv::Rect crop;
bool incompleate = intRoi.getCropRectangle(crop, detections, image.size());
if(config.seamCarving && incompleate)
{
bool ret = seamCarveResize(image, detections, config.targetSize.aspectRatio());
if(ret && image.size().aspectRatio() != config.targetSize.aspectRatio())
{
detections = yolo.runInference(image);
}
}
cv::Mat croppedImage;
if(image.size().aspectRatio() != config.targetSize.aspectRatio() && incompleate)
{
intRoi.getCropRectangle(crop, detections, image.size());
if(config.debug)
{
cv::Mat debugImage = image.clone();
drawDebugInfo(debugImage, crop, detections);
bool ret = cv::imwrite(debugOutputPath/path.filename(), debugImage);
if(!ret)
Log(Log::WARN)<<"could not save debug image to "<<debugOutputPath/path.filename()<<" skipping";
}
croppedImage = image(crop);
}
else if(!incompleate)
{
croppedImage = image(crop);
}
else
{
croppedImage = image;
}
cv::Mat resizedImage;
cv::resize(croppedImage, resizedImage, config.targetSize, 0, 0, cv::INTER_CUBIC);
bool ret = cv::imwrite(config.outputDir/path.filename(), resizedImage);
if(!ret)
Log(Log::WARN)<<"could not save image to "<<config.outputDir/path.filename()<<" skipping";
}
void threadFn(const std::vector<std::filesystem::path>& images, const Config& config, FaceRecognizer* recognizer,
std::mutex& reconizerMutex, const std::filesystem::path& debugOutputPath)
{
Yolo yolo(config.modelPath, {640, 480}, config.classesPath, false);
for(std::filesystem::path path : images)
pipeline(path, config, yolo, recognizer, reconizerMutex, debugOutputPath);
}
template<typename T>
std::vector<std::vector<T>> splitVector(const std::vector<T>& vec, size_t parts)
{
std::vector<std::vector<T>> out;
size_t length = vec.size()/parts;
size_t remain = vec.size() % parts;
size_t begin = 0;
size_t end = 0;
for (size_t i = 0; i < std::min(parts, vec.size()); ++i)
{
end += (remain > 0) ? (length + !!(remain--)) : length;
out.push_back(std::vector<T>(vec.begin() + begin, vec.begin() + end));
begin = end;
}
return out;
}
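// e.g. splitVector with 7 elements and 3 parts yields sizes 3, 2, 2; it never
// returns more parts than there are elements.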
int main(int argc, char* argv[])
{
Log::level = Log::INFO;
Config config;
argp_parse(&argp, argc, argv, 0, 0, &config);
if(config.outputDir.empty())
{
Log(Log::ERROR)<<"a output path \"-o\" is required";
return 1;
}
if(config.imagePaths.empty())
{
Log(Log::ERROR)<<"at least one input image or directory is required";
return 1;
}
std::vector<std::filesystem::path> imagePaths;
for(const std::filesystem::path& path : config.imagePaths)
getImageFiles(path, imagePaths);
Log(Log::DEBUG)<<"Images:";
	for(const std::filesystem::path& path : imagePaths)
Log(Log::DEBUG)<<path;
if(imagePaths.empty())
{
Log(Log::ERROR)<<"no image was found\n";
return 1;
}
if(!std::filesystem::exists(config.outputDir))
{
if(!std::filesystem::create_directory(config.outputDir))
{
Log(Log::ERROR)<<"could not create directory at "<<config.outputDir;
return 1;
}
}
std::filesystem::path debugOutputPath(config.outputDir/"debug");
if(config.debug)
{
if(!std::filesystem::exists(debugOutputPath))
std::filesystem::create_directory(debugOutputPath);
}
FaceRecognizer* recognizer = nullptr;
std::mutex recognizerMutex;
if(!config.focusPersonImage.empty())
{
cv::Mat personImage = cv::imread(config.focusPersonImage);
if(personImage.empty())
{
Log(Log::ERROR)<<"Could not load image from "<<config.focusPersonImage;
return 1;
}
recognizer = new FaceRecognizer();
recognizer->addReferances({personImage});
recognizer->setThreshold(config.threshold);
}
std::vector<std::thread> threads;
std::vector<std::vector<std::filesystem::path>> imagePathParts = splitVector(imagePaths, std::thread::hardware_concurrency());
for(size_t i = 0; i < std::thread::hardware_concurrency(); ++i)
threads.push_back(std::thread(threadFn, imagePathParts[i], std::ref(config), recognizer, std::ref(recognizerMutex), std::ref(debugOutputPath)));
for(std::thread& thread : threads)
thread.join();
return 0;
}

98
SmartCrop/options.h Normal file
View file

@@ -0,0 +1,98 @@
#pragma once
#include <string>
#include <vector>
#include <argp.h>
#include <iostream>
#include <filesystem>
#include <opencv2/core/types.hpp>
#include "log.h"
const char *argp_program_version = "AIImagePreprocesses";
const char *argp_program_bug_address = "<carl@uvos.xyz>";
static char doc[] = "Application that transforms images into formats, sizes and aspect ratios required for ai training";
static char args_doc[] = "FILE(S)";
static struct argp_option options[] =
{
{"verbose", 'v', 0, 0, "Show debug messages" },
{"quiet", 'q', 0, 0, "only output data" },
{"model", 'm', "[FILENAME]", 0, "YoloV8 model to use for detection" },
{"classes", 'c', "[FILENAME]", 0, "classes text file to use" },
{"out", 'o', "[DIRECTORY]", 0, "directory whre images are to be saved" },
{"debug", 'd', 0, 0, "output debug images" },
{"seam-carving", 's', 0, 0, "use seam carving to change image aspect ratio instead of croping"},
{"size", 'z', "[PIXELS]", 0, "target output size, default: 512"},
{"focus-person", 'f', "[FILENAME]", 0, "a file name to an image of a person that the crop should focus on"},
{"person-threshold", 't', "[NUMBER]", 0, "the threshold at witch to consider a person matched, defaults to 0.363"},
{0}
};
struct Config
{
std::vector<std::filesystem::path> imagePaths;
std::filesystem::path modelPath;
std::filesystem::path classesPath;
std::filesystem::path outputDir;
std::filesystem::path focusPersonImage;
bool seamCarving = false;
bool debug = false;
double threshold = 0.363;
cv::Size targetSize = cv::Size(512, 512);
};
static error_t parse_opt (int key, char *arg, struct argp_state *state)
{
Config *config = reinterpret_cast<Config*>(state->input);
try
{
switch (key)
{
case 'q':
Log::level = Log::ERROR;
break;
case 'v':
Log::level = Log::DEBUG;
break;
case 'm':
config->modelPath = arg;
break;
case 'c':
config->classesPath = arg;
break;
case 'd':
config->debug = true;
break;
case 'o':
config->outputDir.assign(arg);
break;
case 's':
config->seamCarving = true;
break;
case 'f':
config->focusPersonImage = arg;
break;
case 't':
config->threshold = std::atof(arg);
break;
case 'z':
{
int x = std::stoi(arg);
config->targetSize = cv::Size(x, x);
break;
}
case ARGP_KEY_ARG:
config->imagePaths.push_back(arg);
break;
default:
return ARGP_ERR_UNKNOWN;
}
}
catch(const std::invalid_argument& ex)
{
std::cout<<arg<<" passed for argument -"<<static_cast<char>(key)<<" is not a valid number.\n";
return ARGP_KEY_ERROR;
}
return 0;
}
static struct argp argp = {options, parse_opt, args_doc, doc};

View file

@@ -1,19 +1,19 @@
 #include "seamcarving.h"
 #include <opencv2/imgcodecs.hpp>
 #include <opencv2/highgui/highgui.hpp>
 #include <opencv2/imgproc.hpp>
 #include <iostream>
-#if __cplusplus >= 201703L
 #include <filesystem>
-#endif
 #include <cfloat>
+#include <vector>
+#include "log.h"
 
-SeamCarving::SeamCarving(const cv::Mat &img, int seams, bool grow) :
-	image(img), seams(seams), grow(grow) {}
-
-void SeamCarving::init()
+bool SeamCarving::strechImage(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect)
 {
 	cv::Mat newFrame = image.clone();
+	assert(!newFrame.empty());
+	std::vector<std::vector<int>> vecSeams;
 	for(int i = 0; i < seams; i++)
 	{
@@ -24,230 +24,55 @@ void SeamCarving::init()
 		cv::Mat pathIntensityMat = computePathIntensityMat(gradientMagnitude);
 		if(pathIntensityMat.rows == 0 && pathIntensityMat.cols == 0)
-		{
-			finalImage = image;
-			break;
-		}
+			return false;
 		std::vector<int> seam = getLeastImportantPath(pathIntensityMat);
 		vecSeams.push_back(seam);
+		if(seamsVect)
+			seamsVect->push_back(seam);
-		newFrame = removeLeastImportantPath(newFrame,seam);
-		if(newFrame.rows == 0 && newFrame.cols == 0)
-		{
-			finalImage = image;
-			break;
-		}
+		newFrame = removeLeastImportantPath(newFrame, seam);
+		if(newFrame.rows == 0 || newFrame.cols == 0)
+			return false;
 	}
 	if (grow)
 	{
 		cv::Mat growMat = image.clone();
-		for (int i = 0; i < vecSeams.size(); i++)
+		for(size_t i = 0; i < vecSeams.size(); i++)
 		{
 			growMat = addLeastImportantPath(growMat,vecSeams[i]);
 		}
-		finalImage = growMat;
+		image = growMat;
 	}
 	else
 	{
-		finalImage = newFrame;
+		image = newFrame;
 	}
-
-	sliderPos = seams;
+	return true;
 }
 
-void SeamCarving::computeNewFinalImage(int sliderPos)
+bool SeamCarving::strechImageVert(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect)
 {
-	if(sliderPos == 0)
-	{
-		finalImage = image;
-		return;
-	}
-	if(sliderPos < 1 || sliderPos >= sliderMax-1)
-	{
-		return;
-	}
-	if(sliderPos > vecSeams.size())
-	{
-		cv::Mat newFrame = finalImage.clone();
-		for(int i = vecSeams.size()-1; i < sliderPos; i++)
-		{
-			//Gradient Magnitude for intensity of image.
-			cv::Mat gradientMagnitude = computeGradientMagnitude(newFrame);
-			//Use DP to create the real energy map that is used for path calculation.
-			// Strictly using vertical paths for testing simplicity.
-			cv::Mat pathIntensityMat = computePathIntensityMat(gradientMagnitude);
-			if(pathIntensityMat.rows == 0 && pathIntensityMat.cols == 0)
-			{
-				finalImage = image;
-				break;
-			}
-			std::vector<int> seam = getLeastImportantPath(pathIntensityMat);
-			vecSeams.push_back(seam);
-			newFrame = removeLeastImportantPath(newFrame,seam);
-			if(newFrame.rows == 0 && newFrame.cols == 0)
-			{
-				finalImage = image;
-				break;
-			}
-		}
-		if (grow)
-		{
-			cv::Mat growMat = image.clone();
-			for (int i = 0; i < vecSeams.size(); i++)
-			{
-				growMat = addLeastImportantPath(growMat,vecSeams[i]);
-			}
-			finalImage = growMat;
-		}
-		else
-		{
-			finalImage = newFrame;
-		}
-	}
-	else if (sliderPos < vecSeams.size())
-	{
-		cv::Mat newFrame = image.clone();
-		for(int i = 0; i < sliderPos; i++) // TODO check if it is faster to add seams back (probably not)
-		{
-			if (grow)
-			{
-				newFrame = addLeastImportantPath(newFrame,vecSeams[i]);
-			}
-			else
-			{
-				newFrame = removeLeastImportantPath(newFrame,vecSeams[i]);
-			}
-			if(newFrame.rows == 0 && newFrame.cols == 0)
-			{
-				finalImage = image;
-				break;
-			}
-		}
-		finalImage = newFrame;
-	}
+	cv::transpose(image, image);
+	bool ret = strechImage(image, seams, grow, seamsVect);
+	cv::transpose(image, image);
+	return ret;
 }
 
-const cv::Mat& SeamCarving::getFinalImage()
+bool SeamCarving::strechImageWithSeamsImage(cv::Mat& image, cv::Mat& seamsImage, int seams, bool grow)
 {
-	return finalImage;
+	std::vector<std::vector<int>> seamsVect;
+	seamsImage = image.clone();
+	bool ret = SeamCarving::strechImage(image, seams, grow, &seamsVect);
+	if(!ret)
+		return false;
+	for(size_t i = 0; i < seamsVect.size(); ++i)
+		seamsImage = drawSeam(seamsImage, seamsVect[i]);
+	return true;
 }
-void SeamCarving::showSeamsImg()
-{
-	cv::Mat seamsFrame = image.clone();
-	//std::cout << "sliderPos: " << sliderPos << std::endl;
-	for(int i = 0; i < sliderPos; i++)
-	{
-		seamsFrame = drawSeam(seamsFrame, vecSeams[i]);
-	}
-	cv::imwrite("output/seams_image.jpg", seamsFrame);
-	cv::imshow( "Image Seams", seamsFrame);
-}
-static void onChange( int pos, void* object )
-{
-	SeamCarving* sc = (SeamCarving*)(object);
-	/*if(sc->getBlockUpdateStatus()) {
-		return;
-	}*/
-	sc->computeNewFinalImage(pos);
-	imshow("Final Image", sc->getFinalImage());
-#if DEBUG
-	sc->showSeamsImg();
-#endif
-}
-static void onMouse( int event, int x, int y, int, void* object)
-{
-	SeamCarving* sc = (SeamCarving*)(object);
-	if( event == cv::EVENT_LBUTTONDOWN ||
-		event == cv::EVENT_RBUTTONDOWN ||
-		event == cv::EVENT_MBUTTONDOWN
-		)
-	{
-		sc->setBlockUpdate(true);
-	}
-	else if(event == cv::EVENT_LBUTTONUP ||
-		event == cv::EVENT_RBUTTONUP ||
-		event == cv::EVENT_MBUTTONUP)
-	{
-		sc->setBlockUpdate(false);
-	}
-}
-void SeamCarving::setBlockUpdate(bool bUpdate)
-{
-	blockUpdate = bUpdate;
-}
-bool SeamCarving::getBlockUpdateStatus()
-{
-	return blockUpdate;
-}
-void SeamCarving::showImage()
-{
-#if __cplusplus >= 201703L
-	if(!std::filesystem::exists("output"))
-	{
-		std::filesystem::create_directory("output");
-	}
-#endif
-	if( image.empty() )
-	{
-		std::cout << "Could not open raw image" << std::endl ;
-		return;
-	}
-	namedWindow( "Raw Image", cv::WINDOW_AUTOSIZE );
-	cv::imshow( "Raw Image", image );
-	if( finalImage.empty() )
-	{
-		std::cout << "Could not open final image" << std::endl ;
-		return;
-	}
-#if DEBUG
-	namedWindow( "gradient Image", cv::WINDOW_AUTOSIZE );
-	cv::Mat gradient = computeGradientMagnitude(image);
-	cv::Mat u8_image;
-	gradient.convertTo(u8_image, CV_8U);
-	cv::imwrite("output/gradient_image.jpg", u8_image);
-	cv::imshow("gradient Image", u8_image);
-	namedWindow( "intensity Image", cv::WINDOW_AUTOSIZE );
-	cv::Mat u8_image2;
-	cv::Mat intensityMat = computePathIntensityMat(gradient);
-	cv::Mat dst;
-	cv::normalize(intensityMat, dst, 0, 255, cv::NORM_MINMAX);
-	dst.convertTo(u8_image2, CV_8U);
-	cv::imwrite("output/intensity_image.jpg", u8_image2);
-	cv::imshow( "intensity Image", u8_image2);
-	//cv::Mat engImg = GetEnergyImg(image);
-	//namedWindow("energy Image", cv::WINDOW_AUTOSIZE);
-	//cv::Mat u8_image3;
-	//engImg.convertTo(u8_image3, CV_8U);
-	//cv::imshow( "energy Image", u8_image3);
-	namedWindow("Image Seams", cv::WINDOW_AUTOSIZE);
-	showSeamsImg();
-#endif
-	namedWindow( "Final Image", cv::WINDOW_AUTOSIZE );
-	cv::createTrackbar("Seams", "Final Image", &sliderPos, sliderMax, onChange, this);
-	//cv::setMouseCallback("Final Image", onMouse, this );
-	cv::imwrite("output/final_image.jpg", finalImage);
-	cv::imshow("Final Image", finalImage);
-	cv::waitKey(0);
-}
 
 cv::Mat SeamCarving::GetEnergyImg(const cv::Mat &img)
@@ -392,9 +217,7 @@ cv::Mat SeamCarving::removeLeastImportantPath(const cv::Mat &original, const std
 	cv::Size size = cv::Size(orgSize.width-1, orgSize.height);
 	cv::Mat newMat = cv::Mat(size, original.type());
-	unsigned char *rawOrig = original.data;
-	unsigned char *rawOutput = newMat.data;
-	for(int row = 0; row < seam.size(); row++)
+	for(size_t row = 0; row < seam.size(); row++)
 	{
 		removePixel(original, newMat, row, seam[row]);
 	}
@@ -460,9 +283,7 @@ cv::Mat SeamCarving::addLeastImportantPath(const cv::Mat &original, const std::v
 	cv::Size size = cv::Size(orgSize.width+1, orgSize.height);
 	cv::Mat newMat = cv::Mat(size, original.type());
-	unsigned char *rawOrig = original.data;
-	unsigned char *rawOutput = newMat.data;
-	for(int row = 0; row < seam.size(); row++)
+	for(size_t row = 0; row < seam.size(); row++)
 	{
 		//std::cout << "row: " << row << ", col: " << seam[row] << std::endl;
 		addPixel(original, newMat, row, seam[row]);
@@ -518,3 +339,18 @@ void SeamCarving::addPixel(const cv::Mat &original, cv::Mat &outputMat, int row,
 		rawOutput[newRowStart + leftPixel*channels+2] = (unsigned char) ((byte3 + byte3L)/2);
 	}
 }
+
+cv::Mat SeamCarving::drawSeam(const cv::Mat &frame, const std::vector<int> &seam)
+{
+	cv::Mat retMat = frame.clone();
+	for(int row = 0; row < frame.rows; row++)
+	{
+		for(int col = 0; col < frame.cols; col++)
+		{
+			retMat.at<cv::Vec3b>(row, seam[row])[0] = 0;
+			retMat.at<cv::Vec3b>(row, seam[row])[1] = 255;
+			retMat.at<cv::Vec3b>(row, seam[row])[2] = 0;
+		}
+	}
+	return retMat;
+}

24
SmartCrop/seamcarving.h Normal file
View file

@@ -0,0 +1,24 @@
#pragma once
#include <opencv2/core/core.hpp>
#include <vector>
class SeamCarving
{
private:
static cv::Mat GetEnergyImg(const cv::Mat &img);
static cv::Mat computeGradientMagnitude(const cv::Mat &frame);
static float intensity(float currIndex, int start, int end);
static cv::Mat computePathIntensityMat(const cv::Mat &rawEnergyMap);
static std::vector<int> getLeastImportantPath(const cv::Mat &importanceMap);
static cv::Mat removeLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam);
static void removePixel(const cv::Mat &original, cv::Mat &outputMap, int row, int minCol);
static cv::Mat addLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam);
static void addPixel(const cv::Mat &original, cv::Mat &outputMat, int row, int minCol);
static cv::Mat drawSeam(const cv::Mat &frame, const std::vector<int> &seam);
public:
static bool strechImage(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect = nullptr);
static bool strechImageVert(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect = nullptr);
static bool strechImageWithSeamsImage(cv::Mat& image, cv::Mat& seamsImage, int seams, bool grow);
};

SmartCrop/yolo.cpp
View file

@@ -11,8 +11,8 @@
 #define INCBIN_PREFIX r
 #include "incbin.h"
 
-INCTXT(defaultClasses, "../classes.txt");
-INCBIN(defaultModel, "../yolov8x.onnx");
+INCTXT(defaultClasses, WEIGHT_DIR "/classes.txt");
+INCBIN(defaultModel, WEIGHT_DIR "/yolov8x.onnx");
 
 Yolo::Yolo(const std::filesystem::path &onnxModelPath, const cv::Size &modelInputShape,
 	const std::filesystem::path& classesTxtFilePath, bool runWithOCl)
@@ -22,6 +22,7 @@ Yolo::Yolo(const std::filesystem::path &onnxModelPath, const cv::Size &modelInpu
 	if(classesTxtFilePath.empty())
 	{
+		Log(Log::INFO)<<"Using builtin classes";
 		loadClasses(rdefaultClassesData);
 	}
 	else
@@ -31,19 +32,21 @@ Yolo::Yolo(const std::filesystem::path &onnxModelPath, const cv::Size &modelInpu
 	}
 
 	if(!modelPath.empty())
+	{
 		net = cv::dnn::readNetFromONNX(modelPath);
+	}
 	else
+	{
+		Log(Log::INFO)<<"Using builtin yolo model";
 		net = cv::dnn::readNetFromONNX((const char*)rdefaultModelData, rdefaultModelSize);
+	}
 
 	if(runWithOCl)
 	{
-		std::cout << "\nRunning on OCV" << std::endl;
 		net.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT);
 		net.setPreferableTarget(cv::dnn::DNN_TARGET_OPENCL);
 	}
 	else
 	{
-		std::cout << "\nRunning on CPU" << std::endl;
 		net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
 		net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
 	}
@@ -176,14 +179,33 @@ std::vector<Yolo::Detection> Yolo::runInference(const cv::Mat &input)
 		result.className = classes[result.class_id].first;
 		result.priority = classes[result.class_id].second;
 
+		clampBox(boxes[idx], input.size());
 		result.box = boxes[idx];
 		detections.push_back(result);
 	}
 
 	return detections;
 }
 
+void Yolo::clampBox(cv::Rect& box, const cv::Size& size)
+{
+	if(box.x < 0)
+	{
+		box.width += box.x;
+		box.x = 0;
+	}
+	if(box.y < 0)
+	{
+		box.height += box.y;
+		box.y = 0;
+	}
+	if(box.x+box.width > size.width)
+		box.width = size.width - box.x;
+	if(box.y+box.height > size.height)
+		box.height = size.height - box.y;
+}
+
 void Yolo::loadClasses(const std::string& classesStr)
 {
 	std::vector<std::string> candidateClasses = tokenizeBinaryIgnore(classesStr, '\n', '"', '\\');
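
The newly added clampBox clips detection boxes that extend past the image borders, which would otherwise make the crop rectangle taken from the image invalid. A minimal sketch (not from the patch): for boxes that overlap the image at all, the same clipping can be written as a rectangle intersection.

#include <opencv2/core/types.hpp>
#include <cassert>

int main()
{
	cv::Size size(640, 480);
	cv::Rect box(-10, 20, 100, 500); // sticks out to the left and bottom

	// Intersect with the image bounds; matches clampBox for overlapping boxes.
	cv::Rect clamped = box & cv::Rect(cv::Point(0, 0), size);
	assert(clamped == cv::Rect(0, 20, 90, 460));
	return 0;
}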

SmartCrop/yolo.h
View file

@@ -27,19 +27,16 @@ private:
 	static constexpr float modelScoreThreshold = 0.45;
 	static constexpr float modelNMSThreshold = 0.50;
 
-	std::string modelPath;
-	std::vector<std::pair<std::string, int>> classes;
-	cv::Size2f modelShape;
-	bool letterBoxForSquare = true;
-	cv::dnn::Net net;
-
 	void loadClasses(const std::string& classes);
 	void loadOnnxNetwork(const std::filesystem::path& path);
 	cv::Mat formatToSquare(const cv::Mat &source);
+	static void clampBox(cv::Rect& box, const cv::Size& size);
+
+	std::string modelPath;
+	std::vector<std::pair<std::string, int>> classes;
+	cv::Size2f modelShape;
+	bool letterBoxForSquare = true;
+	cv::dnn::Net net;
 
 public:
 	Yolo(const std::filesystem::path &onnxModelPath = "", const cv::Size& modelInputShape = {640, 480},
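
With the class list and model now embedded via INCBIN, a Yolo instance needs no external files. A hedged sketch mirroring the call site in the removed main.cpp; the image path is a placeholder:

#include <opencv2/imgcodecs.hpp>
#include "yolo.h"
#include "log.h"

int main()
{
	Yolo yolo("", {640, 480}, "", false); // empty paths select the builtin weights and classes
	cv::Mat image = cv::imread("photo.jpg");
	if(image.empty())
		return 1;

	for(const Yolo::Detection& detection : yolo.runInference(image))
		Log(Log::INFO)<<detection.className<<" prio "<<detection.priority<<" at "<<detection.box;
	return 0;
}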

80
Weights/classes.txt Normal file
View file

@@ -0,0 +1,80 @@
person, 10
bicycle, 4
car, 3
motorcycle, 4
airplane, 4
bus, 4
train, 4
truck, 3
boat, 4
traffic light, 1
fire hydrant, 1
stop sign, 1
parking meter, 1
bench, 2
bird, 5
cat, 6
dog, 5
horse, 4
sheep, 5
cow, 4
elephant, 5
bear, 5
zebra, 5
giraffe, 5
backpack, 3
umbrella, 3
handbag, 3
tie, 3
suitcase, 2
frisbee, 3
skis, 3
snowboard, 3
sports ball, 3
kite, 4
baseball bat, 3
baseball glove, 3
skateboard, 3
surfboard, 3
tennis racket, 3
bottle, 2
wine glass, 2
cup, 2
fork, 1
knife, 1
spoon, 1
bowl, 1
banana, 1
apple, 1
sandwich, 1
orange, 1
broccoli, 1
carrot, 1
hot dog, 1
pizza, 1
donut, 2
cake, 2
chair, 1
couch, 1
potted plant, 1
bed, 1
dining table, 1
toilet, 1
tv, 1
laptop, 1
mouse, 1
remote, 1
keyboard, 1
cell phone, 1
microwave, 1
oven, 1
toaster, 1
sink, 1
refrigerator, 1
book, 1
clock, 1
vase, 1
scissors, 1
teddy bear, 1
hair drier, 1
toothbrush, 1
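
Each line of Weights/classes.txt pairs a COCO class name with an integer cropping priority; person ranks highest at 10 and clutter classes bottom out at 1. The project parses this with tokenizeBinaryIgnore; the sketch below is only an assumption of the format's shape and ignores the quoting and escaping that function supports.

#include <sstream>
#include <string>
#include <utility>
#include <vector>

std::vector<std::pair<std::string, int>> parseClasses(const std::string& text)
{
	std::vector<std::pair<std::string, int>> classes;
	std::istringstream stream(text);
	for(std::string line; std::getline(stream, line);)
	{
		size_t comma = line.rfind(',');
		if(comma == std::string::npos)
			continue;
		// "person, 10" -> {"person", 10}; stoi skips the leading space.
		classes.push_back({line.substr(0, comma), std::stoi(line.substr(comma + 1))});
	}
	return classes;
}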

Binary file not shown.

Binary file not shown.

BIN
Weights/yolov8x.onnx Normal file

Binary file not shown.

295
main.cpp
View file

@@ -1,295 +0,0 @@
#include <filesystem>
#include <iostream>
#include <opencv2/core/types.hpp>
#include <opencv2/imgproc.hpp>
#include <algorithm>
#include <vector>
#include "yolo.h"
#include "log.h"
#include "options.h"
#include "utils.h"
#include "intelligentroi.h"
const Yolo::Detection* pointInDetectionHoriz(int x, const std::vector<Yolo::Detection>& detections, const Yolo::Detection* ignore = nullptr)
{
const Yolo::Detection* inDetection = nullptr;
for(const Yolo::Detection& detection : detections)
{
if(!ignore || ignore != &detection)
continue;
if(detection.box.x <= x && detection.box.x+detection.box.width <= x)
{
if(!inDetection || detection.box.br().x > inDetection->box.br().x)
inDetection = &detection;
}
}
return inDetection;
}
bool findRegionEndpointHoriz(int& x, const std::vector<Yolo::Detection>& detections, int imgSizeX)
{
const Yolo::Detection* inDetection = pointInDetectionHoriz(x, detections);
if(!inDetection)
{
const Yolo::Detection* closest = nullptr;
for(const Yolo::Detection& detection : detections)
{
if(detection.box.x > x)
{
if(closest == nullptr || detection.box.x-x > closest->box.x-x)
closest = &detection;
}
}
if(closest)
x = closest->box.x;
else
x = imgSizeX;
return false;
}
else
{
x = inDetection->box.br().x;
const Yolo::Detection* candidateDetection = pointInDetectionHoriz(x, detections, inDetection);
if(candidateDetection && candidateDetection->box.br().x > x)
return findRegionEndpointHoriz(x, detections, imgSizeX);
else
return true;
}
}
std::vector<std::pair<cv::Mat, bool>> cutImageIntoHorzRegions(cv::Mat& image, const std::vector<Yolo::Detection>& detections)
{
std::vector<std::pair<cv::Mat, bool>> out;
for(int x = 0; x < image.cols; ++x)
{
int start = x;
bool frozen = findRegionEndpointHoriz(x, detections, image.cols);
cv::Mat slice = image(cv::Rect(start, 0, x-start, image.rows));
out.push_back({slice, frozen});
}
return out;
}
const Yolo::Detection* pointInDetectionVert(int y, const std::vector<Yolo::Detection>& detections, const Yolo::Detection* ignore = nullptr)
{
const Yolo::Detection* inDetection = nullptr;
for(const Yolo::Detection& detection : detections)
{
if(!ignore || ignore != &detection)
continue;
if(detection.box.y <= y && detection.box.y+detection.box.height <= y)
{
if(!inDetection || detection.box.br().y > inDetection->box.br().y)
inDetection = &detection;
}
}
return inDetection;
}
bool findRegionEndpointVert(int& y, const std::vector<Yolo::Detection>& detections, int imgSizeY)
{
const Yolo::Detection* inDetection = pointInDetectionVert(y, detections);
if(!inDetection)
{
const Yolo::Detection* closest = nullptr;
for(const Yolo::Detection& detection : detections)
{
if(detection.box.y > y)
{
if(closest == nullptr || detection.box.y-y > closest->box.y-y)
closest = &detection;
}
}
if(closest)
y = closest->box.y;
else
y = imgSizeY;
return false;
}
else
{
y = inDetection->box.br().y;
const Yolo::Detection* candidateDetection = pointInDetectionVert(y, detections, inDetection);
if(candidateDetection && candidateDetection->box.br().y > y)
return findRegionEndpointVert(y, detections, imgSizeY);
else
return true;
}
}
std::vector<std::pair<cv::Mat, bool>> cutImageIntoVertRegions(cv::Mat& image, const std::vector<Yolo::Detection>& detections)
{
std::vector<std::pair<cv::Mat, bool>> out;
for(int y = 0; y < image.rows; ++y)
{
int start = y;
bool frozen = findRegionEndpointVert(y, detections, image.rows);
cv::Mat slice = image(cv::Rect(0, start, image.cols, y-start));
out.push_back({slice, frozen});
}
return out;
}
bool seamCarveResize(cv::Mat& image, const std::vector<Yolo::Detection>& detections, double targetAspectRatio = 1.0)
{
double aspectRatio = image.cols/static_cast<double>(image.rows);
bool vertical = false;
cv::Mat workImage;
if(aspectRatio > targetAspectRatio)
vertical = true;
int requiredLines = 0;
if(!vertical)
requiredLines = workImage.rows*targetAspectRatio - workImage.cols;
else
requiredLines = workImage.cols/targetAspectRatio - workImage.rows;
Log(Log::DEBUG)<<__func__<<' '<<requiredLines<<" lines are required in "<<(vertical ? "vertical" : "horizontal")<<" direction";
if(!vertical)
{
std::vector<std::pair<cv::Mat, bool>> slices = cutImageIntoHorzRegions(image, detections);
int totalResizableSize = 0;
for(const std::pair<cv::Mat, bool>& slice : slices)
{
if(slice.second)
totalResizableSize += slice.first.cols;
}
std::vector<int> seamsForSlice(slices.size());
for(size_t i = 0; i < slices.size(); ++i)
{
seamsForSlice[i] = (static_cast<double>(slices[i].first.cols)/totalResizableSize)*requiredLines;
}
}
else
{
int totalResizableSize = 0;
std::vector<std::pair<cv::Mat, bool>> slices = cutImageIntoVertRegions(image, detections);
}
}
void drawDebugInfo(cv::Mat &image, const cv::Rect& rect, const std::vector<Yolo::Detection>& detections)
{
for(const Yolo::Detection& detection : detections)
{
cv::rectangle(image, detection.box, detection.color, 4);
std::string label = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4);
cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_DUPLEX, 3, 2, 0);
cv::Rect textBox(detection.box.x, detection.box.y - 80, labelSize.width + 10, labelSize.height + 20);
cv::rectangle(image, textBox, detection.color, cv::FILLED);
cv::putText(image, label, cv::Point(detection.box.x + 5, detection.box.y - 10), cv::FONT_HERSHEY_DUPLEX, 3, cv::Scalar(0, 0, 0), 2, 0);
}
cv::rectangle(image, rect, cv::Scalar(0, 0, 255), 8);
}
int main(int argc, char* argv[])
{
Log::level = Log::INFO;
Config config;
argp_parse(&argp, argc, argv, 0, 0, &config);
if(config.outputDir.empty())
{
Log(Log::ERROR)<<"a output path \"-o\" is required";
return 1;
}
if(config.imagePaths.empty())
{
Log(Log::ERROR)<<"at least one input image or directory is required";
return 1;
}
std::vector<std::filesystem::path> imagePaths;
for(const std::filesystem::path& path : config.imagePaths)
getImageFiles(path, imagePaths);
if(imagePaths.empty())
{
Log(Log::ERROR)<<"no image was found\n";
return 1;
}
Yolo yolo(config.modelPath, {640, 480}, config.classesPath, false);
InteligentRoi intRoi(yolo);
if(!std::filesystem::exists(config.outputDir))
{
if(!std::filesystem::create_directory(config.outputDir))
{
Log(Log::ERROR)<<"could not create directory at "<<config.outputDir;
return 1;
}
}
std::filesystem::path debugOutputPath(config.outputDir/"debug");
if(config.debug)
{
if(!std::filesystem::exists(debugOutputPath))
std::filesystem::create_directory(debugOutputPath);
}
for(const std::filesystem::path& path : imagePaths)
{
cv::Mat image = cv::imread(path);
if(!image.data)
{
Log(Log::WARN)<<"could not load image "<<path<<" skipping";
continue;
}
if(std::max(image.cols, image.rows) > 1024)
{
if(image.cols > image.rows)
{
double ratio = 1024.0/image.cols;
cv::resize(image, image, {1024, static_cast<int>(image.rows*ratio)}, 0, 0, cv::INTER_CUBIC);
}
else
{
double ratio = 1024.0/image.rows;
cv::resize(image, image, {static_cast<int>(image.cols*ratio), 1024}, 0, 0, cv::INTER_CUBIC);
}
}
std::vector<Yolo::Detection> detections = yolo.runInference(image);
Log(Log::DEBUG)<<"Got "<<detections.size()<<" detections for "<<path;
for(const Yolo::Detection& detection : detections)
Log(Log::DEBUG)<<detection.class_id<<": "<<detection.className<<" at "<<detection.box<<" with prio "<<detection.priority;
cv::Rect crop = intRoi.getCropRectangle(detections, image.size());
cv::Mat debugImage = image.clone();
drawDebugInfo(debugImage, crop, detections);
bool ret = cv::imwrite(debugOutputPath/path.filename(), debugImage);
if(!ret)
Log(Log::WARN)<<"could not save debug image to "<<debugOutputPath/path.filename()<<" skipping";
cv::Mat croppedImage = image(crop);
cv::Mat resizedImage;
cv::resize(croppedImage, resizedImage, {512, 512}, 0, 0, cv::INTER_CUBIC);
ret = cv::imwrite(config.outputDir/path.filename(), resizedImage);
if(!ret)
Log(Log::WARN)<<"could not save image to "<<config.outputDir/path.filename()<<" skipping";
}
return 0;
}
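
The removed seamCarveResize above was left unfinished: it reads its dimensions from the never-assigned workImage, sums the widths of frozen rather than resizable slices, and returns nothing on either branch. A hedged sketch of the seam-budget idea it started, with those issues corrected; allocateSeams is a hypothetical name and the slice representation mirrors cutImageIntoHorzRegions:

#include <opencv2/core.hpp>
#include <utility>
#include <vector>

// Distribute requiredLines seams across the non-frozen slices,
// proportional to each slice's width.
std::vector<int> allocateSeams(const std::vector<std::pair<cv::Mat, bool>>& slices, int requiredLines)
{
	int totalResizableSize = 0;
	for(const auto& [slice, frozen] : slices)
	{
		if(!frozen)
			totalResizableSize += slice.cols;
	}

	std::vector<int> seamsForSlice(slices.size(), 0);
	if(totalResizableSize == 0)
		return seamsForSlice;

	for(size_t i = 0; i < slices.size(); ++i)
	{
		if(!slices[i].second)
			seamsForSlice[i] = (slices[i].first.cols * requiredLines) / totalResizableSize;
	}
	return seamsForSlice;
}

Integer division leaves a remainder of up to slices.size() - 1 seams; a caller would hand the leftovers to the widest resizable slice.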

options.h
View file

@@ -1,70 +0,0 @@
#pragma once
#include <string>
#include <vector>
#include <argp.h>
#include <iostream>
#include <filesystem>
#include "log.h"
const char *argp_program_version = "AIImagePreprocesses";
const char *argp_program_bug_address = "<carl@uvos.xyz>";
static char doc[] = "Application that transforms images into the formats, sizes and aspect ratios required for AI training";
static char args_doc[] = "[IMAGES]";
static struct argp_option options[] =
{
{"verbose", 'v', 0, 0, "Show debug messages" },
{"quiet", 'q', 0, 0, "only output data" },
{"model", 'm', "[FILENAME]", 0, "YoloV8 model to use for detection" },
{"classes", 'c', "[FILENAME]", 0, "classes text file to use" },
{"out", 'o', "[DIRECTORY]", 0, "directory whre images are to be saved" },
{"debug", 'd', 0, 0, "output debug images" },
{"seam-carving", 's', 0, 0, "model to train: "}
};
struct Config
{
std::vector<std::filesystem::path> imagePaths;
std::filesystem::path modelPath;
std::filesystem::path classesPath;
std::filesystem::path outputDir;
bool seamCarving = false;
bool debug = false;
};
static error_t parse_opt (int key, char *arg, struct argp_state *state)
{
Config *config = reinterpret_cast<Config*>(state->input);
switch (key)
{
case 'q':
Log::level = Log::ERROR;
break;
case 'v':
Log::level = Log::DEBUG;
break;
case 'm':
config->modelPath = arg;
break;
case 'c':
config->classesPath = arg;
break;
case 'd':
config->debug = true;
break;
case 'o':
config->outputDir.assign(arg);
break;
case 's':
config->seamCarving = true;
break;
case ARGP_KEY_ARG:
config->imagePaths.push_back(arg);
break;
default:
return ARGP_ERR_UNKNOWN;
}
return 0;
}
static struct argp argp = {options, parse_opt, args_doc, doc};

seamcarving.h
View file

@@ -1,61 +0,0 @@
#ifndef __SEAM__CARVING_HPP__
#define __SEAM__CARVING_HPP__
#include <opencv2/core/core.hpp>
#define DEBUG 0
class SeamCarving {
public:
void showImage();
const cv::Mat& getFinalImage();
virtual void computeNewFinalImage(int pos);
void setBlockUpdate(bool bUpdate);
bool getBlockUpdateStatus();
virtual void showSeamsImg();
protected:
SeamCarving(const cv::Mat &img, int seams, bool grow);
void init();
virtual cv::Mat drawSeam(const cv::Mat &frame, const std::vector<int> &seam) = 0;
cv::Mat image;
cv::Mat finalImage;
int seams;
bool grow;
int sliderMax;
int sliderPos;
std::vector<std::vector<int>> vecSeams;
private:
cv::Mat GetEnergyImg(const cv::Mat &img);
cv::Mat computeGradientMagnitude(const cv::Mat &frame);
float intensity(float currIndex, int start, int end);
cv::Mat computePathIntensityMat(const cv::Mat &rawEnergyMap);
std::vector<int> getLeastImportantPath(const cv::Mat &importanceMap);
cv::Mat removeLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam);
void removePixel(const cv::Mat &original, cv::Mat &outputMap, int row, int minCol);
cv::Mat addLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam);
void addPixel(const cv::Mat &original, cv::Mat &outputMat, int row, int minCol);
bool blockUpdate = false;
};
class SeamCarvingHorizontal : public SeamCarving
{
public:
SeamCarvingHorizontal(char* fileName, int seams=100, bool grow=false);
protected:
virtual cv::Mat drawSeam(const cv::Mat &frame, const std::vector<int> &seam) override;
};
class SeamCarvingVertical : public SeamCarving {
public:
SeamCarvingVertical(char* fileName, int seams=100, bool grow=false);
virtual void computeNewFinalImage(int pos) override;
#if DEBUG
virtual void showSeamsImg() override;
#endif
protected:
virtual cv::Mat drawSeam(const cv::Mat &frame, const std::vector<int> &seam) override;
};
#endif // __SEAM__CARVING_HPP__

seamcarvinghoriz.cpp
View file

@@ -1,28 +0,0 @@
#include "seamcarving.h"
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <iostream>
#include <cfloat>
cv::Mat SeamCarvingHorizontal::drawSeam(const cv::Mat &frame, const std::vector<int> &seam)
{
cv::Mat retMat = frame.clone();
for(int row = 0; row < frame.rows; row++)
{
for(int col = 0; col < frame.cols; col++)
{
retMat.at<cv::Vec3b>(row, seam[row])[0] = 0;
retMat.at<cv::Vec3b>(row, seam[row])[1] = 255;
retMat.at<cv::Vec3b>(row, seam[row])[2] = 0;
}
}
return retMat;
}
SeamCarvingHorizontal::SeamCarvingHorizontal(char* fileName, int seams, bool grow) :
SeamCarving( cv::imread(fileName, cv::IMREAD_COLOR), seams, grow)
{
sliderMax = image.cols;
init();
}
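
The removed drawSeam above writes the same seam pixel frame.cols times per row; the inner column loop does no additional work. An equivalent sketch without it:

#include <opencv2/core.hpp>
#include <vector>

cv::Mat drawSeamGreen(const cv::Mat& frame, const std::vector<int>& seam)
{
	cv::Mat retMat = frame.clone();
	for(int row = 0; row < retMat.rows; row++)
		retMat.at<cv::Vec3b>(row, seam[row]) = cv::Vec3b(0, 255, 0); // mark the seam green
	return retMat;
}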

seamcarvingvert.cpp
View file

@@ -1,51 +0,0 @@
#include "seamcarving.h"
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <iostream>
#include <cfloat>
SeamCarvingVertical::SeamCarvingVertical(char* fileName, int seams, bool grow) :
SeamCarving( cv::imread(fileName, cv::IMREAD_COLOR), seams, grow)
{
sliderMax = image.rows;
cv::Mat oldImage = image;
image = image.t();
init();
image = oldImage;
finalImage = finalImage.t();
}
cv::Mat SeamCarvingVertical::drawSeam(const cv::Mat &frame, const std::vector<int> &seam)
{
cv::Mat retMat = frame.clone();
for(int col = 0; col < frame.cols; col++)
{
for(int row = 0; row < frame.rows; row++)
{
retMat.at<cv::Vec3b>(seam[col], col)[0] = 0;
retMat.at<cv::Vec3b>(seam[col], col)[1] = 255;
retMat.at<cv::Vec3b>(seam[col], col)[2] = 0;
}
}
return retMat;
}
void SeamCarvingVertical::computeNewFinalImage(int pos)
{
cv::Mat oldImage = image;
image = image.t();
SeamCarving::computeNewFinalImage(pos);
image = oldImage;
finalImage = finalImage.t();
}
#if DEBUG
void SeamCarvingVertical::showSeamsImg()
{
cv::Mat oldImage = this->image;
this->image = this->image.t();
SeamCarving::showImage();
this->image = oldImage;
}
#endif
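
SeamCarvingVertical reuses the horizontal machinery throughout by transposing the image, running the operation, and transposing the result back. A generic sketch of that round-trip pattern; applyVertically is a hypothetical helper, not part of the removed code:

#include <opencv2/core.hpp>

template <typename RowWiseOp>
cv::Mat applyVertically(const cv::Mat& image, RowWiseOp op)
{
	cv::Mat transposed = image.t(); // columns become rows
	cv::Mat result = op(transposed); // any horizontal (row-wise) seam operation
	return result.t();               // restore the original orientation
}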