Compare commits
438c9d726c ... ce3279254f (10 commits)

Commits in this range:
ce3279254f
55953bcdb7
f97f4640a9
03e2b3119a
81475815fb
35cfa8a906
a279001151
b2ffbfa530
b3c2d585ae
f5dad284e6
CMakeLists.txt (modified)
@@ -1,15 +1,7 @@
 cmake_minimum_required(VERSION 3.6)
-project(AIImagePrepross)
-
-find_package(OpenCV REQUIRED)
+project(ImageAiUtils)
 
-set(CMAKE_CXX_STANDARD 17)
+set(WEIGHT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/Weights)
 
-set(SRC_FILES main.cpp yolo.cpp tokenize.cpp log.cpp seamcarvingvert.cpp seamcarvinghoriz.cpp seamcarving.cpp utils.cpp intelligentroi.cpp)
-
-add_executable(${PROJECT_NAME} ${SRC_FILES})
-target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} -ltbb)
-target_include_directories(${PROJECT_NAME} PRIVATE ${OpenCV_INCLUDE_DIRS})
-target_compile_options(${PROJECT_NAME} PRIVATE -s -g -Wall)
-
-install(TARGETS ${PROJECT_NAME} RUNTIME DESTINATION bin)
+add_subdirectory(SmartCrop)
DanbooruTagger/DanbooruTagger.py (new file, 141 lines)
@@ -0,0 +1,141 @@
import warnings
import argparse
import os
import json
from typing import Iterator

import cv2
import numpy
import torch
from torch.multiprocessing import Process, Queue
from tqdm import tqdm
# The pipeline below drives a Llava model via transformers; these imports are
# needed for it to run (the DeepDanbooru import is currently unused).
from transformers import AutoProcessor, BitsAndBytesConfig, LlavaForConditionalGeneration, logging
from deepdanbooru_onnx import DeepDanbooru


image_ext_ocv = [".bmp", ".jpeg", ".jpg", ".png"]


def find_image_files(path: str) -> list[str]:
    paths = list()
    for root, dirs, files in os.walk(path):
        for filename in files:
            name, extension = os.path.splitext(filename)
            if extension.lower() in image_ext_ocv:
                paths.append(os.path.join(root, filename))
    return paths


def image_loader(paths: list[str]) -> Iterator[numpy.ndarray]:
    for path in paths:
        imagebgr = cv2.imread(path)
        if imagebgr is None:
            print(f"Warning: could not load {path}")
            continue
        # cv2 loads BGR; the processor expects RGB
        yield cv2.cvtColor(imagebgr, cv2.COLOR_BGR2RGB), path


def pipeline(queue: Queue, image_paths: list[str], prompt: str, device: torch.device, model_name_or_path: str, batch_size: int):
    model = LlavaForConditionalGeneration.from_pretrained(model_name_or_path, torch_dtype=torch.float16, low_cpu_mem_usage=None,
        quantization_config=BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_use_double_quant=False,
            bnb_4bit_quant_type='nf4',
        ), device_map=device, attn_implementation="flash_attention_2")
    processor = AutoProcessor.from_pretrained(model_name_or_path)
    image_generator = image_loader(image_paths)

    stop = False
    finished_count = 0
    while not stop:
        prompts = list()
        images = list()
        filenames = list()
        for i in range(0, batch_size):
            image, filename = next(image_generator, (None, None))
            if image is None:
                stop = True
                break

            filenames.append(filename)
            images.append(image)
            prompts.append(prompt)

        if len(images) == 0:
            break

        inputs = processor(text=prompts, images=images, return_tensors="pt").to(model.device)
        generate_ids = model.generate(**inputs, max_new_tokens=100, min_new_tokens=3, length_penalty=1.0, do_sample=False, temperature=1.0, top_k=50, top_p=1.0)
        decodes = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
        finished_count += len(images)
        for i, decoded in enumerate(decodes):
            # strip the echoed prompt, keeping only the generated description
            trim = len(prompt) - len("<image>")
            queue.put({"file_name": filenames[i], "text": decoded[trim:].strip()})


def split_list(input_list, count):
    target_length = int(len(input_list) / count)
    for i in range(0, count - 1):
        yield input_list[i * target_length: (i + 1) * target_length]
    yield input_list[(count - 1) * target_length: len(input_list)]


def save_meta(meta_file, meta, reldir, common_description):
    meta["file_name"] = os.path.relpath(meta["file_name"], reldir)
    if common_description is not None:
        meta["text"] = common_description + meta["text"]
    meta_file.write(json.dumps(meta) + '\n')


if __name__ == "__main__":
    parser = argparse.ArgumentParser("A script to tag images via llava")
    parser.add_argument('--model', '-m', default="llava-hf/llava-1.5-13b-hf", help="model to use")
    parser.add_argument('--quantize', '-q', action='store_true', help="load quantized")
    parser.add_argument('--prompt', '-p', default="Please describe this image in 10 to 20 words.", help="Prompt to use on each image")
    parser.add_argument('--batch', '-b', default=4, type=int, help="Batch size to use for inference")
    parser.add_argument('--common_description', '-c', help="An optional description that will be prepended to the ai generated one")
    parser.add_argument('--image_dir', '-i', help="A directory containing the images to tag")
    args = parser.parse_args()

    prompt = "USER: <image>\n" + args.prompt + "\nASSISTANT: "
    os.environ["BITSANDBYTES_NOWELCOME"] = "1"

    image_paths = find_image_files(args.image_dir)
    image_path_chunks = list(split_list(image_paths, torch.cuda.device_count()))

    print(f"Will use {torch.cuda.device_count()} processes to create tags")

    logging.set_verbosity_error()
    warnings.filterwarnings("ignore")
    torch.multiprocessing.set_start_method('spawn')

    queue = Queue()
    processes = list()
    for i in range(0, torch.cuda.device_count()):
        processes.append(Process(target=pipeline, args=(queue, image_path_chunks[i], prompt, torch.device(i), args.model, args.batch)))
        processes[-1].start()

    progress = tqdm(desc="Generating tags", total=len(image_paths))
    exit = False
    with open(os.path.join(args.image_dir, "metadata.jsonl"), mode='w') as output_file:
        while not exit:
            if not queue.empty():
                meta = queue.get()
                save_meta(output_file, meta, args.image_dir, args.common_description)
                progress.update()
            exit = True
            for process in processes:
                if process.is_alive():
                    exit = False
                    break

        # drain anything the workers enqueued after the last poll
        while not queue.empty():
            meta = queue.get()
            save_meta(output_file, meta, args.image_dir, args.common_description)
            progress.update()

    for process in processes:
        process.join()
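The control flow above may be easier to see in miniature: one worker process per GPU pushes results into a shared Queue, and the parent polls that queue until every worker has exited, then drains the remainder. A minimal sketch of the same pattern, with an illustrative stand-in for the model call (all names here are placeholders):

    from torch.multiprocessing import Process, Queue, set_start_method

    def worker(queue, paths):
        for path in paths:
            # stand-in for batched model inference
            queue.put({"file_name": path, "text": "tags for " + path})

    if __name__ == "__main__":
        set_start_method("spawn")  # must happen before workers touch CUDA
        queue = Queue()
        chunks = [["a.png"], ["b.png"]]  # one chunk per worker/GPU
        procs = [Process(target=worker, args=(queue, chunk)) for chunk in chunks]
        for p in procs:
            p.start()
        done = 0
        while done < 2:
            meta = queue.get()  # blocks until a worker produces a result
            done += 1
            print(meta)
        for p in procs:
            p.join()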
DanbooruTagger/deepdanbooru_onnx/__init__.py (new file, 3 lines)
@@ -0,0 +1,3 @@
from .deepdanbooru_onnx import DeepDanbooru
from .deepdanbooru_onnx import process_image

__version__ = '0.0.8'
DanbooruTagger/deepdanbooru_onnx/deepdanbooru_onnx.py (new file, 244 lines)
@@ -0,0 +1,244 @@
import onnxruntime as ort
from PIL import Image
import numpy as np
import os
from tqdm import tqdm
import requests
import hashlib
from typing import Iterator, List, Union
import shutil
from pathlib import Path


def process_image(image: Image.Image) -> np.ndarray:
    """
    Convert an image to a numpy array.
    :param image: the image to convert
    :return: the numpy array
    """

    image = image.convert("RGB").resize((512, 512))
    image = np.array(image).astype(np.float32) / 255
    # equivalent to adding a batch axis: output shape is (1, 512, 512, 3), NHWC
    image = image.transpose((2, 0, 1)).reshape(1, 3, 512, 512).transpose((0, 2, 3, 1))
    return image


def download(url: str, save_path: str, md5: str, length: int) -> bool:
    """
    Download a file from url to save_path.
    If the file already exists, check its md5.
    If the md5 matches, return True; if it doesn't match, return False.
    :param url: the url of the file to download
    :param save_path: the path to save the file
    :param md5: the md5 of the file
    :param length: the length of the file in bytes
    :return: True if the file is downloaded successfully, False otherwise
    """

    try:
        response = requests.get(url=url, stream=True)
        with open(save_path, "wb") as f:
            with tqdm.wrapattr(
                response.raw, "read", total=length, desc="Downloading"
            ) as r_raw:
                shutil.copyfileobj(r_raw, f)
        return hashlib.md5(open(save_path, "rb").read()).hexdigest() == md5
    except Exception as e:
        print(e)
        return False


def download_model():
    """
    Download the model and tags file from the server.
    :return: the path to the model and tags file
    """

    model_url = (
        "https://huggingface.co/chinoll/deepdanbooru/resolve/main/deepdanbooru.onnx"
    )
    tags_url = "https://huggingface.co/chinoll/deepdanbooru/resolve/main/tags.txt"
    model_md5 = "16be4e40ebcc0b1d1915bbf31f00969f"
    tags_md5 = "a3f764de985cdeba89f1d232a4204402"
    model_length = 643993025
    tags_length = 133810

    home = str(Path.home()) + "/.deepdanbooru_onnx/"
    if not os.path.exists(home):
        os.mkdir(home)

    model_name = "deepdanbooru.onnx"
    tags_name = "tags.txt"

    model_path = home + model_name
    tags_path = home + tags_name
    if os.path.exists(model_path):
        # re-download if the cached model is corrupt
        if hashlib.md5(open(model_path, "rb").read()).hexdigest() != model_md5:
            os.remove(model_path)
            if not download(model_url, model_path, model_md5, model_length):
                raise ValueError("Model download failed")
    else:
        if not download(model_url, model_path, model_md5, model_length):
            raise ValueError("Model download failed")

    if os.path.exists(tags_path):
        if hashlib.md5(open(tags_path, "rb").read()).hexdigest() != tags_md5:
            os.remove(tags_path)
            if not download(tags_url, tags_path, tags_md5, tags_length):
                raise ValueError("Tags download failed")
    else:
        if not download(tags_url, tags_path, tags_md5, tags_length):
            raise ValueError("Tags download failed")
    return model_path, tags_path


class DeepDanbooru:
    def __init__(
        self,
        mode: str = "auto",
        model_path: Union[str, None] = None,
        tags_path: Union[str, None] = None,
        threshold: Union[float, int] = 0.6,
        pin_memory: bool = False,
        batch_size: int = 1,
    ):
        """
        Initialize the DeepDanbooru class.
        :param mode: the mode of the model, "cpu", "gpu", "tensorrt" or "auto"
        :param model_path: the path to the model file
        :param tags_path: the path to the tags file
        :param threshold: the threshold of the model
        :param pin_memory: whether to preload and cache images in memory
        :param batch_size: the batch size of the model
        """

        providers = {
            "cpu": "CPUExecutionProvider",
            "gpu": "CUDAExecutionProvider",
            "tensorrt": "TensorrtExecutionProvider",
            "auto": (
                "CUDAExecutionProvider"
                if "CUDAExecutionProvider" in ort.get_available_providers()
                else "CPUExecutionProvider"
            ),
        }

        if not (isinstance(threshold, float) or isinstance(threshold, int)):
            raise TypeError("threshold must be float or int")
        if threshold < 0 or threshold > 1:
            raise ValueError("threshold must be between 0 and 1")
        if mode not in providers:
            raise ValueError(
                "Mode not supported. Please choose from: cpu, gpu, tensorrt, auto"
            )
        if providers[mode] not in ort.get_available_providers():
            raise ValueError(
                f"Your device does not support {mode}. Please choose from: cpu"
            )
        if model_path is not None and not os.path.exists(model_path):
            raise FileNotFoundError("Model file not found")
        if tags_path is not None and not os.path.exists(tags_path):
            raise FileNotFoundError("Tags file not found")

        if model_path is None or tags_path is None:
            model_path, tags_path = download_model()

        self.session = ort.InferenceSession(model_path, providers=[providers[mode]])
        self.tags = [i.replace("\n", "") for i in open(tags_path, "r").readlines()]

        self.input_name = self.session.get_inputs()[0].name
        self.output_name = [output.name for output in self.session.get_outputs()]
        self.threshold = threshold
        self.pin_memory = pin_memory
        self.batch_size = batch_size
        self.mode = mode
        self.cache = {}

    def __str__(self) -> str:
        return f"DeepDanbooru(mode={self.mode}, threshold={self.threshold}, pin_memory={self.pin_memory}, batch_size={self.batch_size})"

    def __repr__(self) -> str:
        return self.__str__()

    def from_image_inference(self, image: Image.Image) -> dict:
        image = process_image(image)
        return self.predict(image)

    def from_ndarray_inferece(self, image: np.ndarray) -> dict:
        if image.shape != (1, 512, 512, 3):
            raise ValueError(f"Image must be {(1, 512, 512, 3)}")
        return self.predict(image)

    def from_file_inference(self, image: str) -> dict:
        return self.from_image_inference(Image.open(image))

    def from_list_inference(self, image: Union[list, tuple]) -> Iterator[dict]:
        # yields one {tag: score} dict per input, reusing cached results for
        # images that have been seen before
        if self.pin_memory:
            image = [process_image(Image.open(i)) for i in image]
        for i in [
            image[i : i + self.batch_size]
            for i in range(0, len(image), self.batch_size)
        ]:
            imagelist = i
            bs = len(i)
            _imagelist, idx, hashlist = [], [], []
            for j in range(len(i)):
                img = Image.open(i[j]) if not self.pin_memory else imagelist[j]
                image_hash = hashlib.md5(np.array(img).astype(np.uint8)).hexdigest()
                hashlist.append(image_hash)
                if image_hash in self.cache:
                    continue
                if not self.pin_memory:
                    _imagelist.append(process_image(img))
                else:
                    _imagelist.append(imagelist[j])
                idx.append(j)

            imagelist = _imagelist
            if len(imagelist) != 0:
                _image = np.vstack(imagelist)
                results = self.inference(_image)
                results_idx = 0
            else:
                results = []

            for i in range(bs):
                image_tag = {}
                if i in idx:
                    hash = hashlist[i]
                    for tag, score in zip(self.tags, results[results_idx]):
                        if score >= self.threshold:
                            image_tag[tag] = score
                    results_idx += 1
                    self.cache[hash] = image_tag
                    yield image_tag
                else:
                    yield self.cache[hashlist[i]]

    def inference(self, image):
        return self.session.run(self.output_name, {self.input_name: image})[0]

    def predict(self, image):
        result = self.inference(image)
        image_tag = {}
        for tag, score in zip(self.tags, result[0]):
            if score >= self.threshold:
                image_tag[tag] = score
        return image_tag

    def __call__(self, image) -> Union[dict, List[dict]]:
        if isinstance(image, str):
            return self.from_file_inference(image)
        elif isinstance(image, np.ndarray):
            return self.from_ndarray_inferece(image)
        elif isinstance(image, list) or isinstance(image, tuple):
            return self.from_list_inference(image)
        elif isinstance(image, Image.Image):
            return self.from_image_inference(image)
        else:
            raise ValueError("Image must be a file path or a numpy array or list/tuple")
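The transpose/reshape/transpose dance in process_image is a roundabout way of adding a batch axis to the HWC image, which is the (1, 512, 512, 3) NHWC shape that from_ndarray_inferece validates. A quick self-contained check of that equivalence, on synthetic data:

    import numpy as np

    image = np.random.rand(512, 512, 3).astype(np.float32)
    roundabout = image.transpose((2, 0, 1)).reshape(1, 3, 512, 512).transpose((0, 2, 3, 1))
    direct = image[np.newaxis, ...]  # just prepend a batch axis
    assert np.array_equal(roundabout, direct)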
DanbooruTagger/example.py (new file, 3 lines)
@@ -0,0 +1,3 @@
from deepdanbooru_onnx import DeepDanbooru

danbooru = DeepDanbooru()
print(danbooru("/run/media/philipp/20404acc-312c-44f2-b2d1-3a0a14257cc6/.Media/porn/00244-3145022840.png"))
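Calling the DeepDanbooru object returns a {tag: score} dict already filtered by the threshold. A small illustrative variation that turns the result into a comma-separated caption, highest-scoring tags first (the path and threshold here are placeholders):

    from deepdanbooru_onnx import DeepDanbooru

    danbooru = DeepDanbooru(threshold=0.6)
    tags = danbooru("some_image.png")
    caption = ", ".join(sorted(tags, key=tags.get, reverse=True))
    print(caption)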
PersonDatasetAssembler/PersonDatasetAssembler.py (new executable file, 154 lines)
@@ -0,0 +1,154 @@
#!/bin/python3
import argparse
import os
from typing import Iterator
import cv2
import numpy
from tqdm import tqdm
from wand.exceptions import BlobError
from wand.image import Image

image_ext_ocv = [".bmp", ".jpeg", ".jpg", ".png"]
image_ext_wand = [".dng", ".arw"]


class LoadException(Exception):
    pass


def find_image_files(path: str) -> list[str]:
    paths = list()
    for root, dirs, files in os.walk(path):
        for filename in files:
            name, extension = os.path.splitext(filename)
            if extension.lower() in image_ext_ocv or extension in image_ext_wand:
                paths.append(os.path.join(root, filename))
    return paths


def image_loader(paths: list[str]) -> Iterator[numpy.ndarray]:
    for path in paths:
        name, extension = os.path.splitext(path)
        extension = extension.lower()
        if extension in image_ext_ocv:
            image = cv2.imread(path)
            if image is None:
                print(f"Warning: could not load {path}")
            else:
                yield image
        elif extension in image_ext_wand:
            try:
                image = Image(filename=path)
            except BlobError as e:
                print(f"Warning: could not load {path}, {e}")
                continue
            # assumed conversion: turn the wand image into a BGR ndarray so raw
            # files flow through the same cv2 pipeline as the other formats
            yield cv2.cvtColor(numpy.array(image), cv2.COLOR_RGB2BGR)


def extract_video_images(video: cv2.VideoCapture, interval: int = 0):
    ret = True
    frame_counter = 0
    while ret:
        video.set(cv2.CAP_PROP_POS_FRAMES, frame_counter)
        ret, frame = video.read()
        if ret:
            yield frame
        frame_counter += interval


def contains_face_match(detector: cv2.FaceDetectorYN, recognizer: cv2.FaceRecognizerSF, image: numpy.ndarray, referance_features: list, thresh: float) -> tuple[float, bool]:
    detector.setInputSize([image.shape[1], image.shape[0]])
    faces = detector.detect(image)[1]
    if faces is None:
        return 0, False
    for face in faces:
        cropped_image = recognizer.alignCrop(image, face)
        features = recognizer.feature(cropped_image)
        score_accum = 0.0
        for referance in referance_features:
            score_accum += recognizer.match(referance, features, 0)
        # average the cosine similarity over all reference features
        score = score_accum / len(referance_features)
        if score > thresh:
            return score, True
    return 0, False


def process_referance(detector: cv2.FaceDetectorYN, recognizer: cv2.FaceRecognizerSF, referance_path: str) -> list:
    images = list()
    out = list()

    if os.path.isfile(referance_path):
        image = cv2.imread(referance_path)
        if image is None:
            print(f"Could not load image from {referance_path}")
        else:
            images.append(image)
    elif os.path.isdir(referance_path):
        filenames = find_image_files(referance_path)
        images = list(image_loader(filenames))

    for image in images:
        detector.setInputSize([image.shape[1], image.shape[0]])
        faces = detector.detect(image)[1]
        if faces is None:
            print("unable to find a face in reference image")
            exit(1)
        image = recognizer.alignCrop(image, faces[0])
        features = recognizer.feature(image)
        out.append(features)

    return out


if __name__ == "__main__":
    parser = argparse.ArgumentParser("Script to assemble a dataset of images of a specific person")
    parser.add_argument('--out', '-o', default="out", help="place to put dataset")
    parser.add_argument('--input', '-i', required=True, help="directory or video file to get images from")
    parser.add_argument('--skip', '-s', default=0, type=int, help="skip n frames between samples when grabbing from a video file")
    parser.add_argument('--referance', '-r', required=True, help="reference image or directory of images of the person to be found")
    parser.add_argument('--match_model', '-m', required=True, help="Path to the onnx recognition model to be used")
    parser.add_argument('--detect_model', '-d', required=True, help="Path to the onnx detection model to be used")
    parser.add_argument('--threshold', '-t', default=0.362, type=float, help="match threshold to use")
    parser.add_argument('--invert', '-n', action='store_true', help="output files that DONT match")
    args = parser.parse_args()

    recognizer = cv2.FaceRecognizerSF.create(model=args.match_model, config="", backend_id=cv2.dnn.DNN_BACKEND_DEFAULT, target_id=cv2.dnn.DNN_TARGET_CPU)
    detector = cv2.FaceDetectorYN.create(model=args.detect_model, config="", input_size=[320, 320],
        score_threshold=0.6, nms_threshold=0.3, top_k=5000, backend_id=cv2.dnn.DNN_BACKEND_DEFAULT, target_id=cv2.dnn.DNN_TARGET_CPU)

    referance_features = process_referance(detector, recognizer, args.referance)
    if len(referance_features) < 1:
        print(f"Could not load any reference image(s) from {args.referance}")
        exit(1)

    if os.path.isfile(args.input):
        video = cv2.VideoCapture(args.input)
        if not video.isOpened():
            print(f"Unable to open {args.input} as a video file")
            exit(1)
        image_generator = extract_video_images(video, args.skip + 1)
        total_images = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) / (args.skip + 1)
    elif os.path.isdir(args.input):
        image_filenames = find_image_files(args.input)
        image_generator = image_loader(image_filenames)
        total_images = len(image_filenames)
    else:
        print(f"{args.input} is not a video file nor is it a directory")
        exit(1)

    os.makedirs(args.out, exist_ok=True)

    progress = tqdm(total=int(total_images), desc="0.00")
    counter = 0
    for image in image_generator:
        # downscale tall images before detection to keep inference fast
        if image.shape[0] > 512:
            aspect = image.shape[0] / image.shape[1]
            resized = cv2.resize(image, (int(512 / aspect), 512), interpolation=cv2.INTER_AREA)
        else:
            resized = image
        score, match = contains_face_match(detector, recognizer, resized, referance_features, args.threshold)
        if (match and not args.invert) or (not match and args.invert):
            filename = f"{counter:04}.png"
            cv2.imwrite(os.path.join(args.out, filename), image)
            counter += 1
        progress.set_description(f"{score:1.2f}")
        progress.update()
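The acceptance rule in contains_face_match reduces to averaging the per-reference cosine similarities and comparing the mean against the threshold. The same arithmetic in isolation, with made-up scores:

    def is_match(candidate_scores: list[float], thresh: float = 0.362) -> bool:
        # mean similarity of one detected face against every reference feature
        score = sum(candidate_scores) / len(candidate_scores)
        return score > thresh

    print(is_match([0.41, 0.35]))  # mean 0.38 > 0.362, so True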
SmartCrop/CMakeLists.txt (new file, 16 lines)
@@ -0,0 +1,16 @@
cmake_minimum_required(VERSION 3.6)

find_package(OpenCV REQUIRED)

set(CMAKE_CXX_STANDARD 17)

set(SRC_FILES main.cpp yolo.cpp tokenize.cpp log.cpp seamcarving.cpp utils.cpp intelligentroi.cpp facerecognizer.cpp)

add_executable(smartcrop ${SRC_FILES})
target_link_libraries(smartcrop ${OpenCV_LIBS} -ltbb)
target_include_directories(smartcrop PRIVATE ${OpenCV_INCLUDE_DIRS})
target_compile_options(smartcrop PRIVATE -s -g -Wall)
message(WARNING ${WEIGHT_DIR})
target_compile_definitions(smartcrop PUBLIC WEIGHT_DIR="${WEIGHT_DIR}")

install(TARGETS smartcrop RUNTIME DESTINATION bin)
SmartCrop/facerecognizer.cpp (new file, 143 lines)
@@ -0,0 +1,143 @@
#include "facerecognizer.h"
#include <filesystem>

#define INCBIN_PREFIX r
#include "incbin.h"

INCBIN(defaultRecognizer, WEIGHT_DIR "/face_recognition_sface_2021dec.onnx");
INCBIN(defaultDetector, WEIGHT_DIR "/face_detection_yunet_2023mar.onnx");

#include <opencv2/dnn/dnn.hpp>
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <fstream>
#include <cassert>

#include "log.h"

static const std::vector<unsigned char> onnx((unsigned char*)rdefaultDetectorData, ((unsigned char*)rdefaultDetectorData)+rdefaultDetectorSize);

FaceRecognizer::FaceRecognizer(std::filesystem::path recognizerPath, const std::filesystem::path& detectorPath, const std::vector<cv::Mat>& referances)
{
    if(detectorPath.empty())
    {
        Log(Log::INFO)<<"Using builtin face detection model";
        detector = cv::FaceDetectorYN::create("onnx", onnx, std::vector<unsigned char>(), {320, 320}, 0.6, 0.3, 5000, cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
        if(!detector)
            throw LoadException("Unable to load detector network from built in file");
    }
    else
    {
        detector = cv::FaceDetectorYN::create(detectorPath, "", {320, 320}, 0.6, 0.3, 5000, cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
        if(!detector)
            throw LoadException("Unable to load detector network from "+detectorPath.string());
    }

    bool defaultNetwork = recognizerPath.empty();

    if(defaultNetwork)
    {
        Log(Log::INFO)<<"Using builtin face recognition model";
        // FaceRecognizerSF only loads from a file, so spill the embedded
        // network into a temporary file and remove it after loading
        recognizerPath = cv::tempfile("onnx");
        std::ofstream file(recognizerPath);
        if(!file.is_open())
            throw LoadException("Unable to open temporary file at "+recognizerPath.string());
        Log(Log::DEBUG)<<"Using "<<recognizerPath<<" as temporary file for onnx recognition network";
        file.write(reinterpret_cast<const char*>(rdefaultRecognizerData), rdefaultRecognizerSize);
        file.close();
    }

    recognizer = cv::FaceRecognizerSF::create(recognizerPath.string(), "", cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);

    if(defaultNetwork)
        std::filesystem::remove(recognizerPath);

    if(!recognizer)
        throw LoadException("Unable to load recognizer network from "+recognizerPath.string());

    addReferances(referances);
}

cv::Mat FaceRecognizer::detectFaces(const cv::Mat& input)
{
    detector->setInputSize(input.size());
    cv::Mat faces;
    detector->detect(input, faces);
    return faces;
}

bool FaceRecognizer::addReferances(const std::vector<cv::Mat>& referances)
{
    bool ret = false;
    for(const cv::Mat& image : referances)
    {
        cv::Mat faces = detectFaces(image);
        if(faces.empty())
        {
            Log(Log::WARN)<<"A reference image provided does not contain any face";
            continue;
        }
        // check the empty case first; an empty Mat has zero columns
        assert(faces.cols == 15);
        if(faces.rows > 1)
            Log(Log::WARN)<<"A reference image provided contains more than one face, only the first detected face will be considered";
        cv::Mat cropedImage;
        recognizer->alignCrop(image, faces.row(0), cropedImage);
        cv::Mat features;
        recognizer->feature(cropedImage, features);
        referanceFeatures.push_back(features.clone());
        ret = true;
    }

    return ret;
}

void FaceRecognizer::setThreshold(double threasholdIn)
{
    threshold = threasholdIn;
}

double FaceRecognizer::getThreshold()
{
    return threshold;
}

void FaceRecognizer::clearReferances()
{
    referanceFeatures.clear();
}

FaceRecognizer::Detection FaceRecognizer::isMatch(const cv::Mat& input, bool alone)
{
    cv::Mat faces = detectFaces(input);

    Detection bestMatch;
    bestMatch.confidence = 0;
    bestMatch.person = -1;

    if(alone && faces.rows > 1)
    {
        bestMatch.person = -2;
        return bestMatch;
    }

    for(int i = 0; i < faces.rows; ++i)
    {
        cv::Mat face;
        recognizer->alignCrop(input, faces.row(i), face);
        cv::Mat features;
        recognizer->feature(face, features);
        features = features.clone();
        for(size_t referanceIndex = 0; referanceIndex < referanceFeatures.size(); ++referanceIndex)
        {
            double score = recognizer->match(referanceFeatures[referanceIndex], features, cv::FaceRecognizerSF::FR_COSINE);
            if(score > threshold && score > bestMatch.confidence)
            {
                bestMatch.confidence = score;
                bestMatch.person = referanceIndex;
                bestMatch.rect = cv::Rect(faces.at<int>(i, 0), faces.at<int>(i, 1), faces.at<int>(i, 2), faces.at<int>(i, 3));
            }
        }
    }

    return bestMatch;
}
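The same detect, alignCrop, feature, cosine-match sequence is also reachable from OpenCV's Python bindings, which can be convenient for experimenting with thresholds before touching the C++. A hedged sketch, with placeholder model paths and images:

    import cv2

    # placeholder paths: download the YuNet and SFace ONNX models first
    detector = cv2.FaceDetectorYN.create("face_detection_yunet_2023mar.onnx", "", (320, 320), 0.6, 0.3, 5000)
    recognizer = cv2.FaceRecognizerSF.create("face_recognition_sface_2021dec.onnx", "")

    def features_of(image):
        # detection needs the real input size, just like detectFaces above
        detector.setInputSize((image.shape[1], image.shape[0]))
        faces = detector.detect(image)[1]
        if faces is None:
            return None
        aligned = recognizer.alignCrop(image, faces[0])
        return recognizer.feature(aligned)

    a, b = cv2.imread("person_a.jpg"), cv2.imread("person_b.jpg")
    fa, fb = features_of(a), features_of(b)
    if fa is not None and fb is not None:
        score = recognizer.match(fa, fb, cv2.FaceRecognizerSF_FR_COSINE)
        print("match" if score > 0.363 else "no match", score)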
SmartCrop/facerecognizer.h (new file, 48 lines)
@@ -0,0 +1,48 @@
#pragma once
#include <exception>
#include <opencv2/core/mat.hpp>
#include <opencv2/objdetect/face.hpp>
#include <opencv2/core.hpp>
#include <vector>
#include <memory>
#include <filesystem>

class FaceRecognizer
{
public:

    struct Detection
    {
        int person;
        float confidence;
        cv::Rect rect;
    };

    class LoadException : public std::exception
    {
    private:
        std::string message;
    public:
        LoadException(const std::string& msg): std::exception(), message(msg) {}
        virtual const char* what() const throw() override
        {
            return message.c_str();
        }
    };

private:
    std::vector<cv::Mat> referanceFeatures;
    std::shared_ptr<cv::FaceRecognizerSF> recognizer;
    std::shared_ptr<cv::FaceDetectorYN> detector;

    double threshold = 0.363;

public:
    FaceRecognizer(std::filesystem::path recognizerPath = "", const std::filesystem::path& detectorPath = "", const std::vector<cv::Mat>& referances = std::vector<cv::Mat>());
    cv::Mat detectFaces(const cv::Mat& input);
    Detection isMatch(const cv::Mat& input, bool alone = false);
    bool addReferances(const std::vector<cv::Mat>& referances);
    void setThreshold(double threashold);
    double getThreshold();
    void clearReferances();
};
SmartCrop/intelligentroi.cpp (modified)
@@ -31,11 +31,12 @@ void InteligentRoi::slideRectToPoint(cv::Rect& rect, const cv::Point2i& point)
 	}
 }
 
-cv::Rect InteligentRoi::maxRect(const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude)
+cv::Rect InteligentRoi::maxRect(bool& incompleate, const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude)
 {
-	int radius = std::min(imageSize.height, imageSize.width)/2;
+	incompleate = false;
+	int diameter = std::min(imageSize.height, imageSize.width);
 	cv::Point2i point(imageSize.width/2, imageSize.height/2);
-	cv::Rect candiate(point.x-radius, point.y-radius, radius*2, radius*2);
+	cv::Rect candiate(point.x-diameter/2, point.y-diameter/2, diameter, diameter);
 
 	std::sort(mustInclude.begin(), mustInclude.end(),
 		[&point](const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b){return compPointPrio(a, b, point);});
@@ -43,8 +44,9 @@ cv::Rect InteligentRoi::maxRect(const cv::Size2i& imageSize, std::vector<std::pa
 	while(true)
 	{
 		cv::Rect includeRect = rectFromPoints(mustInclude);
-		if(includeRect.width-2 > radius || includeRect.height-2 > radius)
+		if(includeRect.width-2 > diameter || includeRect.height-2 > diameter)
 		{
+			incompleate = true;
 			slideRectToPoint(candiate, mustInclude.back().first);
 			mustInclude.pop_back();
 			Log(Log::DEBUG)<<"cant fill";
@@ -52,7 +54,9 @@ cv::Rect InteligentRoi::maxRect(const cv::Size2i& imageSize, std::vector<std::pa
 			Log(Log::DEBUG)<<mipoint.first<<' '<<pointDist(mipoint.first, point)<<' '<<mipoint.second;
 		}
 		else
+		{
 			break;
+		}
 	}
 
 	for(const std::pair<cv::Point2i, int>& includePoint : mustInclude)
@@ -75,25 +79,30 @@ InteligentRoi::InteligentRoi(const Yolo& yolo)
 	personId = yolo.getClassForStr("person");
 }
 
-cv::Rect InteligentRoi::getCropRectangle(const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize)
+bool InteligentRoi::getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize)
 {
-	if(!detections.empty())
+	std::vector<std::pair<cv::Point2i, int>> corners;
+	for(size_t i = 0; i < detections.size(); ++i)
 	{
-		std::vector<std::pair<cv::Point2i, int>> corners;
-		for(size_t i = 0; i < detections.size(); ++i)
+		int priority = detections[i].priority;
+		if(detections[i].class_id == personId)
 		{
-			int priority = detections[i].priority;
-			if(detections[i].class_id == personId)
-				corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width/2, 0), priority+1});
-			corners.push_back({detections[i].box.tl(), priority});
-			corners.push_back({detections[i].box.br(), priority});
+			corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width/2, 0), priority+2});
+			corners.push_back({detections[i].box.tl(), priority+1});
+			corners.push_back({detections[i].box.br(), priority});
+			corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width, 0), priority+1});
+			corners.push_back({detections[i].box.br()+cv::Point2i(0-detections[i].box.width, 0), priority});
 		}
-		return maxRect(imageSize, corners);
-	}
-
-	Log(Log::DEBUG)<<"Using center crop as there are no detections";
-	return maxRect(imageSize);
+		else
+		{
+			corners.push_back({detections[i].box.tl(), priority});
+			corners.push_back({detections[i].box.br(), priority});
+			corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width, 0), priority});
+			corners.push_back({detections[i].box.br()+cv::Point2i(0-detections[i].box.width, 0), priority});
+		}
+	}
+
+	bool incompleate;
+	out = maxRect(incompleate, imageSize, corners);
+	return incompleate;
 }
SmartCrop/intelligentroi.h (modified)
@@ -10,9 +10,9 @@ private:
 	int personId;
 	static bool compPointPrio(const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b, const cv::Point2i& center);
 	static void slideRectToPoint(cv::Rect& rect, const cv::Point2i& point);
-	static cv::Rect maxRect(const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude = {});
+	static cv::Rect maxRect(bool& incompleate, const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude = {});
 
 public:
 	InteligentRoi(const Yolo& yolo);
-	cv::Rect getCropRectangle(const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize);
+	bool getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize);
 };
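In rough terms the reworked maxRect picks the largest centered square, slides it toward the must-include points, and reports through the new incompleate flag whether they could all be covered. An approximate Python rendering of that idea (not the exact sliding logic, purely illustrative):

    def max_rect(image_w: int, image_h: int, points: list[tuple[int, int]]):
        side = min(image_w, image_h)      # largest square that fits the image
        x = (image_w - side) // 2         # start centered
        y = (image_h - side) // 2
        incomplete = False
        if points:
            min_x = min(p[0] for p in points); max_x = max(p[0] for p in points)
            min_y = min(p[1] for p in points); max_y = max(p[1] for p in points)
            if max_x - min_x > side or max_y - min_y > side:
                incomplete = True  # caller may fall back to seam carving
            # slide the square (clamped to the image) toward the points
            x = max(0, min(min_x, image_w - side))
            y = max(0, min(min_y, image_h - side))
        return (x, y, side, side), incomplete

    print(max_rect(1920, 1080, [(100, 200), (700, 900)]))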
SmartCrop/main.cpp (new file, 440 lines)
@@ -0,0 +1,440 @@
#include <filesystem>
#include <iostream>
#include <opencv2/core.hpp>
#include <opencv2/core/types.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <algorithm>
#include <execution>
#include <string>
#include <vector>
#include <numeric>
#include <thread>
#include <mutex>

#include "yolo.h"
#include "log.h"
#include "options.h"
#include "utils.h"
#include "intelligentroi.h"
#include "seamcarving.h"
#include "facerecognizer.h"

const Yolo::Detection* pointInDetectionHoriz(int x, const std::vector<Yolo::Detection>& detections, const Yolo::Detection* ignore = nullptr)
{
    const Yolo::Detection* inDetection = nullptr;
    for(const Yolo::Detection& detection : detections)
    {
        if(ignore && ignore == &detection)
            continue;

        if(detection.box.x <= x && detection.box.x+detection.box.width >= x)
        {
            if(!inDetection || detection.box.br().x > inDetection->box.br().x)
                inDetection = &detection;
        }
    }
    return inDetection;
}

bool findRegionEndpointHoriz(int& x, const std::vector<Yolo::Detection>& detections, int imgSizeX)
{
    const Yolo::Detection* inDetection = pointInDetectionHoriz(x, detections);

    Log(Log::DEBUG, false)<<__func__<<" point "<<x;

    if(!inDetection)
    {
        const Yolo::Detection* closest = nullptr;
        for(const Yolo::Detection& detection : detections)
        {
            if(detection.box.x > x)
            {
                if(closest == nullptr || detection.box.x-x > closest->box.x-x)
                    closest = &detection;
            }
        }
        if(closest)
            x = closest->box.x;
        else
            x = imgSizeX;

        Log(Log::DEBUG)<<" is not in any box and will be moved to "<<x<<" where the closest box ("<<(closest ? closest->className : "null")<<") is";
        return false;
    }
    else
    {
        x = inDetection->box.br().x;
        Log(Log::DEBUG, false)<<" is in a box and will be moved to its end "<<x<<" where ";
        const Yolo::Detection* candidateDetection = pointInDetectionHoriz(x, detections, inDetection);
        if(candidateDetection && candidateDetection->box.br().x > x)
        {
            Log(Log::DEBUG)<<"it is again in a box";
            return findRegionEndpointHoriz(x, detections, imgSizeX);
        }
        else
        {
            Log(Log::DEBUG)<<"it is not in a box";
            return true;
        }
    }
}

std::vector<std::pair<cv::Mat, bool>> cutImageIntoHorzRegions(cv::Mat& image, const std::vector<Yolo::Detection>& detections)
{
    std::vector<std::pair<cv::Mat, bool>> out;

    std::cout<<__func__<<' '<<image.cols<<'x'<<image.rows<<std::endl;

    for(int x = 0; x < image.cols; ++x)
    {
        int start = x;
        bool frozen = findRegionEndpointHoriz(x, detections, image.cols);

        int width = x-start;
        if(x < image.cols)
            ++width;
        cv::Rect rect(start, 0, width, image.rows);
        Log(Log::DEBUG)<<__func__<<" region\t"<<rect;
        cv::Mat slice = image(rect);
        out.push_back({slice, frozen});
    }

    return out;
}

cv::Mat assembleFromSlicesHoriz(const std::vector<std::pair<cv::Mat, bool>>& slices)
{
    assert(!slices.empty());

    int cols = 0;
    for(const std::pair<cv::Mat, bool>& slice : slices)
        cols += slice.first.cols;

    // cv::Mat's constructor takes (rows, cols); all slices share a row count
    cv::Mat image(slices[0].first.rows, cols, slices[0].first.type());
    Log(Log::DEBUG)<<__func__<<' '<<image.size()<<' '<<cols<<' '<<slices[0].first.rows;

    int col = 0;
    for(const std::pair<cv::Mat, bool>& slice : slices)
    {
        cv::Rect rect(col, 0, slice.first.cols, slice.first.rows);
        Log(Log::DEBUG)<<__func__<<' '<<rect;
        slice.first.copyTo(image(rect));
        col += slice.first.cols-1;
    }

    return image;
}

void transposeRect(cv::Rect& rect)
{
    int x = rect.x;
    rect.x = rect.y;
    rect.y = x;

    int width = rect.width;
    rect.width = rect.height;
    rect.height = width;
}

bool seamCarveResize(cv::Mat& image, std::vector<Yolo::Detection> detections, double targetAspectRatio = 1.0)
{
    detections.erase(std::remove_if(detections.begin(), detections.end(), [](const Yolo::Detection& detection){return detection.priority < 3;}), detections.end());

    double aspectRatio = image.cols/static_cast<double>(image.rows);

    Log(Log::DEBUG)<<"Image size "<<image.size()<<" aspect ratio "<<aspectRatio<<" target aspect ratio "<<targetAspectRatio;

    bool vertical = false;
    if(aspectRatio > targetAspectRatio)
        vertical = true;

    int requiredLines = 0;
    if(!vertical)
        requiredLines = image.rows*targetAspectRatio - image.cols;
    else
        requiredLines = image.cols/targetAspectRatio - image.rows;

    Log(Log::DEBUG)<<__func__<<' '<<requiredLines<<" lines are required in "<<(vertical ? "vertical" : "horizontal")<<" direction";

    if(vertical)
    {
        cv::transpose(image, image);
        for(Yolo::Detection& detection : detections)
            transposeRect(detection.box);
    }

    std::vector<std::pair<cv::Mat, bool>> slices = cutImageIntoHorzRegions(image, detections);
    Log(Log::DEBUG)<<"Image has "<<slices.size()<<" slices:";
    int totalResizableSize = 0;
    for(const std::pair<cv::Mat, bool>& slice : slices)
    {
        Log(Log::DEBUG)<<"a "<<(slice.second ? "frozen" : "unfrozen")<<" slice of size "<<slice.first.cols;
        if(!slice.second)
            totalResizableSize += slice.first.cols;
    }

    if(totalResizableSize < requiredLines+1)
    {
        Log(Log::WARN)<<"Unable to seam carve as there are only "<<totalResizableSize<<" unfrozen cols";
        if(vertical)
            cv::transpose(image, image);
        return false;
    }

    // distribute the required seams over the unfrozen slices, proportional to their width
    std::vector<int> seamsForSlice(slices.size(), 0);
    for(size_t i = 0; i < slices.size(); ++i)
    {
        if(!slices[i].second)
            seamsForSlice[i] = (static_cast<double>(slices[i].first.cols)/totalResizableSize)*requiredLines;
    }

    // push the integer-rounding residual onto the last unfrozen slice
    int residual = requiredLines - std::accumulate(seamsForSlice.begin(), seamsForSlice.end(), decltype(seamsForSlice)::value_type(0));
    for(ssize_t i = slices.size()-1; i >= 0; --i)
    {
        if(!slices[i].second)
        {
            seamsForSlice[i] += residual;
            break;
        }
    }

    for(size_t i = 0; i < slices.size(); ++i)
    {
        if(seamsForSlice[i] != 0)
        {
            bool ret = SeamCarving::strechImage(slices[i].first, seamsForSlice[i], true);
            if(!ret)
            {
                if(vertical)
                    cv::transpose(image, image);
                return false;
            }
        }
    }

    image = assembleFromSlicesHoriz(slices);

    if(vertical)
        cv::transpose(image, image);

    return true;
}

void drawDebugInfo(cv::Mat &image, const cv::Rect& rect, const std::vector<Yolo::Detection>& detections)
{
    for(const Yolo::Detection& detection : detections)
    {
        cv::rectangle(image, detection.box, detection.color, 3);
        std::string label = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4) + ' ' + std::to_string(detection.priority);
        cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_DUPLEX, 1, 1, 0);
        cv::Rect textBox(detection.box.x, detection.box.y - 40, labelSize.width + 10, labelSize.height + 20);
        cv::rectangle(image, textBox, detection.color, cv::FILLED);
        cv::putText(image, label, cv::Point(detection.box.x + 5, detection.box.y - 10), cv::FONT_HERSHEY_DUPLEX, 1, cv::Scalar(0, 0, 0), 1, 0);
    }

    cv::rectangle(image, rect, cv::Scalar(0, 0, 255), 8);
}

static void reduceSize(cv::Mat& image, const cv::Size& targetSize)
{
    int longTargetSize = std::max(targetSize.width, targetSize.height)*2;
    if(std::max(image.cols, image.rows) > longTargetSize)
    {
        if(image.cols > image.rows)
        {
            double ratio = static_cast<double>(longTargetSize)/image.cols;
            cv::resize(image, image, {longTargetSize, static_cast<int>(image.rows*ratio)}, 0, 0, ratio < 1 ? cv::INTER_AREA : cv::INTER_CUBIC);
        }
        else
        {
            double ratio = static_cast<double>(longTargetSize)/image.rows;
            cv::resize(image, image, {static_cast<int>(image.cols*ratio), longTargetSize}, 0, 0, ratio < 1 ? cv::INTER_AREA : cv::INTER_CUBIC);
        }
    }
}

void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yolo, FaceRecognizer* recognizer,
    std::mutex& recognizerMutex, const std::filesystem::path& debugOutputPath)
{
    InteligentRoi intRoi(yolo);
    cv::Mat image = cv::imread(path);
    if(!image.data)
    {
        Log(Log::WARN)<<"could not load image "<<path<<" skipping";
        return;
    }

    reduceSize(image, config.targetSize);

    std::vector<Yolo::Detection> detections = yolo.runInference(image);

    Log(Log::DEBUG)<<"Got "<<detections.size()<<" detections for "<<path;
    for(Yolo::Detection& detection : detections)
    {
        bool hasmatch = false;
        if(recognizer && detection.className == "person")
        {
            cv::Mat person = image(detection.box);
            recognizerMutex.lock();
            FaceRecognizer::Detection match = recognizer->isMatch(person);
            recognizerMutex.unlock();
            if(match.person >= 0)
            {
                detection.priority += 10;
                hasmatch = true;
                detections.push_back({0, "Face", match.confidence, 20, {255, 0, 0}, match.rect});
            }
        }
        Log(Log::DEBUG)<<detection.class_id<<": "<<detection.className<<" at "<<detection.box<<" with prio "<<detection.priority<<(hasmatch ? " has match" : "");
    }

    cv::Rect crop;
    bool incompleate = intRoi.getCropRectangle(crop, detections, image.size());

    if(config.seamCarving && incompleate)
    {
        bool ret = seamCarveResize(image, detections, config.targetSize.aspectRatio());
        if(ret && image.size().aspectRatio() != config.targetSize.aspectRatio())
        {
            detections = yolo.runInference(image);
        }
    }

    cv::Mat croppedImage;

    if(image.size().aspectRatio() != config.targetSize.aspectRatio() && incompleate)
    {
        intRoi.getCropRectangle(crop, detections, image.size());

        if(config.debug)
        {
            cv::Mat debugImage = image.clone();
            drawDebugInfo(debugImage, crop, detections);
            bool ret = cv::imwrite(debugOutputPath/path.filename(), debugImage);
            if(!ret)
                Log(Log::WARN)<<"could not save debug image to "<<debugOutputPath/path.filename()<<" skipping";
        }

        croppedImage = image(crop);
    }
    else if(!incompleate)
    {
        croppedImage = image(crop);
    }
    else
    {
        croppedImage = image;
    }

    cv::Mat resizedImage;
    cv::resize(croppedImage, resizedImage, config.targetSize, 0, 0, cv::INTER_CUBIC);
    bool ret = cv::imwrite(config.outputDir/path.filename(), resizedImage);
    if(!ret)
        Log(Log::WARN)<<"could not save image to "<<config.outputDir/path.filename()<<" skipping";
}

void threadFn(const std::vector<std::filesystem::path>& images, const Config& config, FaceRecognizer* recognizer,
    std::mutex& recognizerMutex, const std::filesystem::path& debugOutputPath)
{
    Yolo yolo(config.modelPath, {640, 480}, config.classesPath, false);
    for(std::filesystem::path path : images)
        pipeline(path, config, yolo, recognizer, recognizerMutex, debugOutputPath);
}

template<typename T>
std::vector<std::vector<T>> splitVector(const std::vector<T>& vec, size_t parts)
{
    std::vector<std::vector<T>> out;

    size_t length = vec.size()/parts;
    size_t remain = vec.size() % parts;

    size_t begin = 0;
    size_t end = 0;

    // if there are fewer elements than parts, fewer than `parts` chunks result
    for (size_t i = 0; i < std::min(parts, vec.size()); ++i)
    {
        end += (remain > 0) ? (length + !!(remain--)) : length;
        out.push_back(std::vector<T>(vec.begin() + begin, vec.begin() + end));
        begin = end;
    }

    return out;
}

int main(int argc, char* argv[])
{
    Log::level = Log::INFO;

    Config config;
    argp_parse(&argp, argc, argv, 0, 0, &config);

    if(config.outputDir.empty())
    {
        Log(Log::ERROR)<<"an output path \"-o\" is required";
        return 1;
    }

    if(config.imagePaths.empty())
    {
        Log(Log::ERROR)<<"at least one input image or directory is required";
        return 1;
    }

    std::vector<std::filesystem::path> imagePaths;

    for(const std::filesystem::path& path : config.imagePaths)
        getImageFiles(path, imagePaths);

    Log(Log::DEBUG)<<"Images:";
    for(const std::filesystem::path& path : imagePaths)
        Log(Log::DEBUG)<<path;

    if(imagePaths.empty())
    {
        Log(Log::ERROR)<<"no image was found";
        return 1;
    }

    if(!std::filesystem::exists(config.outputDir))
    {
        if(!std::filesystem::create_directory(config.outputDir))
        {
            Log(Log::ERROR)<<"could not create directory at "<<config.outputDir;
            return 1;
        }
    }

    std::filesystem::path debugOutputPath(config.outputDir/"debug");
    if(config.debug)
    {
        if(!std::filesystem::exists(debugOutputPath))
            std::filesystem::create_directory(debugOutputPath);
    }

    FaceRecognizer* recognizer = nullptr;
    std::mutex recognizerMutex;
    if(!config.focusPersonImage.empty())
    {
        cv::Mat personImage = cv::imread(config.focusPersonImage);
        if(personImage.empty())
        {
            Log(Log::ERROR)<<"Could not load image from "<<config.focusPersonImage;
            return 1;
        }
        recognizer = new FaceRecognizer();
        recognizer->addReferances({personImage});
        recognizer->setThreshold(config.threshold);
    }

    std::vector<std::thread> threads;
    std::vector<std::vector<std::filesystem::path>> imagePathParts = splitVector(imagePaths, std::thread::hardware_concurrency());

    // splitVector may return fewer chunks than hardware_concurrency when there
    // are few images, so iterate over the chunks rather than the thread count
    for(size_t i = 0; i < imagePathParts.size(); ++i)
        threads.push_back(std::thread(threadFn, imagePathParts[i], std::ref(config), recognizer, std::ref(recognizerMutex), std::ref(debugOutputPath)));

    for(std::thread& thread : threads)
        thread.join();

    return 0;
}
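The seam budgeting inside seamCarveResize distributes the required seams across unfrozen slices in proportion to their width and pushes the integer-rounding residual onto the last unfrozen slice. The same arithmetic in miniature:

    def distribute_seams(slice_widths: list[int], frozen: list[bool], required: int) -> list[int]:
        resizable = sum(w for w, f in zip(slice_widths, frozen) if not f)
        # proportional share per unfrozen slice, truncated toward zero
        seams = [0 if f else int(w / resizable * required) for w, f in zip(slice_widths, frozen)]
        residual = required - sum(seams)
        for i in range(len(seams) - 1, -1, -1):
            if not frozen[i]:
                seams[i] += residual  # last unfrozen slice absorbs the remainder
                break
        return seams

    print(distribute_seams([300, 200, 500], [False, True, False], 120))  # [45, 0, 75]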
SmartCrop/options.h (new file, 98 lines)
@@ -0,0 +1,98 @@
#pragma once
#include <string>
#include <vector>
#include <argp.h>
#include <iostream>
#include <filesystem>
#include <opencv2/core/types.hpp>
#include "log.h"

const char *argp_program_version = "AIImagePreprocesses";
const char *argp_program_bug_address = "<carl@uvos.xyz>";
static char doc[] = "Application that transforms images into the formats, sizes and aspect ratios required for ai training";
static char args_doc[] = "FILE(S)";

static struct argp_option options[] =
{
    {"verbose", 'v', 0, 0, "Show debug messages" },
    {"quiet", 'q', 0, 0, "only output data" },
    {"model", 'm', "[FILENAME]", 0, "YoloV8 model to use for detection" },
    {"classes", 'c', "[FILENAME]", 0, "classes text file to use" },
    {"out", 'o', "[DIRECTORY]", 0, "directory where images are to be saved" },
    {"debug", 'd', 0, 0, "output debug images" },
    {"seam-carving", 's', 0, 0, "use seam carving to change image aspect ratio instead of cropping"},
    {"size", 'z', "[PIXELS]", 0, "target output size, default: 512"},
    {"focus-person", 'f', "[FILENAME]", 0, "a file name to an image of a person that the crop should focus on"},
    {"person-threshold", 't', "[NUMBER]", 0, "the threshold at which to consider a person matched, defaults to 0.363"},
    {0}
};

struct Config
{
    std::vector<std::filesystem::path> imagePaths;
    std::filesystem::path modelPath;
    std::filesystem::path classesPath;
    std::filesystem::path outputDir;
    std::filesystem::path focusPersonImage;
    bool seamCarving = false;
    bool debug = false;
    double threshold = 0.363;
    cv::Size targetSize = cv::Size(512, 512);
};

static error_t parse_opt (int key, char *arg, struct argp_state *state)
{
    Config *config = reinterpret_cast<Config*>(state->input);
    try
    {
        switch (key)
        {
        case 'q':
            Log::level = Log::ERROR;
            break;
        case 'v':
            Log::level = Log::DEBUG;
            break;
        case 'm':
            config->modelPath = arg;
            break;
        case 'c':
            config->classesPath = arg;
            break;
        case 'd':
            config->debug = true;
            break;
        case 'o':
            config->outputDir.assign(arg);
            break;
        case 's':
            config->seamCarving = true;
            break;
        case 'f':
            config->focusPersonImage = arg;
            break;
        case 't':
            config->threshold = std::atof(arg);
            break;
        case 'z':
        {
            int x = std::stoi(arg);
            config->targetSize = cv::Size(x, x);
            break;
        }
        case ARGP_KEY_ARG:
            config->imagePaths.push_back(arg);
            break;
        default:
            return ARGP_ERR_UNKNOWN;
        }
    }
    catch(const std::invalid_argument& ex)
    {
        std::cout<<arg<<" passed for argument -"<<static_cast<char>(key)<<" is not a valid number.\n";
        return ARGP_KEY_ERROR;
    }
    return 0;
}

static struct argp argp = {options, parse_opt, args_doc, doc};
|
@ -1,19 +1,19 @@
|
|||
#include "seamcarving.h"
|
||||
|
||||
#include <opencv2/imgcodecs.hpp>
|
||||
#include <opencv2/highgui/highgui.hpp>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <iostream>
|
||||
#if __cplusplus >= 201703L
|
||||
#include <filesystem>
|
||||
#endif
|
||||
#include <cfloat>
|
||||
#include <vector>
|
||||
#include "log.h"
|
||||
|
||||
SeamCarving::SeamCarving(const cv::Mat &img, int seams, bool grow) :
|
||||
image(img), seams(seams), grow(grow) {}
|
||||
|
||||
void SeamCarving::init()
|
||||
bool SeamCarving::strechImage(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect)
|
||||
{
|
||||
cv::Mat newFrame = image.clone();
|
||||
assert(!newFrame.empty());
|
||||
std::vector<std::vector<int>> vecSeams;
|
||||
|
||||
for(int i = 0; i < seams; i++)
|
||||
{
|
||||
|
@@ -24,230 +24,55 @@ void SeamCarving::init()
        cv::Mat pathIntensityMat = computePathIntensityMat(gradientMagnitude);

        if(pathIntensityMat.rows == 0 && pathIntensityMat.cols == 0)
        {
            finalImage = image;
            break;
        }
            return false;
        std::vector<int> seam = getLeastImportantPath(pathIntensityMat);
        vecSeams.push_back(seam);
        if(seamsVect)
            seamsVect->push_back(seam);

        newFrame = removeLeastImportantPath(newFrame,seam);
        newFrame = removeLeastImportantPath(newFrame, seam);

        if(newFrame.rows == 0 && newFrame.cols == 0)
        {
            finalImage = image;
            break;
        }
        if(newFrame.rows == 0 || newFrame.cols == 0)
            return false;
    }

    if (grow)
    {
        cv::Mat growMat = image.clone();

        for (int i = 0; i < vecSeams.size(); i++)
        for(size_t i = 0; i < vecSeams.size(); i++)
        {
            growMat = addLeastImportantPath(growMat,vecSeams[i]);
        }
        finalImage = growMat;
        image = growMat;
    }
    else
    {
        finalImage = newFrame;
        image = newFrame;
    }

    sliderPos = seams;

    return true;
}

void SeamCarving::computeNewFinalImage(int sliderPos)
bool SeamCarving::strechImageVert(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect)
{
    if(sliderPos == 0)
    {
        finalImage = image;
        return;
    }
    if(sliderPos < 1 || sliderPos >= sliderMax-1)
    {
        return;
    }
    if(sliderPos > vecSeams.size())
    {
        cv::Mat newFrame = finalImage.clone();
        for(int i = vecSeams.size()-1; i < sliderPos; i++)
        {
            //Gradient Magnitude for intensity of image.
            cv::Mat gradientMagnitude = computeGradientMagnitude(newFrame);
            //Use DP to create the real energy map that is used for path calculation.
            // Strictly using vertical paths for testing simplicity.
            cv::Mat pathIntensityMat = computePathIntensityMat(gradientMagnitude);

            if(pathIntensityMat.rows == 0 && pathIntensityMat.cols == 0)
            {
                finalImage = image;
                break;
            }
            std::vector<int> seam = getLeastImportantPath(pathIntensityMat);
            vecSeams.push_back(seam);
            newFrame = removeLeastImportantPath(newFrame,seam);
            if(newFrame.rows == 0 && newFrame.cols == 0)
            {
                finalImage = image;
                break;
            }
        }
        if (grow)
        {
            cv::Mat growMat = image.clone();

            for (int i = 0; i < vecSeams.size(); i++)
            {
                growMat = addLeastImportantPath(growMat,vecSeams[i]);
            }

            finalImage = growMat;
        }
        else
        {
            finalImage = newFrame;
        }
    }
    else if (sliderPos < vecSeams.size())
    {
        cv::Mat newFrame = image.clone();
        for(int i = 0; i < sliderPos; i++) // TODO check if it is faster to add seams back (probably not)
        {

            if (grow)
            {
                newFrame = addLeastImportantPath(newFrame,vecSeams[i]);
            }
            else
            {
                newFrame = removeLeastImportantPath(newFrame,vecSeams[i]);
            }

            if(newFrame.rows == 0 && newFrame.cols == 0)
            {
                finalImage = image;
                break;
            }
        }
        finalImage = newFrame;
    }
    cv::transpose(image, image);
    bool ret = strechImage(image, seams, grow, seamsVect);
    cv::transpose(image, image);
    return ret;
}

const cv::Mat& SeamCarving::getFinalImage()
bool SeamCarving::strechImageWithSeamsImage(cv::Mat& image, cv::Mat& seamsImage, int seams, bool grow)
{
    return finalImage;
}
    std::vector<std::vector<int>> seamsVect;
    seamsImage = image.clone();

void SeamCarving::showSeamsImg()
{
    cv::Mat seamsFrame = image.clone();
    //std::cout << "sliderPos: " << sliderPos << std::endl;
    for(int i = 0; i < sliderPos; i++)
    {
        seamsFrame = drawSeam(seamsFrame, vecSeams[i]);
    }
    cv::imwrite("output/seams_image.jpg", seamsFrame);
    cv::imshow( "Image Seams", seamsFrame);
}
    bool ret = SeamCarving::strechImage(image, seams, grow, &seamsVect);
    if(!ret)
        return false;

static void onChange( int pos, void* object )
{
    SeamCarving* sc = (SeamCarving*)(object);
    /*if(sc->getBlockUpdateStatus()) {
        return;
    }*/
    sc->computeNewFinalImage(pos);
    imshow("Final Image", sc->getFinalImage());
#if DEBUG
    sc->showSeamsImg();
#endif
}
static void onMouse( int event, int x, int y, int, void* object)
{
    SeamCarving* sc = (SeamCarving*)(object);
    if( event == cv::EVENT_LBUTTONDOWN ||
        event == cv::EVENT_RBUTTONDOWN ||
        event == cv::EVENT_MBUTTONDOWN
        )
    {
        sc->setBlockUpdate(true);
    }
    else if(event == cv::EVENT_LBUTTONUP ||
        event == cv::EVENT_RBUTTONUP ||
        event == cv::EVENT_MBUTTONUP)
    {
        sc->setBlockUpdate(false);
    }
}

void SeamCarving::setBlockUpdate(bool bUpdate)
{
    blockUpdate = bUpdate;
}

bool SeamCarving::getBlockUpdateStatus()
{
    return blockUpdate;
}

void SeamCarving::showImage()
{
#if __cplusplus >= 201703L
    if(!std::filesystem::exists("output"))
    {
        std::filesystem::create_directory("output");
    }
#endif
    if( image.empty() )
    {
        std::cout << "Could not open raw image" << std::endl ;
        return;
    }
    namedWindow( "Raw Image", cv::WINDOW_AUTOSIZE );
    cv::imshow( "Raw Image", image );

    if( finalImage.empty() )
    {
        std::cout << "Could not open final image" << std::endl ;
        return;
    }
#if DEBUG
    namedWindow( "gradient Image", cv::WINDOW_AUTOSIZE );
    cv::Mat gradient = computeGradientMagnitude(image);
    cv::Mat u8_image;
    gradient.convertTo(u8_image, CV_8U);

    cv::imwrite("output/gradient_image.jpg", u8_image);
    cv::imshow("gradient Image", u8_image);

    namedWindow( "intensity Image", cv::WINDOW_AUTOSIZE );
    cv::Mat u8_image2;
    cv::Mat intensityMat = computePathIntensityMat(gradient);
    cv::Mat dst;
    cv::normalize(intensityMat, dst, 0, 255, cv::NORM_MINMAX);
    dst.convertTo(u8_image2, CV_8U);
    cv::imwrite("output/intensity_image.jpg", u8_image2);
    cv::imshow( "intensity Image", u8_image2);

    //cv::Mat engImg = GetEnergyImg(image);
    //namedWindow("energy Image", cv::WINDOW_AUTOSIZE);
    //cv::Mat u8_image3;
    //engImg.convertTo(u8_image3, CV_8U);
    //cv::imshow( "energy Image", u8_image3);
    namedWindow("Image Seams", cv::WINDOW_AUTOSIZE);
    showSeamsImg();

#endif

    namedWindow( "Final Image", cv::WINDOW_AUTOSIZE );
    cv::createTrackbar("Seams", "Final Image", &sliderPos, sliderMax, onChange, this);
    //cv::setMouseCallback("Final Image", onMouse, this );
    cv::imwrite("output/final_image.jpg", finalImage);
    cv::imshow("Final Image", finalImage);
    cv::waitKey(0);
    for(size_t i = 0; i < seamsVect.size(); ++i)
        seamsImage = drawSeam(seamsImage, seamsVect[i]);
    return true;
}

cv::Mat SeamCarving::GetEnergyImg(const cv::Mat &img)
@@ -392,9 +217,7 @@ cv::Mat SeamCarving::removeLeastImportantPath(const cv::Mat &original, const std
    cv::Size size = cv::Size(orgSize.width-1, orgSize.height);
    cv::Mat newMat = cv::Mat(size, original.type());

    unsigned char *rawOrig = original.data;
    unsigned char *rawOutput = newMat.data;
    for(int row = 0; row < seam.size(); row++)
    for(size_t row = 0; row < seam.size(); row++)
    {
        removePixel(original, newMat, row, seam[row]);
    }
@@ -460,9 +283,7 @@ cv::Mat SeamCarving::addLeastImportantPath(const cv::Mat &original, const std::v
    cv::Size size = cv::Size(orgSize.width+1, orgSize.height);
    cv::Mat newMat = cv::Mat(size, original.type());

    unsigned char *rawOrig = original.data;
    unsigned char *rawOutput = newMat.data;
    for(int row = 0; row < seam.size(); row++)
    for(size_t row = 0; row < seam.size(); row++)
    {
        //std::cout << "row: " << row << ", col: " << seam[row] << std::endl;
        addPixel(original, newMat, row, seam[row]);
@@ -518,3 +339,18 @@ void SeamCarving::addPixel(const cv::Mat &original, cv::Mat &outputMat, int row,
        rawOutput[newRowStart + leftPixel*channels+2] = (unsigned char) ((byte3 + byte3L)/2);
    }
}

cv::Mat SeamCarving::drawSeam(const cv::Mat &frame, const std::vector<int> &seam)
{
    cv::Mat retMat = frame.clone();
    for(int row = 0; row < frame.rows; row++)
    {
        for(int col = 0; col < frame.cols; col++)
        {
            retMat.at<cv::Vec3b>(row, seam[row])[0] = 0;
            retMat.at<cv::Vec3b>(row, seam[row])[1] = 255;
            retMat.at<cv::Vec3b>(row, seam[row])[2] = 0;
        }
    }
    return retMat;
}
24
SmartCrop/seamcarving.h
Normal file
@@ -0,0 +1,24 @@
#pragma once

#include <opencv2/core/core.hpp>
#include <vector>

class SeamCarving
{
private:
    static cv::Mat GetEnergyImg(const cv::Mat &img);
    static cv::Mat computeGradientMagnitude(const cv::Mat &frame);
    static float intensity(float currIndex, int start, int end);
    static cv::Mat computePathIntensityMat(const cv::Mat &rawEnergyMap);
    static std::vector<int> getLeastImportantPath(const cv::Mat &importanceMap);
    static cv::Mat removeLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam);
    static void removePixel(const cv::Mat &original, cv::Mat &outputMap, int row, int minCol);
    static cv::Mat addLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam);
    static void addPixel(const cv::Mat &original, cv::Mat &outputMat, int row, int minCol);
    static cv::Mat drawSeam(const cv::Mat &frame, const std::vector<int> &seam);

public:
    static bool strechImage(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect = nullptr);
    static bool strechImageVert(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect = nullptr);
    static bool strechImageWithSeamsImage(cv::Mat& image, cv::Mat& seamsImage, int seams, bool grow);
};
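Taken together with seamcarving.cpp above, the refactor replaces the stateful, trackbar-driven SeamCarving object with three stateless static entry points that carve the passed image in place and report failure via bool. A minimal usage sketch; the file names are placeholders:

    #include <opencv2/imgcodecs.hpp>
    #include "seamcarving.h"

    int main()
    {
        cv::Mat image = cv::imread("input.jpg"); // placeholder path
        if(image.empty())
            return 1;

        // Remove 50 seams in place, shrinking the width by 50 pixels;
        // grow=true would insert seams instead. strechImageVert does the
        // same along the other axis via the transpose trick.
        if(!SeamCarving::strechImage(image, 50, false))
            return 1;

        cv::imwrite("carved.jpg", image); // placeholder path
        return 0;
    }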
SmartCrop/yolo.cpp

@@ -11,8 +11,8 @@
#define INCBIN_PREFIX r
#include "incbin.h"

INCTXT(defaultClasses, "../classes.txt");
INCBIN(defaultModel, "../yolov8x.onnx");
INCTXT(defaultClasses, WEIGHT_DIR "/classes.txt");
INCBIN(defaultModel, WEIGHT_DIR "/yolov8x.onnx");

Yolo::Yolo(const std::filesystem::path &onnxModelPath, const cv::Size &modelInputShape,
    const std::filesystem::path& classesTxtFilePath, bool runWithOCl)

@@ -22,6 +22,7 @@ Yolo::Yolo(const std::filesystem::path &onnxModelPath, const cv::Size &modelInpu

    if(classesTxtFilePath.empty())
    {
        Log(Log::INFO)<<"Using builtin classes";
        loadClasses(rdefaultClassesData);
    }
    else

@@ -31,19 +32,21 @@ Yolo::Yolo(const std::filesystem::path &onnxModelPath, const cv::Size &modelInpu
    }

    if(!modelPath.empty())
    {
        net = cv::dnn::readNetFromONNX(modelPath);
    }
    else
    {
        Log(Log::INFO)<<"Using builtin yolo model";
        net = cv::dnn::readNetFromONNX((const char*)rdefaultModelData, rdefaultModelSize);

    }
    if(runWithOCl)
    {
        std::cout << "\nRunning on OCV" << std::endl;
        net.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT);
        net.setPreferableTarget(cv::dnn::DNN_TARGET_OPENCL);
    }
    else
    {
        std::cout << "\nRunning on CPU" << std::endl;
        net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
        net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
    }

@@ -176,14 +179,33 @@ std::vector<Yolo::Detection> Yolo::runInference(const cv::Mat &input)

        result.className = classes[result.class_id].first;
        result.priority = classes[result.class_id].second;
        clampBox(boxes[idx], input.size());
        result.box = boxes[idx];

        detections.push_back(result);
    }

    return detections;
}


void Yolo::clampBox(cv::Rect& box, const cv::Size& size)
{
    if(box.x < 0)
    {
        box.width += box.x;
        box.x = 0;
    }
    if(box.y < 0)
    {
        box.height += box.y;
        box.y = 0;
    }
    if(box.x+box.width > size.width)
        box.width = size.width - box.x;
    if(box.y+box.height > size.height)
        box.height = size.height - box.y;
}

void Yolo::loadClasses(const std::string& classesStr)
{
    std::vector<std::string> candidateClasses = tokenizeBinaryIgnore(classesStr, '\n', '"', '\\');
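The new Yolo::clampBox trims detection rectangles to the image bounds before they are stored, so later crops such as image(crop) cannot index outside the image. A standalone illustration of the same clamping logic:

    #include <opencv2/core/types.hpp>
    #include <iostream>

    // Same clamping logic as Yolo::clampBox, reproduced standalone for illustration.
    static void clampBox(cv::Rect& box, const cv::Size& size)
    {
        if(box.x < 0) { box.width += box.x; box.x = 0; }
        if(box.y < 0) { box.height += box.y; box.y = 0; }
        if(box.x + box.width > size.width)   box.width  = size.width  - box.x;
        if(box.y + box.height > size.height) box.height = size.height - box.y;
    }

    int main()
    {
        cv::Rect box(-10, 620, 100, 100);  // overlaps the left and bottom edges
        clampBox(box, cv::Size(640, 640));
        std::cout<<box<<std::endl;         // prints [90 x 20 from (0, 620)]
        return 0;
    }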
SmartCrop/yolo.h

@@ -27,19 +27,16 @@ private:
    static constexpr float modelScoreThreshold = 0.45;
    static constexpr float modelNMSThreshold = 0.50;

    std::string modelPath;
    std::vector<std::pair<std::string, int>> classes;
    cv::Size2f modelShape;
    bool letterBoxForSquare = true;
    cv::dnn::Net net;

    void loadClasses(const std::string& classes);
    void loadOnnxNetwork(const std::filesystem::path& path);
    cv::Mat formatToSquare(const cv::Mat &source);

    std::string modelPath;

    std::vector<std::pair<std::string, int>> classes;

    cv::Size2f modelShape;

    bool letterBoxForSquare = true;

    cv::dnn::Net net;
    static void clampBox(cv::Rect& box, const cv::Size& size);

public:
    Yolo(const std::filesystem::path &onnxModelPath = "", const cv::Size& modelInputShape = {640, 480},
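With the INCTXT/INCBIN changes above baking Weights/classes.txt and Weights/yolov8x.onnx into the binary at compile time, both paths may now be left empty and the builtin data is used. A hedged usage sketch; it assumes Detection exposes className and box, as the rest of this diff does, and the image path is a placeholder:

    #include <iostream>
    #include <opencv2/imgcodecs.hpp>
    #include "yolo.h"

    int main()
    {
        // Empty paths select the model and classes compiled in via INCBIN/INCTXT.
        Yolo yolo("", {640, 480}, "", false);
        cv::Mat image = cv::imread("photo.jpg"); // placeholder path
        if(image.empty())
            return 1;
        for(const Yolo::Detection& detection : yolo.runInference(image))
            std::cout<<detection.className<<" at "<<detection.box<<"\n";
        return 0;
    }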
80
Weights/classes.txt
Normal file
@@ -0,0 +1,80 @@
person, 10
bicycle, 4
car, 3
motorcycle, 4
airplane, 4
bus, 4
train, 4
truck, 3
boat, 4
traffic light, 1
fire hydrant, 1
stop sign, 1
parking meter, 1
bench, 2
bird, 5
cat, 6
dog, 5
horse, 4
sheep, 5
cow, 4
elephant, 5
bear, 5
zebra, 5
giraffe, 5
backpack, 3
umbrella, 3
handbag, 3
tie, 3
suitcase, 2
frisbee, 3
skis, 3
snowboard, 3
sports ball, 3
kite, 4
baseball bat, 3
baseball glove, 3
skateboard, 3
surfboard, 3
tennis racket, 3
bottle, 2
wine glass, 2
cup, 2
fork, 1
knife, 1
spoon, 1
bowl, 1
banana, 1
apple, 1
sandwich,1
orange, 1
broccoli, 1
carrot, 1
hot dog, 1
pizza, 1
donut, 2
cake, 2
chair, 1
couch, 1
potted plant, 1
bed, 1
dining table, 1
toilet, 1
tv, 1
laptop, 1
mouse, 1
remote, 1
keyboard, 1
cell phone, 1
microwave, 1
oven, 1
toaster, 1
sink, 1
refrigerator, 1
book, 1
clock, 1
vase, 1
scissors, 1
teddy bear, 1
hair drier, 1
toothbrush, 1
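Each line of the new Weights/classes.txt is a COCO class name followed by an integer priority, which runInference copies into Detection::priority through the classes vector. An illustrative parser for this format, shown only to make the file's structure concrete; the repo's actual code path is Yolo::loadClasses via tokenizeBinaryIgnore, shown above:

    #include <sstream>
    #include <string>
    #include <utility>
    #include <vector>

    // Illustrative parser for the "name, priority" lines above; not the
    // repo's implementation.
    std::vector<std::pair<std::string, int>> parseClasses(const std::string& text)
    {
        std::vector<std::pair<std::string, int>> classes;
        std::istringstream stream(text);
        std::string line;
        while(std::getline(stream, line))
        {
            size_t comma = line.rfind(',');
            if(comma == std::string::npos)
                continue;
            std::string name = line.substr(0, comma);
            int priority = std::stoi(line.substr(comma + 1)); // tolerates "sandwich,1" without a space
            classes.push_back({name, priority});
        }
        return classes;
    }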
BIN
Weights/face_detection_yunet_2023mar.onnx
Normal file
Binary file not shown.

BIN
Weights/face_recognition_sface_2021dec.onnx
Normal file
Binary file not shown.

BIN
Weights/yolov8x.onnx
Normal file
Binary file not shown.
295
main.cpp

@@ -1,295 +0,0 @@
#include <filesystem>
#include <iostream>
#include <opencv2/core/types.hpp>
#include <opencv2/imgproc.hpp>
#include <algorithm>
#include <vector>

#include "yolo.h"
#include "log.h"
#include "options.h"
#include "utils.h"
#include "intelligentroi.h"

const Yolo::Detection* pointInDetectionHoriz(int x, const std::vector<Yolo::Detection>& detections, const Yolo::Detection* ignore = nullptr)
{
    const Yolo::Detection* inDetection = nullptr;
    for(const Yolo::Detection& detection : detections)
    {
        if(!ignore || ignore != &detection)
            continue;

        if(detection.box.x <= x && detection.box.x+detection.box.width <= x)
        {
            if(!inDetection || detection.box.br().x > inDetection->box.br().x)
                inDetection = &detection;
        }
    }
    return inDetection;
}

bool findRegionEndpointHoriz(int& x, const std::vector<Yolo::Detection>& detections, int imgSizeX)
{
    const Yolo::Detection* inDetection = pointInDetectionHoriz(x, detections);

    if(!inDetection)
    {
        const Yolo::Detection* closest = nullptr;
        for(const Yolo::Detection& detection : detections)
        {
            if(detection.box.x > x)
            {
                if(closest == nullptr || detection.box.x-x > closest->box.x-x)
                    closest = &detection;
            }
        }
        if(closest)
            x = closest->box.x;
        else
            x = imgSizeX;
        return false;
    }
    else
    {
        x = inDetection->box.br().x;
        const Yolo::Detection* candidateDetection = pointInDetectionHoriz(x, detections, inDetection);
        if(candidateDetection && candidateDetection->box.br().x > x)
            return findRegionEndpointHoriz(x, detections, imgSizeX);
        else
            return true;
    }
}

std::vector<std::pair<cv::Mat, bool>> cutImageIntoHorzRegions(cv::Mat& image, const std::vector<Yolo::Detection>& detections)
{
    std::vector<std::pair<cv::Mat, bool>> out;

    for(int x = 0; x < image.cols; ++x)
    {
        int start = x;
        bool frozen = findRegionEndpointHoriz(x, detections, image.cols);

        cv::Mat slice = image(cv::Rect(start, 0, x-start, image.rows));
        out.push_back({slice, frozen});
    }

    return out;
}

const Yolo::Detection* pointInDetectionVert(int y, const std::vector<Yolo::Detection>& detections, const Yolo::Detection* ignore = nullptr)
{
    const Yolo::Detection* inDetection = nullptr;
    for(const Yolo::Detection& detection : detections)
    {
        if(!ignore || ignore != &detection)
            continue;

        if(detection.box.y <= y && detection.box.y+detection.box.height <= y)
        {
            if(!inDetection || detection.box.br().y > inDetection->box.br().y)
                inDetection = &detection;
        }
    }
    return inDetection;
}

bool findRegionEndpointVert(int& y, const std::vector<Yolo::Detection>& detections, int imgSizeY)
{
    const Yolo::Detection* inDetection = pointInDetectionVert(y, detections);

    if(!inDetection)
    {
        const Yolo::Detection* closest = nullptr;
        for(const Yolo::Detection& detection : detections)
        {
            if(detection.box.y > y)
            {
                if(closest == nullptr || detection.box.y-y > closest->box.y-y)
                    closest = &detection;
            }
        }
        if(closest)
            y = closest->box.y;
        else
            y = imgSizeY;
        return false;
    }
    else
    {
        y = inDetection->box.br().y;
        const Yolo::Detection* candidateDetection = pointInDetectionVert(y, detections, inDetection);
        if(candidateDetection && candidateDetection->box.br().y > y)
            return findRegionEndpointVert(y, detections, imgSizeY);
        else
            return true;
    }
}

std::vector<std::pair<cv::Mat, bool>> cutImageIntoVertRegions(cv::Mat& image, const std::vector<Yolo::Detection>& detections)
{
    std::vector<std::pair<cv::Mat, bool>> out;

    for(int y = 0; y < image.rows; ++y)
    {
        int start = y;
        bool frozen = findRegionEndpointVert(y, detections, image.rows);

        cv::Mat slice = image(cv::Rect(0, start, image.cols, y-start));
        out.push_back({slice, frozen});
    }

    return out;
}

bool seamCarveResize(cv::Mat& image, const std::vector<Yolo::Detection>& detections, double targetAspectRatio = 1.0)
{
    double aspectRatio = image.cols/static_cast<double>(image.rows);

    bool vertical = false;
    cv::Mat workImage;
    if(aspectRatio > targetAspectRatio)
        vertical = true;

    int requiredLines = 0;
    if(!vertical)
        requiredLines = workImage.rows*targetAspectRatio - workImage.cols;
    else
        requiredLines = workImage.cols/targetAspectRatio - workImage.rows;

    Log(Log::DEBUG)<<__func__<<' '<<requiredLines<<" lines are required in "<<(vertical ? "vertical" : "horizontal")<<" direction";

    if(!vertical)
    {
        std::vector<std::pair<cv::Mat, bool>> slices = cutImageIntoHorzRegions(image, detections);
        int totalResizableSize = 0;
        for(const std::pair<cv::Mat, bool>& slice : slices)
        {
            if(slice.second)
                totalResizableSize += slice.first.cols;
        }

        std::vector<int> seamsForSlice(slices.size());
        for(size_t i = 0; i < slices.size(); ++i)
        {
            seamsForSlice[i] = (static_cast<double>(slices[i].first.cols)/totalResizableSize)*requiredLines;
        }
    }
    else
    {
        int totalResizableSize = 0;
        std::vector<std::pair<cv::Mat, bool>> slices = cutImageIntoVertRegions(image, detections);
    }

}

void drawDebugInfo(cv::Mat &image, const cv::Rect& rect, const std::vector<Yolo::Detection>& detections)
{
    for(const Yolo::Detection& detection : detections)
    {
        cv::rectangle(image, detection.box, detection.color, 4);
        std::string label = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4);
        cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_DUPLEX, 3, 2, 0);
        cv::Rect textBox(detection.box.x, detection.box.y - 80, labelSize.width + 10, labelSize.height + 20);
        cv::rectangle(image, textBox, detection.color, cv::FILLED);
        cv::putText(image, label, cv::Point(detection.box.x + 5, detection.box.y - 10), cv::FONT_HERSHEY_DUPLEX, 3, cv::Scalar(0, 0, 0), 2, 0);
    }

    cv::rectangle(image, rect, cv::Scalar(0, 0, 255), 8);
}

int main(int argc, char* argv[])
{
    Log::level = Log::INFO;

    Config config;
    argp_parse(&argp, argc, argv, 0, 0, &config);

    if(config.outputDir.empty())
    {
        Log(Log::ERROR)<<"a output path \"-o\" is required";
        return 1;
    }

    if(config.imagePaths.empty())
    {
        Log(Log::ERROR)<<"at least one input image or directory is required";
        return 1;
    }

    std::vector<std::filesystem::path> imagePaths;

    for(const std::filesystem::path& path : config.imagePaths)
        getImageFiles(path, imagePaths);

    if(imagePaths.empty())
    {
        Log(Log::ERROR)<<"no image was found\n";
        return 1;
    }

    Yolo yolo(config.modelPath, {640, 480}, config.classesPath, false);
    InteligentRoi intRoi(yolo);

    if(!std::filesystem::exists(config.outputDir))
    {
        if(!std::filesystem::create_directory(config.outputDir))
        {
            Log(Log::ERROR)<<"could not create directory at "<<config.outputDir;
            return 1;
        }
    }

    std::filesystem::path debugOutputPath(config.outputDir/"debug");
    if(config.debug)
    {
        if(!std::filesystem::exists(debugOutputPath))
            std::filesystem::create_directory(debugOutputPath);
    }

    for(const std::filesystem::path& path : imagePaths)
    {
        cv::Mat image = cv::imread(path);
        if(!image.data)
        {
            Log(Log::WARN)<<"could not load image "<<path<<" skipping";
            continue;
        }

        if(std::max(image.cols, image.rows) > 1024)
        {
            if(image.cols > image.rows)
            {
                double ratio = 1024.0/image.cols;
                cv::resize(image, image, {1024, static_cast<int>(image.rows*ratio)}, 0, 0, cv::INTER_CUBIC);
            }
            else
            {
                double ratio = 1024.0/image.rows;
                cv::resize(image, image, {static_cast<int>(image.cols*ratio), 1024}, 0, 0, cv::INTER_CUBIC);
            }
        }

        std::vector<Yolo::Detection> detections = yolo.runInference(image);

        Log(Log::DEBUG)<<"Got "<<detections.size()<<" detections for "<<path;
        for(const Yolo::Detection& detection : detections)
            Log(Log::DEBUG)<<detection.class_id<<": "<<detection.className<<" at "<<detection.box<<" with prio "<<detection.priority;


        cv::Rect crop = intRoi.getCropRectangle(detections, image.size());

        cv::Mat debugImage = image.clone();
        drawDebugInfo(debugImage, crop, detections);
        bool ret = cv::imwrite(debugOutputPath/path.filename(), debugImage);
        if(!ret)
            Log(Log::WARN)<<"could not save debug image to "<<debugOutputPath/path.filename()<<" skipping";

        cv::Mat croppedImage = image(crop);
        cv::Mat resizedImage;
        cv::resize(croppedImage, resizedImage, {512, 512}, 0, 0, cv::INTER_CUBIC);
        ret = cv::imwrite(config.outputDir/path.filename(), resizedImage);
        if(!ret)
            Log(Log::WARN)<<"could not save image to "<<config.outputDir/path.filename()<<" skipping";
    }
    return 0;
}
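The removed seamCarveResize was visibly unfinished: workImage is default-constructed and read before ever being assigned, the function can fall off the end without returning a value, and the computed seamsForSlice is never used. For reference, the aspect-ratio arithmetic it was driving at, written against image instead of the empty workImage; this is a hypothetical fix, not code from the repo:

    #include <opencv2/core.hpp>

    // Hypothetical fix of the removed computation: the original read from
    // workImage, which was default-constructed and never assigned.
    static int requiredGrowLines(const cv::Mat& image, double targetAspectRatio, bool& vertical)
    {
        vertical = image.cols / static_cast<double>(image.rows) > targetAspectRatio;
        if(!vertical)
            return image.rows * targetAspectRatio - image.cols; // columns to add to widen the image
        else
            return image.cols / targetAspectRatio - image.rows; // rows to add to heighten the image
    }

For example, a 1024x768 image with targetAspectRatio = 1.0 is too wide (vertical case) and needs 1024/1.0 - 768 = 256 extra rows.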
70
options.h

@@ -1,70 +0,0 @@
#pragma once
#include <string>
#include <vector>
#include <argp.h>
#include <iostream>
#include <filesystem>
#include "log.h"

const char *argp_program_version = "AIImagePreprocesses";
const char *argp_program_bug_address = "<carl@uvos.xyz>";
static char doc[] = "Application that trainsforms images into formats, sizes and aspect ratios required for ai training";
static char args_doc[] = "[IMAGES]";

static struct argp_option options[] =
{
    {"verbose", 'v', 0, 0, "Show debug messages" },
    {"quiet", 'q', 0, 0, "only output data" },
    {"model", 'm', "[FILENAME]", 0, "YoloV8 model to use for detection" },
    {"classes", 'c', "[FILENAME]", 0, "classes text file to use" },
    {"out", 'o', "[DIRECTORY]", 0, "directory whre images are to be saved" },
    {"debug", 'd', 0, 0, "output debug images" },
    {"seam-carving", 's', 0, 0, "model to train: "}
};

struct Config
{
    std::vector<std::filesystem::path> imagePaths;
    std::filesystem::path modelPath;
    std::filesystem::path classesPath;
    std::filesystem::path outputDir;
    bool seamCarving = false;
    bool debug = false;
};

static error_t parse_opt (int key, char *arg, struct argp_state *state)
{
    Config *config = reinterpret_cast<Config*>(state->input);
    switch (key)
    {
    case 'q':
        Log::level = Log::ERROR;
        break;
    case 'v':
        Log::level = Log::DEBUG;
        break;
    case 'm':
        config->modelPath = arg;
        break;
    case 'c':
        config->classesPath = arg;
        break;
    case 'd':
        config->debug = true;
        break;
    case 'o':
        config->outputDir.assign(arg);
        break;
    case 's':
        config->seamCarving = true;
        break;
    case ARGP_KEY_ARG:
        config->imagePaths.push_back(arg);
        break;
    default:
        return ARGP_ERR_UNKNOWN;
    }
    return 0;
}

static struct argp argp = {options, parse_opt, args_doc, doc};
seamcarving.h

@@ -1,61 +0,0 @@
#ifndef __SEAM__CARVING_HPP__
#define __SEAM__CARVING_HPP__

#include <opencv2/core/core.hpp>
#define DEBUG 0

class SeamCarving {
public:
    void showImage();
    const cv::Mat& getFinalImage();
    virtual void computeNewFinalImage(int pos);
    void setBlockUpdate(bool bUpdate);
    bool getBlockUpdateStatus();
    virtual void showSeamsImg();

protected:
    SeamCarving(const cv::Mat &img, int seams, bool grow);
    void init();
    virtual cv::Mat drawSeam(const cv::Mat &frame, const std::vector<int> &seam) = 0;
    cv::Mat image;
    cv::Mat finalImage;
    int seams;
    bool grow;
    int sliderMax;
    int sliderPos;
    std::vector<std::vector<int>> vecSeams;

private:
    cv::Mat GetEnergyImg(const cv::Mat &img);
    cv::Mat computeGradientMagnitude(const cv::Mat &frame);
    float intensity(float currIndex, int start, int end);
    cv::Mat computePathIntensityMat(const cv::Mat &rawEnergyMap);
    std::vector<int> getLeastImportantPath(const cv::Mat &importanceMap);
    cv::Mat removeLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam);
    void removePixel(const cv::Mat &original, cv::Mat &outputMap, int row, int minCol);
    cv::Mat addLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam);
    void addPixel(const cv::Mat &original, cv::Mat &outputMat, int row, int minCol);
    bool blockUpdate = false;

};

class SeamCarvingHorizontal : public SeamCarving
{
public:
    SeamCarvingHorizontal(char* fileName, int seams=100, bool grow=false);
protected:
    virtual cv::Mat drawSeam(const cv::Mat &frame, const std::vector<int> &seam) override;
};

class SeamCarvingVertical : public SeamCarving {
public:
    SeamCarvingVertical(char* fileName, int seams=100, bool grow=false);
    virtual void computeNewFinalImage(int pos) override;
#if DEBUG
    virtual void showSeamsImg() override;
#endif
protected:
    virtual cv::Mat drawSeam(const cv::Mat &frame, const std::vector<int> &seam) override;
};

#endif // __SEAM__CARVING_HPP__
seamcarvinghoriz.cpp

@@ -1,28 +0,0 @@
#include "seamcarving.h"
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <iostream>
#include <cfloat>

cv::Mat SeamCarvingHorizontal::drawSeam(const cv::Mat &frame, const std::vector<int> &seam)
{
    cv::Mat retMat = frame.clone();
    for(int row = 0; row < frame.rows; row++)
    {
        for(int col = 0; col < frame.cols; col++)
        {
            retMat.at<cv::Vec3b>(row, seam[row])[0] = 0;
            retMat.at<cv::Vec3b>(row, seam[row])[1] = 255;
            retMat.at<cv::Vec3b>(row, seam[row])[2] = 0;
        }
    }
    return retMat;
}

SeamCarvingHorizontal::SeamCarvingHorizontal(char* fileName, int seams, bool grow) :
    SeamCarving( cv::imread(fileName, cv::IMREAD_COLOR), seams, grow)
{
    sliderMax = image.cols;
    init();
}
seamcarvingvert.cpp

@@ -1,51 +0,0 @@
#include "seamcarving.h"
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <iostream>
#include <cfloat>

SeamCarvingVertical::SeamCarvingVertical(char* fileName, int seams, bool grow) :
    SeamCarving( cv::imread(fileName, cv::IMREAD_COLOR), seams, grow)
{
    sliderMax = image.rows;
    cv::Mat oldImage = image;
    image = image.t();
    init();
    image = oldImage;
    finalImage = finalImage.t();
}

cv::Mat SeamCarvingVertical::drawSeam(const cv::Mat &frame, const std::vector<int> &seam)
{
    cv::Mat retMat = frame.clone();
    for(int col = 0; col < frame.cols; col++)
    {
        for(int row = 0; row < frame.rows; row++)
        {
            retMat.at<cv::Vec3b>(seam[col], col)[0] = 0;
            retMat.at<cv::Vec3b>(seam[col], col)[1] = 255;
            retMat.at<cv::Vec3b>(seam[col], col)[2] = 0;
        }
    }
    return retMat;
}

void SeamCarvingVertical::computeNewFinalImage(int pos)
{
    cv::Mat oldImage = image;
    image = image.t();
    SeamCarving::computeNewFinalImage(pos);
    image = oldImage;
    finalImage = finalImage.t();
}

#if DEBUG
void SeamCarvingVertical::showSeamsImg()
{
    cv::Mat oldImage = this->image;
    this->image = this->image.t();
    SeamCarving::showImage();
    this->image = oldImage;
}
#endif
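Worth noting: the transpose trick survives the refactor. The deleted SeamCarvingVertical wrapped init() and computeNewFinalImage() between image = image.t() and finalImage = finalImage.t(), and the new strechImageVert does exactly the same with cv::transpose around strechImage. A minimal standalone demonstration that the round trip is lossless, which is what makes the trick safe:

    #include <opencv2/core.hpp>
    #include <cassert>

    int main()
    {
        cv::Mat m(4, 3, CV_8UC1);
        cv::randu(m, 0, 255);

        // Transposing, operating on rows, and transposing back is equivalent
        // to operating on columns of the original image.
        cv::Mat t;
        cv::transpose(m, t); // t is 3x4: rows of t are columns of m
        cv::transpose(t, t); // back to 4x3
        assert(cv::countNonZero(m != t) == 0); // round trip changes nothing
        return 0;
    }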