initial commit

2024-06-14 08:54:09 +02:00 · 2024-06-14 08:54:09 +02:00 · cd1e2756bc
commit cd1e2756bc
39 changed files with 4163 additions and 0 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -0,0 +1,7 @@
+# CMake 3.8 is the first release that understands CMAKE_CXX_STANDARD 17.
+cmake_minimum_required(VERSION 3.8)
+project(ImageAiUtils)
+
+# Build as C++17 and fail at configure time rather than silently falling
+# back to an older standard when the compiler cannot provide it.
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+# Path of the Weights directory next to this file; set before
+# add_subdirectory so the SmartCrop subdirectory can see it.
+set(WEIGHT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/Weights)
+
+add_subdirectory(SmartCrop)
--- a/DanbooruTagger/DanbooruTagger.py
+++ b/DanbooruTagger/DanbooruTagger.py
@ -0,0 +1,105 @@
+import warnings
+from deepdanbooru_onnx import DeepDanbooru
+from PIL import Image
+import argparse
+import cv2
+import os
+from multiprocessing import Process, Queue
+import json
+from tqdm import tqdm
+
+
+image_ext_ocv = [".bmp", ".jpeg", ".jpg", ".png"]
+
+
+def find_image_files(path: str) -> list[str]:
+	"""Recursively collect the image files found below *path*.
+
+	A file is kept when its lower-cased extension is listed in
+	``image_ext_ocv``. Each result is the walked directory joined with the
+	file name, in the order ``os.walk`` reports them.
+	"""
+	return [
+		os.path.join(folder, entry)
+		for folder, _subdirs, entries in os.walk(path)
+		for entry in entries
+		if os.path.splitext(entry)[1].lower() in image_ext_ocv
+	]
+
+
+def image_loader(paths: list[str]):
+	"""Yield ``(PIL image, path)`` for every file in *paths* that can be read.
+
+	Files are decoded with ``cv2.imread`` and converted from BGR to RGB
+	before being wrapped in a PIL image. A file that cannot be decoded is
+	reported with a warning and skipped.
+	"""
+	for path in paths:
+		imagebgr = cv2.imread(path)
+		# imread signals failure by returning None. Check before converting:
+		# cvtColor raises on a None input, so the warning was never reached.
+		if imagebgr is None:
+			print(f"Warning: could not load {path}")
+			continue
+		image = cv2.cvtColor(imagebgr, cv2.COLOR_BGR2RGB)
+		yield Image.fromarray(image), path
+
+
+def pipeline(queue: Queue, image_paths: list[str], device: int):
+	"""Worker entry point: tag each image and report the result on *queue*.
+
+	One ``{"file_name": path, "text": prompt}`` dict is put on the queue per
+	path in *image_paths*. The prompt is every returned tag, each preceded
+	by ", " (including the first). *device* is accepted but not used here.
+	"""
+	tagger = DeepDanbooru()
+
+	for image_path in image_paths:
+		prompt = "".join(", " + tag for tag in tagger(image_path))
+		queue.put({"file_name": image_path, "text": prompt})
+
+
+def split_list(input_list, count):
+	"""Cut *input_list* into *count* consecutive slices.
+
+	The first ``count - 1`` slices each hold ``len(input_list) / count``
+	items (rounded down); the final slice takes everything that remains.
+	"""
+	chunk_size = int(len(input_list) / count)
+	starts = [index * chunk_size for index in range(count)]
+	for begin in starts[:-1]:
+		yield input_list[begin: begin + chunk_size]
+	# The last slice is open-ended so the division remainder is not lost.
+	yield input_list[(count - 1) * chunk_size:]
+
+
+def save_meta(meta_file, meta, reldir, common_description):
+	"""Append *meta* to *meta_file* as a single JSON line.
+
+	*meta* is modified in place: ``file_name`` is rewritten relative to
+	*reldir* and, when *common_description* is not None, that description
+	is put in front of ``text``.
+	"""
+	meta["file_name"] = os.path.relpath(meta["file_name"], reldir)
+	if common_description is not None:
+		prefixed_text = common_description + meta["text"]
+		meta["text"] = prefixed_text
+	serialized = json.dumps(meta)
+	meta_file.write(serialized + '\n')
+
+
+if __name__ == "__main__":
+	# Imported by name only: ``queue`` itself is used for the result queue below.
+	from queue import Empty
+
+	parser = argparse.ArgumentParser("A script to tag images via DeepDanbooru")
+	# NOTE: --batch is parsed but not used anywhere in this script.
+	parser.add_argument('--batch', '-b', default=4, type=int, help="Batch size to use for inference")
+	parser.add_argument('--common_description', '-c', help="An optional description that will be preended to the ai generated one")
+	# Mandatory: the directory is walked for images and receives metadata.jsonl.
+	parser.add_argument('--image_dir', '-i', required=True, help="A directory containg the images to tag")
+	args = parser.parse_args()
+
+	nparalell = 2
+
+	image_paths = find_image_files(args.image_dir)
+	image_path_chunks = list(split_list(image_paths, nparalell))
+
+	print(f"Will use {nparalell} processies to create tags")
+
+	# One worker process per chunk; every worker reports on the shared queue.
+	queue = Queue()
+	processies = list()
+	for i in range(0, nparalell):
+		processies.append(Process(target=pipeline, args=(queue, image_path_chunks[i], i)))
+		processies[-1].start()
+
+	progress = tqdm(desc="Generateing tags", total=len(image_paths))
+	with open(os.path.join(args.image_dir, "metadata.jsonl"), mode='w') as output_file:
+		# Collect results while any worker is running. Blocking with a short
+		# timeout avoids spinning at full CPU load between results.
+		while any(process.is_alive() for process in processies):
+			try:
+				meta = queue.get(timeout=0.1)
+			except Empty:
+				continue
+			save_meta(output_file, meta, args.image_dir, args.common_description)
+			progress.update()
+
+		# All workers have exited; write out whatever is still queued.
+		while not queue.empty():
+			meta = queue.get()
+			save_meta(output_file, meta, args.image_dir, args.common_description)
+			progress.update()
+
+	for process in processies:
+		process.join()
+
--- a/DanbooruTagger/deepdanbooru_onnx/init.py
+++ b/DanbooruTagger/deepdanbooru_onnx/init.py
@ -0,0 +1,3 @@
+from .deepdanbooru_onnx import DeepDanbooru
+from .deepdanbooru_onnx import process_image
+__version__ = '0.0.8'
--- a/DanbooruTagger/deepdanbooru_onnx/pycache/init.cpython-312.pyc
+++ b/DanbooruTagger/deepdanbooru_onnx/pycache/init.cpython-312.pyc
--- a/DanbooruTagger/deepdanbooru_onnx/pycache/deepdanbooru_onnx.cpython-312.pyc
+++ b/DanbooruTagger/deepdanbooru_onnx/pycache/deepdanbooru_onnx.cpython-312.pyc
--- a/DanbooruTagger/deepdanbooru_onnx/deepdanbooru_onnx.py
+++ b/DanbooruTagger/deepdanbooru_onnx/deepdanbooru_onnx.py
@ -0,0 +1,244 @@
+import onnxruntime as ort
+from PIL import Image
+import numpy as np
+import os
+from tqdm import tqdm
+import requests
+import hashlib
+from typing import List, Union
+import shutil
+from pathlib import Path
+
+
+def process_image(image: Image.Image) -> np.ndarray:
+    """
+    Turn a PIL image into a float array with a leading batch axis.
+    The image is converted to RGB, resized to 512x512, cast to float32
+    and divided by 255.
+    :param image: the image to convert
+    :return: array of shape (1, 512, 512, 3)
+    """
+
+    rgb = image.convert("RGB").resize((512, 512))
+    pixels = np.array(rgb).astype(np.float32) / 255
+    # Channels stay last; only a batch dimension of one is added in front.
+    return pixels[np.newaxis, ...]
+
+
+def download(url: str, save_path: str, md5: str, length: int) -> bool:
+    """
+    Download a file from url to save_path and verify its md5.
+    Anything already at save_path is overwritten. Failures (network, HTTP
+    error status, file system) are printed and reported as False.
+    :param url: the url of the file to download
+    :param save_path: the path to save the file
+    :param md5: the expected md5 hex digest of the file
+    :param length: the expected size in bytes, used as the progress bar total
+    :return: True if the file was downloaded and its md5 matches, False otherwise
+    """
+
+    try:
+        # The context manager releases the connection even if copying fails.
+        with requests.get(url=url, stream=True, timeout=60) as response:
+            # Without this check an HTTP error page would be saved as the file.
+            response.raise_for_status()
+            with open(save_path, "wb") as f:
+                with tqdm.wrapattr(
+                    response.raw, "read", total=length, desc="Downloading"
+                ) as r_raw:
+                    shutil.copyfileobj(r_raw, f)
+
+        # Hash in chunks so a large model file is never fully held in memory.
+        digest = hashlib.md5()
+        with open(save_path, "rb") as f:
+            for chunk in iter(lambda: f.read(1 << 20), b""):
+                digest.update(chunk)
+        return digest.hexdigest() == md5
+    except Exception as e:
+        # Deliberately broad: callers only need a success flag.
+        print(e)
+        return False
+
+
+def _ensure_file(url: str, path: str, md5: str, length: int, label: str) -> None:
+    """Make sure path holds the file with the given md5, downloading it if needed."""
+    if os.path.exists(path):
+        digest = hashlib.md5()
+        with open(path, "rb") as f:
+            for chunk in iter(lambda: f.read(1 << 20), b""):
+                digest.update(chunk)
+        if digest.hexdigest() == md5:
+            return
+        # The existing copy does not match: discard it and fetch a fresh one.
+        os.remove(path)
+    if not download(url, path, md5, length):
+        raise ValueError(f"{label} download failed")
+
+
+def download_model():
+    """
+    Make sure the model and tags file exist in ~/.deepdanbooru_onnx/.
+    A file is downloaded when it is missing or its md5 does not match.
+    :return: the path to the model and tags file
+    :raises ValueError: if a download fails or its md5 does not match
+    """
+
+    model_url = (
+        "https://huggingface.co/chinoll/deepdanbooru/resolve/main/deepdanbooru.onnx"
+    )
+    tags_url = "https://huggingface.co/chinoll/deepdanbooru/resolve/main/tags.txt"
+    model_md5 = "16be4e40ebcc0b1d1915bbf31f00969f"
+    tags_md5 = "a3f764de985cdeba89f1d232a4204402"
+    model_length = 643993025
+    tags_length = 133810
+
+    home = str(Path.home()) + "/.deepdanbooru_onnx/"
+    # exist_ok avoids a crash when several processes create the directory at once.
+    os.makedirs(home, exist_ok=True)
+
+    model_path = home + "deepdanbooru.onnx"
+    tags_path = home + "tags.txt"
+
+    _ensure_file(model_url, model_path, model_md5, model_length, "Model")
+    _ensure_file(tags_url, tags_path, tags_md5, tags_length, "Tags")
+    return model_path, tags_path
+
+
+class DeepDanbooru:
+    """
+    Callable wrapper that runs the DeepDanbooru ONNX model through onnxruntime.
+    Calling an instance dispatches on the argument type (file path, numpy
+    array, list/tuple or PIL image) and reports the tags whose score
+    reaches the configured threshold.
+    """
+
+    def __init__(
+        self,
+        mode: str = "auto",
+        model_path: Union[str, None] = None,
+        tags_path: Union[str, None] = None,
+        threshold: Union[float, int] = 0.6,
+        pin_memory: bool = False,
+        batch_size: int = 1,
+    ):
+        """
+        Initialize the DeepDanbooru class.
+        :param mode: execution provider: "cpu", "gpu", "tensorrt" or "auto"
+            ("auto" uses CUDA when onnxruntime reports it, otherwise CPU)
+        :param model_path: the path to the model file; downloaded when None
+        :param tags_path: the path to the tags file; downloaded when None
+        :param threshold: minimum score, between 0 and 1, for a tag to be reported
+        :param pin_memory: when True, from_list_inference preprocesses the
+            whole list up front instead of batch by batch
+        :param batch_size: images per inference call in from_list_inference
+        :raises TypeError: if threshold is not a float or int
+        :raises ValueError: if threshold is out of range, or mode is unknown
+            or not available in this onnxruntime installation
+        :raises FileNotFoundError: if a given model_path or tags_path is missing
+        """
+
+        available = ort.get_available_providers()
+        providers = {
+            "cpu": "CPUExecutionProvider",
+            "gpu": "CUDAExecutionProvider",
+            "tensorrt": "TensorrtExecutionProvider",
+            "auto": (
+                "CUDAExecutionProvider"
+                if "CUDAExecutionProvider" in available
+                else "CPUExecutionProvider"
+            ),
+        }
+
+        if not isinstance(threshold, (float, int)):
+            raise TypeError("threshold must be float or int")
+        if threshold < 0 or threshold > 1:
+            raise ValueError("threshold must be between 0 and 1")
+        if mode not in providers:
+            raise ValueError(
+                "Mode not supported. Please choose from: cpu, gpu, tensorrt"
+            )
+        if providers[mode] not in available:
+            raise ValueError(
+                f"Your device is not supported {mode}. Please choose from: cpu"
+            )
+        if model_path is not None and not os.path.exists(model_path):
+            raise FileNotFoundError("Model file not found")
+        if tags_path is not None and not os.path.exists(tags_path):
+            raise FileNotFoundError("Tags file not found")
+
+        if model_path is None or tags_path is None:
+            default_model, default_tags = download_model()
+            # Fill in only what the caller left out; a path that was passed
+            # explicitly must not be replaced by the downloaded default.
+            if model_path is None:
+                model_path = default_model
+            if tags_path is None:
+                tags_path = default_tags
+
+        self.session = ort.InferenceSession(model_path, providers=[providers[mode]])
+        # One tag per line; the context manager closes the file again.
+        with open(tags_path, "r") as tags_file:
+            self.tags = [line.replace("\n", "") for line in tags_file.readlines()]
+
+        self.input_name = self.session.get_inputs()[0].name
+        self.output_name = [output.name for output in self.session.get_outputs()]
+        self.threshold = threshold
+        self.pin_memory = pin_memory
+        self.batch_size = batch_size
+        self.mode = mode
+        # Maps an image hash to its tag dict; used by from_list_inference.
+        # NOTE: grows without bound for the lifetime of the instance.
+        self.cache = {}
+
+    def __str__(self) -> str:
+        return f"DeepDanbooru(mode={self.mode}, threshold={self.threshold}, pin_memory={self.pin_memory}, batch_size={self.batch_size})"
+
+    def __repr__(self) -> str:
+        return self.__str__()
+
+    def from_image_inference(self, image: Image.Image) -> dict:
+        """Preprocess a PIL image and return its tag -> score dict."""
+        image = process_image(image)
+        return self.predict(image)
+
+    def from_ndarray_inference(self, image: np.ndarray) -> dict:
+        """
+        Return the tag -> score dict for an already preprocessed array.
+        :param image: array of shape (1, 512, 512, 3)
+        :raises ValueError: if the array has any other shape
+        """
+        if image.shape != (1, 512, 512, 3):
+            raise ValueError(f"Image must be {(1, 512, 512, 3)}")
+        return self.predict(image)
+
+    # Original (misspelled) name, kept so existing callers continue to work.
+    from_ndarray_inferece = from_ndarray_inference
+
+    def from_file_inference(self, image: str) -> dict:
+        """Open the image at the given path and return its tag -> score dict."""
+        return self.from_image_inference(Image.open(image))
+
+    def from_list_inference(self, image: Union[list, tuple]) -> List[dict]:
+        """
+        Tag several images, running the model on batch_size images at a time.
+        This is a generator: it yields one tag -> score dict per input, in
+        input order. Results are cached by image hash, so an image already
+        seen by this instance is not run through the model again.
+        :param image: list or tuple of items accepted by Image.open
+        """
+        if self.pin_memory:
+            image = [process_image(Image.open(i)) for i in image]
+        for start in range(0, len(image), self.batch_size):
+            batch = image[start : start + self.batch_size]
+            pending, pending_idx, hashlist = [], set(), []
+            for j, item in enumerate(batch):
+                if self.pin_memory:
+                    # item is already a float array scaled by 1/255. Hash its
+                    # raw bytes: casting to uint8 first would truncate almost
+                    # every value to 0 and make different images collide.
+                    img = None
+                    image_hash = hashlib.md5(
+                        np.ascontiguousarray(item).tobytes()
+                    ).hexdigest()
+                else:
+                    img = Image.open(item)
+                    image_hash = hashlib.md5(
+                        np.array(img).astype(np.uint8)
+                    ).hexdigest()
+                hashlist.append(image_hash)
+                if image_hash in self.cache:
+                    continue
+                pending.append(item if self.pin_memory else process_image(img))
+                pending_idx.add(j)
+
+            # Only images without a cached result are sent through the model.
+            results = self.inference(np.vstack(pending)) if pending else []
+            results_idx = 0
+
+            for j, image_hash in enumerate(hashlist):
+                if j in pending_idx:
+                    image_tag = {
+                        tag: score
+                        for tag, score in zip(self.tags, results[results_idx])
+                        if score >= self.threshold
+                    }
+                    results_idx += 1
+                    self.cache[image_hash] = image_tag
+                    yield image_tag
+                else:
+                    yield self.cache[image_hash]
+
+    def inference(self, image):
+        """Run the ONNX session on the array and return its first output."""
+        return self.session.run(self.output_name, {self.input_name: image})[0]
+
+    def predict(self, image):
+        """Return tag -> score for the first image in the batch, filtered by threshold."""
+        result = self.inference(image)
+        return {
+            tag: score
+            for tag, score in zip(self.tags, result[0])
+            if score >= self.threshold
+        }
+
+    def __call__(self, image) -> Union[dict, List[dict]]:
+        """
+        Tag the given input, dispatching on its type.
+        :param image: file path, numpy array, list/tuple or PIL image
+        :return: a tag -> score dict, or for a list/tuple a generator of them
+        :raises ValueError: if the input type is not supported
+        """
+        if isinstance(image, str):
+            return self.from_file_inference(image)
+        elif isinstance(image, np.ndarray):
+            return self.from_ndarray_inference(image)
+        elif isinstance(image, (list, tuple)):
+            return self.from_list_inference(image)
+        elif isinstance(image, Image.Image):
+            return self.from_image_inference(image)
+        else:
+            raise ValueError("Image must be a file path or a numpy array or list/tuple")
--- a/674
+++ b/674
@ -0,0 +1,674 @@
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.  We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors.  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+  To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights.  Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received.  You must make sure that they, too, receive
+or can get the source code.  And you must show them these terms so they
+know their rights.
+
+  Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+  For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software.  For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+  Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so.  This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software.  The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable.  Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products.  If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+  Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary.  To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                       TERMS AND CONDITIONS
+
+  0. Definitions.
+
+  "This License" refers to version 3 of the GNU General Public License.
+
+  "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+  "The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+
+  To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+  A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+  To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+  To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+  An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+  1. Source Code.
+
+  The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+
+  A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+  The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+  The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+  The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+  The Corresponding Source for a work in source code form is that
+same work.
+
+  2. Basic Permissions.
+
+  All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+  You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+  Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+  No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+  When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+  4. Conveying Verbatim Copies.
+
+  You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+  You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+  5. Conveying Modified Source Versions.
+
+  You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+    a) The work must carry prominent notices stating that you modified
+    it, and giving a relevant date.
+
+    b) The work must carry prominent notices stating that it is
+    released under this License and any conditions added under section
+    7.  This requirement modifies the requirement in section 4 to
+    "keep intact all notices".
+
+    c) You must license the entire work, as a whole, under this
+    License to anyone who comes into possession of a copy.  This
+    License will therefore apply, along with any applicable section 7
+    additional terms, to the whole of the work, and all its parts,
+    regardless of how they are packaged.  This License gives no
+    permission to license the work in any other way, but it does not
+    invalidate such permission if you have separately received it.
+
+    d) If the work has interactive user interfaces, each must display
+    Appropriate Legal Notices; however, if the Program has interactive
+    interfaces that do not display Appropriate Legal Notices, your
+    work need not make them do so.
+
+  A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+  6. Conveying Non-Source Forms.
+
+  You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+    a) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by the
+    Corresponding Source fixed on a durable physical medium
+    customarily used for software interchange.
+
+    b) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by a
+    written offer, valid for at least three years and valid for as
+    long as you offer spare parts or customer support for that product
+    model, to give anyone who possesses the object code either (1) a
+    copy of the Corresponding Source for all the software in the
+    product that is covered by this License, on a durable physical
+    medium customarily used for software interchange, for a price no
+    more than your reasonable cost of physically performing this
+    conveying of source, or (2) access to copy the
+    Corresponding Source from a network server at no charge.
+
+    c) Convey individual copies of the object code with a copy of the
+    written offer to provide the Corresponding Source.  This
+    alternative is allowed only occasionally and noncommercially, and
+    only if you received the object code with such an offer, in accord
+    with subsection 6b.
+
+    d) Convey the object code by offering access from a designated
+    place (gratis or for a charge), and offer equivalent access to the
+    Corresponding Source in the same way through the same place at no
+    further charge.  You need not require recipients to copy the
+    Corresponding Source along with the object code.  If the place to
+    copy the object code is a network server, the Corresponding Source
+    may be on a different server (operated by you or a third party)
+    that supports equivalent copying facilities, provided you maintain
+    clear directions next to the object code saying where to find the
+    Corresponding Source.  Regardless of what server hosts the
+    Corresponding Source, you remain obligated to ensure that it is
+    available for as long as needed to satisfy these requirements.
+
+    e) Convey the object code using peer-to-peer transmission, provided
+    you inform other peers where the object code and Corresponding
+    Source of the work are being offered to the general public at no
+    charge under subsection 6d.
+
+  A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+  A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+  "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+  If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+  The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+  Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+  7. Additional Terms.
+
+  "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+  When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+  Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+    a) Disclaiming warranty or limiting liability differently from the
+    terms of sections 15 and 16 of this License; or
+
+    b) Requiring preservation of specified reasonable legal notices or
+    author attributions in that material or in the Appropriate Legal
+    Notices displayed by works containing it; or
+
+    c) Prohibiting misrepresentation of the origin of that material, or
+    requiring that modified versions of such material be marked in
+    reasonable ways as different from the original version; or
+
+    d) Limiting the use for publicity purposes of names of licensors or
+    authors of the material; or
+
+    e) Declining to grant rights under trademark law for use of some
+    trade names, trademarks, or service marks; or
+
+    f) Requiring indemnification of licensors and authors of that
+    material by anyone who conveys the material (or modified versions of
+    it) with contractual assumptions of liability to the recipient, for
+    any liability that these contractual assumptions directly impose on
+    those licensors and authors.
+
+  All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+  If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+  Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+  8. Termination.
+
+  You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+  However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+  Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+  Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+  9. Acceptance Not Required for Having Copies.
+
+  You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+  10. Automatic Licensing of Downstream Recipients.
+
+  Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+
+  An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+  You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+  11. Patents.
+
+  A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+
+  A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+  Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+  In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+  If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+  If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+  A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+  Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+  12. No Surrender of Others' Freedom.
+
+  If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+  13. Use with the GNU Affero General Public License.
+
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+  14. Revised Versions of this License.
+
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+  If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+  15. Disclaimer of Warranty.
+
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. Limitation of Liability.
+
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+  17. Interpretation of Sections 15 and 16.
+
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+    <program>  Copyright (C) <year>  <name of author>
+    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<https://www.gnu.org/licenses/>.
+
+  The GNU General Public License does not permit incorporating your program
+into proprietary programs.  If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.  But first, please read
+<https://www.gnu.org/licenses/why-not-lgpl.html>.
--- a/LLavaTagger/LLavaTagger.py
+++ b/LLavaTagger/LLavaTagger.py
@ -0,0 +1,142 @@
+import warnings
+warnings.simplefilter(action='ignore')
+from transformers import AutoProcessor, LlavaForConditionalGeneration, BitsAndBytesConfig, logging
+import argparse
+import cv2
+import torch
+import os
+import numpy
+from typing import Iterator
+from torch.multiprocessing import Process, Queue
+import json
+from tqdm import tqdm
+
+
+# Lower-case file extensions that find_image_files accepts as images.
+image_ext_ocv = [".bmp", ".jpeg", ".jpg", ".png"]
+
+
+def find_image_files(path: str) -> list[str]:
+	"""Recursively collect the files below path whose extension is listed in image_ext_ocv.
+
+	The extension is lower-cased before the comparison. Results are returned in
+	os.walk order, each joined with the directory it was found in.
+	"""
+	return [
+		os.path.join(directory, entry)
+		for directory, _subdirs, entries in os.walk(path)
+		for entry in entries
+		if os.path.splitext(entry)[1].lower() in image_ext_ocv
+	]
+
+
+def image_loader(paths: list[str]) -> Iterator[tuple[numpy.ndarray, str]]:
+	"""Lazily load images and yield them as RGB arrays together with their path.
+
+	Files that cv2.imread cannot load are reported with a warning and skipped.
+
+	Args:
+		paths: paths of the image files to load.
+
+	Yields:
+		(image, path) tuples, image being the cv2.imread result converted with
+		cv2.COLOR_BGR2RGB.
+	"""
+	for path in paths:
+		imagebgr = cv2.imread(path)
+		# cv2.imread reports failure by returning None. Check that before the
+		# colour conversion: cvtColor raises on an empty input, so testing its
+		# result for None afterwards can never trigger the warning.
+		if imagebgr is None:
+			print(f"Warning: could not load {path}")
+			continue
+		yield cv2.cvtColor(imagebgr, cv2.COLOR_BGR2RGB), path
+
+
+def pipeline(queue: Queue, image_paths: list[str], prompt: str, device: torch.device, model_name_or_path: str, batch_size: int):
+	"""Generate a text for every loadable image in image_paths and put the results into queue.
+
+	The model is loaded onto device with a 4-bit (nf4) BitsAndBytes configuration.
+	Images are processed in batches of at most batch_size. For each image one dict
+	with the keys "file_name" and "text" is put into queue.
+	"""
+	quantization = BitsAndBytesConfig(
+		load_in_4bit=True,
+		bnb_4bit_compute_dtype=torch.float16,
+		bnb_4bit_use_double_quant=False,
+		bnb_4bit_quant_type='nf4',
+	)
+	model = LlavaForConditionalGeneration.from_pretrained(
+		model_name_or_path,
+		torch_dtype=torch.float16,
+		low_cpu_mem_usage=None,
+		quantization_config=quantization,
+		device_map=device,
+		attn_implementation="flash_attention_2",
+	)
+	processor = AutoProcessor.from_pretrained(model_name_or_path)
+	loader = image_loader(image_paths)
+
+	# Number of leading characters cut from every decoded string: the prompt length
+	# minus the "<image>" placeholder (presumably because the placeholder is not
+	# part of the decoded text - TODO confirm).
+	trim = len(prompt) - len("<image>")
+
+	exhausted = False
+	while not exhausted:
+		batch_images = []
+		batch_files = []
+		while len(batch_images) < batch_size:
+			image, filename = next(loader, (None, None))
+			if image is None:
+				# The loader has no more images; finish the partial batch and stop.
+				exhausted = True
+				break
+			batch_images.append(image)
+			batch_files.append(filename)
+
+		if not batch_images:
+			break
+
+		batch_prompts = [prompt] * len(batch_images)
+		inputs = processor(text=batch_prompts, images=batch_images, return_tensors="pt").to(model.device)
+		generate_ids = model.generate(**inputs, max_new_tokens=100, min_new_tokens=3, length_penalty=1.0, do_sample=False, temperature=1.0, top_k=50, top_p=1.0)
+		decodes = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
+		for index, decoded in enumerate(decodes):
+			queue.put({"file_name": batch_files[index], "text": decoded[trim:].strip()})
+
+
+def split_list(input_list, count):
+	"""Split input_list into count consecutive chunks of near-equal length.
+
+	The chunks keep the original order and together contain every element exactly
+	once. When the length is not divisible by count, the leading chunks get one
+	extra element each, so chunk lengths differ by at most one. This keeps the
+	work balanced instead of piling the whole remainder onto the last chunk.
+
+	Args:
+		input_list: the sequence to split; it must support len() and slicing.
+		count: number of chunks to produce, at least 1.
+
+	Yields:
+		count slices of input_list; slices are empty when there are fewer
+		elements than chunks.
+
+	Raises:
+		ValueError: on first iteration, if count is smaller than 1.
+	"""
+	if count < 1:
+		raise ValueError(f"count must be at least 1, got {count}")
+	base_length, remainder = divmod(len(input_list), count)
+	start = 0
+	for index in range(count):
+		end = start + base_length + (1 if index < remainder else 0)
+		yield input_list[start:end]
+		start = end
+
+
+def save_meta(meta_file, meta, reldir, common_description):
+	"""Write meta as one JSON line to meta_file.
+
+	meta is modified in place: its "file_name" entry is replaced by the path
+	relative to reldir and, when common_description is not None, the description
+	is prepended to its "text" entry.
+	"""
+	relative_name = os.path.relpath(meta["file_name"], reldir)
+	meta["file_name"] = relative_name
+	if common_description is not None:
+		meta["text"] = common_description + meta["text"]
+	line = json.dumps(meta)
+	meta_file.write(line + '\n')
+
+
+if __name__ == "__main__":
+	# Script entry point: finds the images below --image_dir, starts one worker
+	# process per visible GPU and writes the generated descriptions to
+	# metadata.jsonl inside the image directory.
+	from queue import Empty  # raised by Queue.get() when the timeout expires
+
+	parser = argparse.ArgumentParser("A script to tag images via llava")
+	parser.add_argument('--model', '-m', default="llava-hf/llava-1.5-13b-hf", help="model to use")
+	parser.add_argument('--quantize', '-q', action='store_true', help="load quantized")
+	parser.add_argument('--prompt', '-p', default="Please describe this image in 10 to 20 words.", help="Prompt to use on eatch image")
+	parser.add_argument('--batch', '-b', default=4, type=int, help="Batch size to use for inference")
+	parser.add_argument('--common_description', '-c', help="An optional description that will be preended to the ai generated one")
+	parser.add_argument('--image_dir', '-i', required=True, help="A directory containg the images to tag")
+	args = parser.parse_args()
+	# NOTE(review): args.quantize is parsed but never passed on; pipeline() always
+	# loads the model with its 4-bit configuration.
+
+	prompt = "USER: <image>\n" + args.prompt + "\nASSISTANT: "
+	os.environ["BITSANDBYTES_NOWELCOME"] = "1"
+
+	device_count = torch.cuda.device_count()
+	if device_count < 1:
+		# Without a device there is nothing to run on, and asking split_list()
+		# for zero chunks would only fail with an obscure exception.
+		print("No GPU is visible to torch, unable to create tags")
+		raise SystemExit(1)
+
+	image_paths = find_image_files(args.image_dir)
+	image_path_chunks = list(split_list(image_paths, device_count))
+
+	print(f"Will use {device_count} processies to create tags")
+
+	logging.set_verbosity_error()
+	warnings.filterwarnings("ignore")
+	torch.multiprocessing.set_start_method('spawn')
+
+	result_queue = Queue()
+	processies = list()
+	for i in range(0, device_count):
+		processies.append(Process(target=pipeline, args=(result_queue, image_path_chunks[i], prompt, torch.device(i), args.model, args.batch)))
+		processies[-1].start()
+
+	progress = tqdm(desc="Generateing tags", total=len(image_paths))
+	with open(os.path.join(args.image_dir, "metadata.jsonl"), mode='w') as output_file:
+		while True:
+			try:
+				# Block with a timeout instead of spinning on empty(), so the
+				# parent does not burn a CPU core while the workers run.
+				meta = result_queue.get(timeout=0.1)
+			except Empty:
+				if any(process.is_alive() for process in processies):
+					continue
+				break
+			save_meta(output_file, meta, args.image_dir, args.common_description)
+			progress.update()
+
+		# A worker may have queued results between the last timeout and its exit.
+		while not result_queue.empty():
+			meta = result_queue.get()
+			save_meta(output_file, meta, args.image_dir, args.common_description)
+			progress.update()
+	progress.close()
+
+	for process in processies:
+		process.join()
+
--- a/LLavaTagger/README.md
+++ b/LLavaTagger/README.md
@ -0,0 +1,21 @@
+# LLavaTagger
+
+LLavaTagger is a Python script that tags images based on a given prompt using the [LLaVA](https://llava-vl.github.io/) multimodal LLM. LLavaTagger supports using any number of GPUs in parallel for this task, with each GPU processing its own share of the images.
+
+## How to use
+
+First create a Python venv and install the required packages into it:
+
+	$ python -m venv venv
+	$ source venv/bin/activate
+	$ pip install -r requirements.txt
+
+Then run LLavaTagger for instance like so:
+
+	$ python LLavaTagger.py --common_description "a image of a cat, " --prompt "describe the cat in 10 to 20 words" --batch 8 --quantize --image_dir ~/cat_images
+
+By default LLavaTagger will run in parallel on all available GPUs. If this is undesirable, please use the ROCR_VISIBLE_DEVICES= or CUDA_VISIBLE_DEVICES= environment variable to hide unwanted GPUs.
+
+LLavaTagger will then create a metadata.jsonl in the image directory, suitable to be used by the scripts of [diffusers](https://github.com/huggingface/diffusers) to train Stable Diffusion (XL). If other formats are desired, ../utils contains scripts to transform the metadata into other formats, for instance for use with [kohya](https://github.com/bmaltais/kohya_ss).
+
+If editing the created tags is desired, [QImageTagger](https://uvos.xyz/git/uvos/QImageTagger) can be used for this purpose
--- a/LLavaTagger/requirements.txt
+++ b/LLavaTagger/requirements.txt
@ -0,0 +1,11 @@
+accelerate==0.29.0
+bitsandbytes
+huggingface-hub==0.22.2
+ninja==1.11.1.1
+safetensors==0.4.2
+tokenizers==0.15.2
+transformers
+torch
+opencv-python
+numpy
+tqdm
--- a/PersonDatasetAssembler/PersonDatasetAssembler.py
+++ b/PersonDatasetAssembler/PersonDatasetAssembler.py
@ -0,0 +1,174 @@
+#!/bin/python3
+
+# PersonDatasetAssembler - A tool to assemble images of a specific person from a
+# directory of images or from a video file
+# Copyright (C) 2024 Carl Philipp Klemm
+#
+# This file is part of PersonDatasetAssembler.
+#
+# PersonDatasetAssembler is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# PersonDatasetAssembler is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with PersonDatasetAssembler.  If not, see <http://www.gnu.org/licenses/>.
+
+import argparse
+import os
+from typing import Iterator
+import cv2
+import numpy
+from tqdm import tqdm
+from wand.exceptions import BlobError
+from wand.image import Image
+
+# Lower-case extensions of the files that image_loader reads with cv2.imread.
+image_ext_ocv = [".bmp", ".jpeg", ".jpg", ".png"]
+# Lower-case extensions of the files that image_loader opens with wand's Image.
+image_ext_wand = [".dng", ".arw"]
+
+
+class LoadException(Exception):
+	"""Exception type declared by this script (presumably for failed image loads - TODO confirm intended use)."""
+
+
+def find_image_files(path: str) -> list[str]:
+	"""Recursively collect the image files below path.
+
+	A file counts as an image when its lower-cased extension is listed in
+	image_ext_ocv or image_ext_wand.
+
+	Args:
+		path: directory to search.
+
+	Returns:
+		The matching file paths in os.walk order, each joined with its directory.
+	"""
+	paths = []
+	for root, _dirs, files in os.walk(path):
+		for filename in files:
+			# Lower-case once for both lists. Previously only the OpenCV list was
+			# matched case-insensitively, so raw files such as "IMG.DNG" were
+			# skipped although image_loader lower-cases the extension itself.
+			extension = os.path.splitext(filename)[1].lower()
+			if extension in image_ext_ocv or extension in image_ext_wand:
+				paths.append(os.path.join(root, filename))
+	return paths
+
+
+def image_loader(paths: list[str]) -> Iterator[numpy.ndarray]:
+	"""Lazily load the images at paths and yield them as BGR arrays.
+
+	Files with an extension from image_ext_ocv are read with cv2.imread. Files
+	with an extension from image_ext_wand are opened with wand and converted to
+	the same BGR layout. Files that cannot be loaded are reported with a warning
+	and skipped; files with any other extension are ignored silently.
+
+	Args:
+		paths: paths of the image files to load.
+
+	Yields:
+		One array per successfully loaded image.
+	"""
+	for path in paths:
+		extension = os.path.splitext(path)[1].lower()
+		if extension in image_ext_ocv:
+			image = cv2.imread(path)
+			if image is None:
+				print(f"Warning: could not load {path}")
+			else:
+				yield image
+		elif extension in image_ext_wand:
+			try:
+				# The context manager releases the wand image again; numpy.array()
+				# copies the pixels first, so they stay valid afterwards.
+				with Image(filename=path) as raw_image:
+					# Per the wand documentation this exports 8-bit pixels as
+					# height x width x channels, RGB or RGBA with an alpha channel.
+					pixels = numpy.array(raw_image)
+			except BlobError as e:
+				print(f"Warning: could not load {path}, {e}")
+				continue
+			# Previously the loaded raw image was never yielded, so raw files were
+			# dropped silently. Convert to BGR to match what cv2.imread returns.
+			conversion = cv2.COLOR_RGBA2BGR if pixels.shape[2] == 4 else cv2.COLOR_RGB2BGR
+			yield cv2.cvtColor(pixels, conversion)
+
+
+def extract_video_images(video: cv2.VideoCapture, interval: int = 0):
+	"""Yield every interval-th frame of video, starting with frame 0.
+
+	Args:
+		video: an opened capture to read the frames from.
+		interval: distance in frames between two yielded frames. Values below 1,
+			including the default of 0, are treated as 1.
+
+	Yields:
+		The frames returned by video.read() until a read fails.
+	"""
+	# A step below 1 would never advance the position, so the generator would
+	# yield frame 0 forever.
+	step = max(1, interval)
+	frame_counter = 0
+	while True:
+		video.set(cv2.CAP_PROP_POS_FRAMES, frame_counter)
+		ret, frame = video.read()
+		if not ret:
+			break
+		yield frame
+		frame_counter += step
+
+
+def contains_face_match(detector: cv2.FaceDetectorYN, recognizer: cv2.FaceRecognizerSF, image: numpy.ndarray, referance_features: list, thresh: float) -> tuple[float, bool]:
+	"""Check whether image contains a face that matches the reference features.
+
+	Every detected face is compared against all reference features; a face
+	matches when the mean of its match scores is greater than thresh.
+
+	Args:
+		detector: face detector used to locate faces in image.
+		recognizer: face recognizer used to align faces, extract features and score them.
+		image: the image to search.
+		referance_features: features of the reference face(s), as returned by process_referance.
+		thresh: mean score a face has to exceed to count as a match.
+
+	Returns:
+		(score, True) with the mean score of the first matching face, or
+		(0, False) when no face was detected, no face matched or no reference
+		features were given.
+	"""
+	if not referance_features:
+		# Nothing to compare against; also avoids dividing by zero below.
+		return 0, False
+	detector.setInputSize([image.shape[1], image.shape[0]])
+	faces = detector.detect(image)[1]
+	if faces is None:
+		return 0, False
+	for face in faces:
+		cropped_image = recognizer.alignCrop(image, face)
+		features = recognizer.feature(cropped_image)
+		# The third argument 0 is the distance type (FR_COSINE in OpenCV's FaceRecognizerSF).
+		score_accum = sum(recognizer.match(referance, features, 0) for referance in referance_features)
+		score = score_accum / len(referance_features)
+		if score > thresh:
+			return score, True
+	return 0, False
+
+
+def process_referance(detector: cv2.FaceDetectorYN, recognizer: cv2.FaceRecognizerSF, referance_path: str) -> list:
+	"""Extract the face features of the reference image(s) at referance_path.
+
+	Args:
+		detector: face detector used to locate the face in each reference image.
+		recognizer: face recognizer used to align the face and extract its features.
+		referance_path: a single image file or a directory that is searched for images.
+
+	Returns:
+		One feature entry per reference image, taken from the first detected
+		face. The list is empty when no image could be loaded or the path is
+		neither a file nor a directory.
+
+	Raises:
+		SystemExit: with exit code 1 when a reference image contains no detectable face.
+	"""
+	if os.path.isfile(referance_path):
+		image = cv2.imread(referance_path)
+		if image is None:
+			print(f"Could not load image from {referance_path}")
+			images = []
+		else:
+			images = [image]
+	elif os.path.isdir(referance_path):
+		# Iterate the loader lazily so only one reference image is held at a time.
+		images = image_loader(find_image_files(referance_path))
+	else:
+		images = []
+
+	out = []
+	for image in images:
+		detector.setInputSize([image.shape[1], image.shape[0]])
+		faces = detector.detect(image)[1]
+		if faces is None:
+			print("unable to find face in referance image")
+			# Same effect as exit(1), without relying on the site-provided helper.
+			raise SystemExit(1)
+		aligned = recognizer.alignCrop(image, faces[0])
+		out.append(recognizer.feature(aligned))
+
+	return out
+
+if __name__ == "__main__":
+	# Script entry point: takes images from a directory or frames from a video file
+	# and writes those that contain the reference person (or, with --invert, those
+	# that do not) as numbered PNG files into the output directory.
+	parser = argparse.ArgumentParser("Script to assemble a dataset of images of a specific person")
+	parser.add_argument('--out', '-o', default="out", help="place to put dataset")
+	parser.add_argument('--input', '-i', required=True, help="directory or video file to get images from")
+	parser.add_argument('--skip', '-s', default=0, type=int, help="skip n frames between samples when grabbing from a video file")
+	parser.add_argument('--referance', '-r', required=True, help="referance image or directory of images of the person to be found")
+	parser.add_argument('--match_model', '-m', required=True, help="Path to the onnx recognition model to be used")
+	parser.add_argument('--detect_model', '-d', required=True, help="Path to the onnx detection model to be used")
+	parser.add_argument('--threshold', '-t', default=0.362, type=float, help="match threshold to use")
+	parser.add_argument('--invert', '-n', action='store_true', help="output files that DONT match")
+	args = parser.parse_args()
+
+	if args.skip < 0:
+		# A negative skip would make the sampling interval (skip + 1) zero or
+		# negative: a division by zero below, or a video position that never advances.
+		parser.error("--skip must not be negative")
+	sample_interval = args.skip + 1
+
+	recognizer = cv2.FaceRecognizerSF.create(model=args.match_model, config="", backend_id=cv2.dnn.DNN_BACKEND_DEFAULT , target_id=cv2.dnn.DNN_TARGET_CPU)
+	detector = cv2.FaceDetectorYN.create(model=args.detect_model, config="", input_size=[320, 320],
+		score_threshold=0.6, nms_threshold=0.3, top_k=5000, backend_id=cv2.dnn.DNN_BACKEND_DEFAULT, target_id=cv2.dnn.DNN_TARGET_CPU)
+
+	referance_features = process_referance(detector, recognizer, args.referance)
+	if len(referance_features) < 1:
+		print(f"Could not load any referance image(s) from {args.referance}")
+		exit(1)
+
+	video = None
+	if os.path.isfile(args.input):
+		video = cv2.VideoCapture(args.input)
+		if not video.isOpened():
+			print(f"Unable to open {args.input} as a video file")
+			exit(1)
+		image_generator = extract_video_images(video, sample_interval)
+		frame_count = max(0, int(video.get(cv2.CAP_PROP_FRAME_COUNT)))
+		# Frames 0, interval, 2 * interval, ... are sampled, hence the rounding up.
+		total_images = (frame_count + sample_interval - 1) // sample_interval
+	elif os.path.isdir(args.input):
+		image_filenams = find_image_files(args.input)
+		image_generator = image_loader(image_filenams)
+		total_images = len(image_filenams)
+	else:
+		print(f"{args.input} is not a video file nor is it a directory")
+		exit(1)
+
+	os.makedirs(args.out, exist_ok=True)
+
+	progress = tqdm(total=total_images, desc="0.00")
+	counter = 0
+	for image in image_generator:
+		if image.shape[0] > 512:
+			# Matching runs on a copy scaled down to 512 rows; the full-size image is what gets saved.
+			aspect = image.shape[0] / image.shape[1]
+			width = max(1, int(512 / aspect))
+			# interpolation has to be passed by keyword: the Python signature is
+			# resize(src, dsize[, dst[, fx[, fy[, interpolation]]]]), so the former
+			# positional "0, 0, cv2.INTER_AREA" set dst/fx/fy and left the default interpolation.
+			resized = cv2.resize(image, (width, 512), interpolation=cv2.INTER_AREA)
+		else:
+			resized = image
+		score, match = contains_face_match(detector, recognizer, resized, referance_features, args.threshold)
+		# Keep matches normally and non-matches when --invert is given.
+		if match != args.invert:
+			filename = f"{counter:04}.png"
+			cv2.imwrite(os.path.join(args.out, filename), image)
+			counter += 1
+		progress.set_description(f"{score:1.2f}")
+		progress.update()
+	progress.close()
+
+	if video is not None:
+		video.release()
+
--- a/PersonDatasetAssembler/README.md
+++ b/PersonDatasetAssembler/README.md
@ -0,0 +1,20 @@
+# PersonDatasetAssembler
+
+PersonDatasetAssembler is a Python script that finds images of a specific person, specified by a reference image, in a directory of images or in a video file. PersonDatasetAssembler also supports raw images.
+
+## How to use
+
+First create a Python venv and install the required packages into it:
+
+	$ python -m venv venv
+	$ source venv/bin/activate
+	$ pip install -r requirements.txt
+
+Then run PersonDatasetAssembler for instance like so:
+
+	$ python PersonDatasetAssembler.py --referance someperson.jpg --match_model ../Weights/face_recognition_sface_2021dec.onnx --detect_model ../Weights/face_detection_yunet_2023mar.onnx --input ~/Photos --out imagesOfSomePerson
+
+Or to extract images from a video:
+
+	$ python PersonDatasetAssembler.py --referance someperson.jpg --match_model ../Weights/face_recognition_sface_2021dec.onnx --detect_model ../Weights/face_detection_yunet_2023mar.onnx -i ~/SomeVideo.mkv --out imagesOfSomePerson
+
--- a/PersonDatasetAssembler/requirements.txt
+++ b/PersonDatasetAssembler/requirements.txt
@ -0,0 +1,4 @@
+numpy==1.26.4
+opencv-python==4.10.0.82
+tqdm==4.66.4
+Wand==0.6.13
--- a/README.md
+++ b/README.md
@ -0,0 +1,35 @@
+# SDImagePreprocess
+
+This repo contains a collection of high-performance tools intended to ease the creation of datasets for training image generation AI such as Stable Diffusion.
+
+## Included tools
+
+This repo contains the following tools:
+
+### SmartCrop
+
+SmartCrop is an application that uses content-aware cropping, [seam carving](https://en.wikipedia.org/wiki/Seam_carving) and resizing to bring a directory of images into the desired size and aspect ratio for training. SmartCrop is configurable to prioritize specific items or specific persons in the images provided.
+
+#### Content detected in image:
+
+![Content found in image](SmartCrop/images/IMGP3692.jpg)
+
+#### Cropped image based on content:
+![Cropped image](SmartCrop/images/IMGP3692C.jpg)
+
+### PersonDatasetAssembler
+
+PersonDatasetAssembler is a Python script that finds images of a specific person, specified by a reference image, in a directory of images or in a video file. PersonDatasetAssembler also supports raw images.
+
+### LLavaTagger
+
+LLavaTagger is a Python script that tags images based on a given prompt using the [LLaVA](https://llava-vl.github.io/) multimodal LLM. LLavaTagger supports using any number of GPUs in DDP parallel for this task.
+
+### DanbooruTagger
+
+DanbooruTagger is a Python script of dubious utility that tags images using the [DeepDanbooru](https://github.com/KichangKim/DeepDanbooru) convolutional network.
+
+
+## License
+
+All files in this repo are licensed under the GPL v3, see LICENSE
--- a/SmartCrop/CMakeLists.txt
+++ b/SmartCrop/CMakeLists.txt
@ -0,0 +1,16 @@
+cmake_minimum_required(VERSION 3.6)
+
+find_package(OpenCV REQUIRED)
+
+set(CMAKE_CXX_STANDARD 17)
+
+set(SRC_FILES main.cpp yolo.cpp tokenize.cpp log.cpp seamcarving.cpp utils.cpp intelligentroi.cpp facerecognizer.cpp)
+
+add_executable(smartcrop ${SRC_FILES})
+target_link_libraries(smartcrop ${OpenCV_LIBS} -ltbb)
+target_include_directories(smartcrop PRIVATE ${OpenCV_INCLUDE_DIRS})
+target_compile_options(smartcrop PRIVATE -s -g -Wall)
+# WEIGHT_DIR is set by the top level CMakeLists.txt and is baked into the
+# binary so the default onnx networks can be embedded at compile time.
+# This is purely informational, so report it as a status instead of a warning.
+message(STATUS "Using weight directory: ${WEIGHT_DIR}")
+target_compile_definitions(smartcrop PUBLIC WEIGHT_DIR="${WEIGHT_DIR}")
+
+install(TARGETS smartcrop RUNTIME DESTINATION bin)
--- a/SmartCrop/README.md
+++ b/SmartCrop/README.md
@ -0,0 +1,50 @@
+# SmartCrop
+
+SmartCrop is an application that uses content-aware cropping, [seam carving](https://en.wikipedia.org/wiki/Seam_carving) and resizing to bring a directory of images into the desired size and aspect ratio for training. SmartCrop is configurable to prioritize specific items or specific persons in the images provided.
+
+## Requirements
+
+* [cmake](https://cmake.org/) 3.6 or later
+* [opencv](https://opencv.org/) 4.8 or later
+* A c++17 capable compiler and standard lib like gcc or llvm/clang
+* git is required to get the source
+
+## Building
+
+The steps to build this application are:
+
+	$ git clone https://uvos.xyz/git/uvos/SDImagePreprocess.git
+	$ cd SDImagePreprocess
+	$ mkdir build
+	$ cd build
+	$ cmake ..
+	$ make
+
+The binary can then be found in build/SmartCrop and can optionally be installed with:
+
+	$ sudo make install
+
+## Basic usage
+
+To process all images in the directory ~/images and output the images into the directory processedImages:
+
+	$ smartcrop --out processedImages ~/images/*
+
+To also focus on the person in the image ~/person.jpg
+
+	$ smartcrop --out processedImages --focus-person ~/person.jpg ~/images/*
+
+To also enable seam carving
+
+	$ smartcrop --out processedImages --focus-person ~/person.jpg --seam-carving ~/images/*
+
+see smartcrop --help for more
+
+## Example
+
+#### Content detected in image:
+![Content found in image](images/IMGP3692.jpg)
+
+#### Cropped image based on content:
+![Cropped image](images/IMGP3692C.jpg)
+
+
--- a/SmartCrop/facerecognizer.cpp
+++ b/SmartCrop/facerecognizer.cpp
@ -0,0 +1,163 @@
+//
+// SmartCrop - A tool for content aware croping of images
+// Copyright (C) 2024 Carl Philipp Klemm
+//
+// This file is part of SmartCrop.
+//
+// SmartCrop is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// SmartCrop is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with SmartCrop.  If not, see <http://www.gnu.org/licenses/>.
+//
+
+#include "facerecognizer.h"
+#include <filesystem>
+
+#define INCBIN_PREFIX r
+#include "incbin.h"
+
+INCBIN(defaultRecognizer, WEIGHT_DIR "/face_recognition_sface_2021dec.onnx");
+INCBIN(defaultDetector, WEIGHT_DIR "/face_detection_yunet_2023mar.onnx");
+
+#include <opencv2/dnn/dnn.hpp>
+#include <opencv2/core.hpp>
+#include <opencv2/highgui.hpp>
+#include <fstream>
+
+#include "log.h"
+
+static const std::vector<unsigned char> onnx((unsigned char*)rdefaultDetectorData, ((unsigned char*)rdefaultDetectorData)+rdefaultDetectorSize);
+
+//
+// Constructs a FaceRecognizer by loading a face detection and a face recognition network.
+//
+// recognizerPath: path to the onnx recognition network, if empty the network embedded in the binary is used
+// detectorPath: path to the onnx detection network, if empty the network embedded in the binary is used
+// referances: images of the persons to recognize, forwarded to addReferances()
+//
+// Throws LoadException if a network or the temporary file for the builtin recognizer can not be loaded or written.
+//
+FaceRecognizer::FaceRecognizer(std::filesystem::path recognizerPath, const std::filesystem::path& detectorPath, const std::vector<cv::Mat>& referances)
+{
+	if(detectorPath.empty())
+	{
+		Log(Log::INFO)<<"Using builtin face detection model";
+
+		detector = cv::FaceDetectorYN::create("onnx", onnx, std::vector<unsigned char>(), {320, 320}, 0.6, 0.3, 5000, cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
+		if(!detector)
+			throw LoadException("Unable to load detector network from built in file");
+	}
+	else
+	{
+		// use an explicit string conversion, the implicit one only exists where the native path type is std::string
+		detector = cv::FaceDetectorYN::create(detectorPath.string(), "", {320, 320}, 0.6, 0.3, 5000, cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
+		if(!detector)
+			throw LoadException("Unable to load detector network from "+detectorPath.string());
+	}
+
+	bool defaultNetwork = recognizerPath.empty();
+
+	if(defaultNetwork)
+	{
+		// the recognizer is loaded from a file path here, so the embedded network is
+		// written to a temporary file first
+		Log(Log::INFO)<<"Using builtin face recognition model";
+		recognizerPath = cv::tempfile("onnx");
+		// the network is binary data, so the file must not be opened in text mode
+		std::ofstream file(recognizerPath, std::ios::binary);
+		if(!file.is_open())
+			throw LoadException("Unable open temporary file at "+recognizerPath.string());
+		Log(Log::DEBUG)<<"Using "<<recognizerPath<<" as temporary file for onnx recongnition network";
+		file.write(reinterpret_cast<const char*>(rdefaultRecognizerData), rdefaultRecognizerSize);
+		file.close();
+		if(!file)
+		{
+			std::error_code ec;
+			std::filesystem::remove(recognizerPath, ec);
+			throw LoadException("Unable to write temporary file at "+recognizerPath.string());
+		}
+	}
+
+	try
+	{
+		recognizer = cv::FaceRecognizerSF::create(recognizerPath.string(), "", cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
+	}
+	catch(...)
+	{
+		// do not leave the temporary file behind when loading the network fails
+		if(defaultNetwork)
+		{
+			std::error_code ec;
+			std::filesystem::remove(recognizerPath, ec);
+		}
+		throw;
+	}
+
+	if(defaultNetwork)
+	{
+		// removing the temporary file is best effort, a failure here must not abort construction
+		std::error_code ec;
+		std::filesystem::remove(recognizerPath, ec);
+	}
+
+	if(!recognizer)
+		throw LoadException("Unable to load recognizer network from "+recognizerPath.string());
+
+	addReferances(referances);
+}
+
+//
+// Runs the face detection network on input and returns the matrix filled in by the detector.
+//
+cv::Mat FaceRecognizer::detectFaces(const cv::Mat& input)
+{
+	cv::Mat detections;
+
+	// the detector is told the size of the image before every detection
+	detector->setInputSize(input.size());
+	detector->detect(input, detections);
+
+	return detections;
+}
+
+//
+// Extracts the features of the first detected face of every image in referances
+// and appends them to the list of referance features used by isMatch().
+// Images without any detected face are skipped with a warning.
+//
+// Returns true if the features of at least one image have been added.
+//
+bool FaceRecognizer::addReferances(const std::vector<cv::Mat>& referances)
+{
+	bool ret = false;
+	for(const cv::Mat& image : referances)
+	{
+		cv::Mat faces = detectFaces(image);
+		if(faces.empty())
+		{
+			Log(Log::WARN)<<"A referance image provided dose not contian any face";
+			continue;
+		}
+		// the column count can only be checked once at least one face was found,
+		// an empty detection result has no columns at all
+		assert(faces.cols == 15);
+		if(faces.rows > 1)
+			Log(Log::WARN)<<"A referance image provided contains more than one face, only the first detected face will be considered";
+		cv::Mat cropedImage;
+		recognizer->alignCrop(image, faces.row(0), cropedImage);
+		cv::Mat features;
+		recognizer->feature(cropedImage, features);
+		// clone so the stored features own their data
+		referanceFeatures.push_back(features.clone());
+		ret = true;
+	}
+
+	return ret;
+}
+
+//
+// Sets the score a face has to exceed in isMatch() to be considered a match.
+//
+void FaceRecognizer::setThreshold(double threasholdIn)
+{
+	this->threshold = threasholdIn;
+}
+
+//
+// Returns the score a face has to exceed in isMatch() to be considered a match.
+//
+double FaceRecognizer::getThreshold()
+{
+	return this->threshold;
+}
+
+//
+// Removes all referance features added so far.
+//
+void FaceRecognizer::clearReferances()
+{
+	this->referanceFeatures.clear();
+}
+
+//
+// Detects all faces in input and compares each of them against all referance features.
+//
+// input: the image to search for known faces
+// alone: if true, images containing more than one face are rejected
+//
+// Returns the best scoring match above the threshold. If nothing matches, person is -1
+// and confidence is 0. If alone is set and more than one face was detected, person is -2.
+// rect is only filled in when a match was found.
+//
+FaceRecognizer::Detection FaceRecognizer::isMatch(const cv::Mat& input, bool alone)
+{
+	cv::Mat faces = detectFaces(input);
+
+	Detection bestMatch;
+	bestMatch.confidence = 0;
+	bestMatch.person = -1;
+
+	if(alone && faces.rows > 1)
+	{
+		bestMatch.person = -2;
+		return bestMatch;
+	}
+
+	for(int i = 0; i < faces.rows; ++i)
+	{
+		cv::Mat face;
+		recognizer->alignCrop(input, faces.row(i), face);
+		cv::Mat features;
+		recognizer->feature(face, features);
+		features = features.clone();
+		for(size_t referanceIndex = 0; referanceIndex < referanceFeatures.size(); ++referanceIndex)
+		{
+			double score = recognizer->match(referanceFeatures[referanceIndex], features, cv::FaceRecognizerSF::FR_COSINE);
+			if(score > threshold && score > bestMatch.confidence)
+			{
+				bestMatch.confidence = score;
+				bestMatch.person = static_cast<int>(referanceIndex);
+				// the detector reports its results as floats, the first four columns
+				// are x, y, width and height of the face, reading them as int would
+				// reinterpret the float bits and give a bogus rectangle
+				bestMatch.rect = cv::Rect(static_cast<int>(faces.at<float>(i, 0)), static_cast<int>(faces.at<float>(i, 1)),
+					static_cast<int>(faces.at<float>(i, 2)), static_cast<int>(faces.at<float>(i, 3)));
+			}
+		}
+	}
+
+	return bestMatch;
+}
--- a/SmartCrop/facerecognizer.h
+++ b/SmartCrop/facerecognizer.h
@ -0,0 +1,67 @@
+/* * SmartCrop - A tool for content aware croping of images
+ * Copyright (C) 2024 Carl Philipp Klemm
+ *
+ * This file is part of SmartCrop.
+ *
+ * SmartCrop is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * SmartCrop is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with SmartCrop.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+#include <exception>
+#include <opencv2/core/mat.hpp>
+#include <opencv2/objdetect/face.hpp>
+#include <opencv2/core.hpp>
+#include <vector>
+#include <memory>
+#include <filesystem>
+
+/**
+ * Detects faces in images and matches them against a set of referance faces.
+ */
+class FaceRecognizer
+{
+public:
+
+	/** Result of isMatch() */
+	struct Detection
+	{
+		/** index of the matched referance, -1 if no match was found, -2 if the image was rejected for containing several faces */
+		int person;
+		/** score of the match, 0 if no match was found */
+		float confidence;
+		/** location of the matched face, only set when a match was found */
+		cv::Rect rect;
+	};
+
+	/** Thrown by the constructor when a network can not be loaded */
+	class LoadException : public std::exception
+	{
+	private:
+		std::string message;
+	public:
+		LoadException(const std::string& msg): std::exception(), message(msg) {}
+		// noexcept instead of the dynamic exception specification throw(),
+		// which is deprecated and no longer valid from c++20 onwards
+		virtual const char* what() const noexcept override
+		{
+			return message.c_str();
+		}
+	};
+
+private:
+	/** feature matrices of the referance faces, the index is the person id reported in Detection */
+	std::vector<cv::Mat> referanceFeatures;
+	std::shared_ptr<cv::FaceRecognizerSF> recognizer;
+	std::shared_ptr<cv::FaceDetectorYN> detector;
+
+	/** score a face has to exceed to count as a match */
+	double threshold = 0.363;
+
+public:
+	/**
+	 * @param recognizerPath path to the recognition network, the builtin network is used if empty
+	 * @param detectorPath path to the detection network, the builtin network is used if empty
+	 * @param referances images of the persons to recognize
+	 * @throws LoadException if a network can not be loaded
+	 */
+	FaceRecognizer(std::filesystem::path recognizerPath = "", const std::filesystem::path& detectorPath = "", const std::vector<cv::Mat>& referances = std::vector<cv::Mat>());
+	/** Runs the face detector on input and returns its raw result matrix */
+	cv::Mat detectFaces(const cv::Mat& input);
+	/** Returns the best match of any face in input against the referances, see Detection */
+	Detection isMatch(const cv::Mat& input, bool alone = false);
+	/** Adds referance faces, returns true if at least one face was added */
+	bool addReferances(const std::vector<cv::Mat>& referances);
+	void setThreshold(double threashold);
+	double getThreshold();
+	void clearReferances();
+};
--- a/SmartCrop/images/IMGP3692.jpg
+++ b/SmartCrop/images/IMGP3692.jpg
--- a/SmartCrop/images/IMGP3692C.jpg
+++ b/SmartCrop/images/IMGP3692C.jpg
--- a/SmartCrop/incbin.h
+++ b/SmartCrop/incbin.h
@ -0,0 +1,495 @@
+/* * SmartCrop - A tool for content aware croping of images
+ * Copyright (C) 2024 Carl Philipp Klemm
+ *
+ * This file is part of SmartCrop.
+ *
+ * SmartCrop is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * SmartCrop is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with SmartCrop.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * @file incbin.h
+ * @author Dale Weiler
+ * @brief Utility for including binary files
+ *
+ * Facilities for including binary files into the current translation unit and
+ * making use from them externally in other translation units.
+ */
+#ifndef INCBIN_HDR
+#define INCBIN_HDR
+#include <limits.h>
+#if   defined(__AVX512BW__) || \
+      defined(__AVX512CD__) || \
+      defined(__AVX512DQ__) || \
+      defined(__AVX512ER__) || \
+      defined(__AVX512PF__) || \
+      defined(__AVX512VL__) || \
+      defined(__AVX512F__)
+# define INCBIN_ALIGNMENT_INDEX 6
+#elif defined(__AVX__)      || \
+      defined(__AVX2__)
+# define INCBIN_ALIGNMENT_INDEX 5
+#elif defined(__SSE__)      || \
+      defined(__SSE2__)     || \
+      defined(__SSE3__)     || \
+      defined(__SSSE3__)    || \
+      defined(__SSE4_1__)   || \
+      defined(__SSE4_2__)   || \
+      defined(__neon__)     || \
+      defined(__ARM_NEON)   || \
+      defined(__ALTIVEC__)
+# define INCBIN_ALIGNMENT_INDEX 4
+#elif ULONG_MAX != 0xffffffffu
+# define INCBIN_ALIGNMENT_INDEX 3
+# else
+# define INCBIN_ALIGNMENT_INDEX 2
+#endif
+
+/* Lookup table of (1 << n) where `n' is `INCBIN_ALIGNMENT_INDEX' */
+#define INCBIN_ALIGN_SHIFT_0 1
+#define INCBIN_ALIGN_SHIFT_1 2
+#define INCBIN_ALIGN_SHIFT_2 4
+#define INCBIN_ALIGN_SHIFT_3 8
+#define INCBIN_ALIGN_SHIFT_4 16
+#define INCBIN_ALIGN_SHIFT_5 32
+#define INCBIN_ALIGN_SHIFT_6 64
+
+/* Actual alignment value */
+#define INCBIN_ALIGNMENT \
+    INCBIN_CONCATENATE( \
+        INCBIN_CONCATENATE(INCBIN_ALIGN_SHIFT, _), \
+        INCBIN_ALIGNMENT_INDEX)
+
+/* Stringize */
+#define INCBIN_STR(X) \
+    #X
+#define INCBIN_STRINGIZE(X) \
+    INCBIN_STR(X)
+/* Concatenate */
+#define INCBIN_CAT(X, Y) \
+    X ## Y
+#define INCBIN_CONCATENATE(X, Y) \
+    INCBIN_CAT(X, Y)
+/* Deferred macro expansion */
+#define INCBIN_EVAL(X) \
+    X
+#define INCBIN_INVOKE(N, ...) \
+    INCBIN_EVAL(N(__VA_ARGS__))
+/* Variable argument count for overloading by arity */
+#define INCBIN_VA_ARG_COUNTER(_1, _2, _3, N, ...) N
+#define INCBIN_VA_ARGC(...) INCBIN_VA_ARG_COUNTER(__VA_ARGS__, 3, 2, 1, 0)
+
+/* Green Hills uses a different directive for including binary data */
+#if defined(__ghs__)
+#  if (__ghs_asm == 2)
+#    define INCBIN_MACRO ".file"
+/* Or consider the ".myrawdata" entry in the ld file */
+#  else
+#    define INCBIN_MACRO "\tINCBIN"
+#  endif
+#else
+#  define INCBIN_MACRO ".incbin"
+#endif
+
+#ifndef _MSC_VER
+#  define INCBIN_ALIGN \
+    __attribute__((aligned(INCBIN_ALIGNMENT)))
+#else
+#  define INCBIN_ALIGN __declspec(align(INCBIN_ALIGNMENT))
+#endif
+
+#if defined(__arm__) || /* GNU C and RealView */ \
+    defined(__arm) || /* Diab */ \
+    defined(_ARM) /* ImageCraft */
+#  define INCBIN_ARM
+#endif
+
+#ifdef __GNUC__
+/* Utilize .balign where supported */
+#  define INCBIN_ALIGN_HOST ".balign " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n"
+#  define INCBIN_ALIGN_BYTE ".balign 1\n"
+#elif defined(INCBIN_ARM)
+/*
+ * On arm assemblers, the alignment value is calculated as (1 << n) where `n' is
+ * the shift count. This is the value passed to `.align'
+ */
+#  define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT_INDEX) "\n"
+#  define INCBIN_ALIGN_BYTE ".align 0\n"
+#else
+/* We assume other inline assembler's treat `.align' as `.balign' */
+#  define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n"
+#  define INCBIN_ALIGN_BYTE ".align 1\n"
+#endif
+
+/* INCBIN_CONST is used by incbin.c generated files */
+#if defined(__cplusplus)
+#  define INCBIN_EXTERNAL extern "C"
+#  define INCBIN_CONST    extern const
+#else
+#  define INCBIN_EXTERNAL extern
+#  define INCBIN_CONST    const
+#endif
+
+/**
+ * @brief Optionally override the linker section into which size and data is
+ * emitted.
+ * 
+ * @warning If you use this facility, you might have to deal with
+ * platform-specific linker output section naming on your own.
+ */
+#if !defined(INCBIN_OUTPUT_SECTION)
+#  if defined(__APPLE__)
+#    define INCBIN_OUTPUT_SECTION ".const_data"
+#  else
+#    define INCBIN_OUTPUT_SECTION ".rodata"
+#  endif
+#endif
+
+/**
+ * @brief Optionally override the linker section into which data is emitted.
+ *
+ * @warning If you use this facility, you might have to deal with
+ * platform-specific linker output section naming on your own.
+ */
+#if !defined(INCBIN_OUTPUT_DATA_SECTION)
+#  define INCBIN_OUTPUT_DATA_SECTION INCBIN_OUTPUT_SECTION
+#endif
+
+/**
+ * @brief Optionally override the linker section into which size is emitted.
+ *
+ * @warning If you use this facility, you might have to deal with
+ * platform-specific linker output section naming on your own.
+ * 
+ * @note This is useful for Harvard architectures where program memory cannot
+ * be directly read from the program without special instructions. With this you
+ * can chose to put the size variable in RAM rather than ROM.
+ */
+#if !defined(INCBIN_OUTPUT_SIZE_SECTION)
+#  define INCBIN_OUTPUT_SIZE_SECTION INCBIN_OUTPUT_SECTION
+#endif
+
+#if defined(__APPLE__)
+#  include "TargetConditionals.h"
+#  if defined(TARGET_OS_IPHONE) && !defined(INCBIN_SILENCE_BITCODE_WARNING)
+#    warning "incbin is incompatible with bitcode. Using the library will break upload to App Store if you have bitcode enabled. Add `#define INCBIN_SILENCE_BITCODE_WARNING` before including this header to silence this warning."
+#  endif
+/* The directives are different for Apple branded compilers */
+#  define INCBIN_SECTION         INCBIN_OUTPUT_SECTION "\n"
+#  define INCBIN_GLOBAL(NAME)    ".globl " INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n"
+#  define INCBIN_INT             ".long "
+#  define INCBIN_MANGLE          "_"
+#  define INCBIN_BYTE            ".byte "
+#  define INCBIN_TYPE(...)
+#else
+#  define INCBIN_SECTION         ".section " INCBIN_OUTPUT_SECTION "\n"
+#  define INCBIN_GLOBAL(NAME)    ".global " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n"
+#  if defined(__ghs__)
+#    define INCBIN_INT           ".word "
+#  else
+#    define INCBIN_INT           ".int "
+#  endif
+#  if defined(__USER_LABEL_PREFIX__)
+#    define INCBIN_MANGLE        INCBIN_STRINGIZE(__USER_LABEL_PREFIX__)
+#  else
+#    define INCBIN_MANGLE        ""
+#  endif
+#  if defined(INCBIN_ARM)
+/* On arm assemblers, `@' is used as a line comment token */
+#    define INCBIN_TYPE(NAME)    ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", %object\n"
+#  elif defined(__MINGW32__) || defined(__MINGW64__)
+/* Mingw doesn't support this directive either */
+#    define INCBIN_TYPE(NAME)
+#  else
+/* It's safe to use `@' on other architectures */
+#    define INCBIN_TYPE(NAME)    ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", @object\n"
+#  endif
+#  define INCBIN_BYTE            ".byte "
+#endif
+
+/* List of style types used for symbol names */
+#define INCBIN_STYLE_CAMEL 0
+#define INCBIN_STYLE_SNAKE 1
+
+/**
+ * @brief Specify the prefix to use for symbol names.
+ *
+ * @note By default this is "g".
+ *
+ * @code
+ * #define INCBIN_PREFIX incbin
+ * #include "incbin.h"
+ * INCBIN(Foo, "foo.txt");
+ *
+ * // Now you have the following symbols instead:
+ * // const unsigned char incbinFoo<data>[];
+ * // const unsigned char *const incbinFoo<end>;
+ * // const unsigned int incbinFoo<size>;
+ * @endcode
+ */
+#if !defined(INCBIN_PREFIX)
+#  define INCBIN_PREFIX g
+#endif
+
+/**
+ * @brief Specify the style used for symbol names.
+ *
+ * Possible options are
+ * - INCBIN_STYLE_CAMEL "CamelCase"
+ * - INCBIN_STYLE_SNAKE "snake_case"
+ *
+ * @note By default this is INCBIN_STYLE_CAMEL
+ *
+ * @code
+ * #define INCBIN_STYLE INCBIN_STYLE_SNAKE
+ * #include "incbin.h"
+ * INCBIN(foo, "foo.txt");
+ *
+ * // Now you have the following symbols:
+ * // const unsigned char <prefix>foo_data[];
+ * // const unsigned char *const <prefix>foo_end;
+ * // const unsigned int <prefix>foo_size;
+ * @endcode
+ */
+#if !defined(INCBIN_STYLE)
+#  define INCBIN_STYLE INCBIN_STYLE_CAMEL
+#endif
+
+/* Style lookup tables */
+#define INCBIN_STYLE_0_DATA Data
+#define INCBIN_STYLE_0_END End
+#define INCBIN_STYLE_0_SIZE Size
+#define INCBIN_STYLE_1_DATA _data
+#define INCBIN_STYLE_1_END _end
+#define INCBIN_STYLE_1_SIZE _size
+
+/* Style lookup: returning identifier */
+#define INCBIN_STYLE_IDENT(TYPE) \
+    INCBIN_CONCATENATE( \
+        INCBIN_STYLE_, \
+        INCBIN_CONCATENATE( \
+            INCBIN_EVAL(INCBIN_STYLE), \
+            INCBIN_CONCATENATE(_, TYPE)))
+
+/* Style lookup: returning string literal */
+#define INCBIN_STYLE_STRING(TYPE) \
+    INCBIN_STRINGIZE( \
+        INCBIN_STYLE_IDENT(TYPE)) \
+
+/* Generate the global labels by indirectly invoking the macro with our style
+ * type and concatenating the name against them. */
+#define INCBIN_GLOBAL_LABELS(NAME, TYPE) \
+    INCBIN_INVOKE( \
+        INCBIN_GLOBAL, \
+        INCBIN_CONCATENATE( \
+            NAME, \
+            INCBIN_INVOKE( \
+                INCBIN_STYLE_IDENT, \
+                TYPE))) \
+    INCBIN_INVOKE( \
+        INCBIN_TYPE, \
+        INCBIN_CONCATENATE( \
+            NAME, \
+            INCBIN_INVOKE( \
+                INCBIN_STYLE_IDENT, \
+                TYPE)))
+
+/**
+ * @brief Externally reference binary data included in another translation unit.
+ *
+ * Produces three external symbols that reference the binary data included in
+ * another translation unit.
+ *
+ * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
+ * "Data", as well as "End" and "Size" after. An example is provided below.
+ *
+ * @param TYPE Optional array type. Omitting this picks a default of `unsigned char`.
+ * @param NAME The name given for the binary data
+ *
+ * @code
+ * INCBIN_EXTERN(Foo);
+ *
+ * // Now you have the following symbols:
+ * // extern const unsigned char <prefix>Foo<data>[];
+ * // extern const unsigned char *const <prefix>Foo<end>;
+ * // extern const unsigned int <prefix>Foo<size>;
+ * @endcode
+ * 
+ * You may specify a custom optional data type as well as the first argument.
+ * @code
+ * INCBIN_EXTERN(custom_type, Foo);
+ * 
+ * // Now you have the following symbols:
+ * // extern const custom_type <prefix>Foo<data>[];
+ * // extern const custom_type *const <prefix>Foo<end>;
+ * // extern const unsigned int <prefix>Foo<size>;
+ * @endcode
+ */
+#define INCBIN_EXTERN(...) \
+    INCBIN_CONCATENATE(INCBIN_EXTERN_, INCBIN_VA_ARGC(__VA_ARGS__))(__VA_ARGS__)
+#define INCBIN_EXTERN_1(NAME, ...) \
+    INCBIN_EXTERN_2(unsigned char, NAME)
+#define INCBIN_EXTERN_2(TYPE, NAME) \
+    INCBIN_EXTERNAL const INCBIN_ALIGN TYPE \
+        INCBIN_CONCATENATE( \
+            INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
+            INCBIN_STYLE_IDENT(DATA))[]; \
+    INCBIN_EXTERNAL const INCBIN_ALIGN TYPE *const \
+    INCBIN_CONCATENATE( \
+        INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
+        INCBIN_STYLE_IDENT(END)); \
+    INCBIN_EXTERNAL const unsigned int \
+        INCBIN_CONCATENATE( \
+            INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
+            INCBIN_STYLE_IDENT(SIZE))
+
+/**
+ * @brief Externally reference textual data included in another translation unit.
+ *
+ * Produces three external symbols that reference the textual data included in
+ * another translation unit.
+ *
+ * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
+ * "Data", as well as "End" and "Size" after. An example is provided below.
+ *
+ * @param NAME The name given for the textual data
+ *
+ * @code
+ * INCBIN_EXTERN(Foo);
+ *
+ * // Now you have the following symbols:
+ * // extern const char <prefix>Foo<data>[];
+ * // extern const char *const <prefix>Foo<end>;
+ * // extern const unsigned int <prefix>Foo<size>;
+ * @endcode
+ */
+#define INCTXT_EXTERN(NAME) \
+    INCBIN_EXTERN_2(char, NAME)
+
+/**
+ * @brief Include a binary file into the current translation unit.
+ *
+ * Includes a binary file into the current translation unit, producing three symbols
+ * for objects that encode the data and size respectively.
+ *
+ * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
+ * "Data", as well as "End" and "Size" after. An example is provided below.
+ *
+ * @param TYPE Optional array type. Omitting this picks a default of `unsigned char`.
+ * @param NAME The name to associate with this binary data (as an identifier.)
+ * @param FILENAME The file to include (as a string literal.)
+ *
+ * @code
+ * INCBIN(Icon, "icon.png");
+ *
+ * // Now you have the following symbols:
+ * // const unsigned char <prefix>Icon<data>[];
+ * // const unsigned char *const <prefix>Icon<end>;
+ * // const unsigned int <prefix>Icon<size>;
+ * @endcode
+ * 
+ * You may specify a custom optional data type as well as the first argument.
+ * These macros are specialized by arity.
+ * @code
+ * INCBIN(custom_type, Icon, "icon.png");
+ *
+ * // Now you have the following symbols:
+ * // const custom_type <prefix>Icon<data>[];
+ * // const custom_type *const <prefix>Icon<end>;
+ * // const unsigned int <prefix>Icon<size>;
+ * @endcode
+ *
+ * @warning This must be used in global scope
+ * @warning The identifiers may be different if INCBIN_STYLE is not default
+ *
+ * To externally reference the data included by this in another translation unit
+ * please @see INCBIN_EXTERN.
+ */
+#ifdef _MSC_VER
+#  define INCBIN(NAME, FILENAME) \
+      INCBIN_EXTERN(NAME)
+#else
+#  define INCBIN(...) \
+     INCBIN_CONCATENATE(INCBIN_, INCBIN_VA_ARGC(__VA_ARGS__))(__VA_ARGS__)
+#  if defined(__GNUC__)
+#    define INCBIN_1(...) _Pragma("GCC error \"Single argument INCBIN not allowed\"")
+#  elif defined(__clang__)
+#    define INCBIN_1(...) _Pragma("clang error \"Single argument INCBIN not allowed\"")
+#  else
+#    define INCBIN_1(...) /* Cannot do anything here */
+#  endif
+#  define INCBIN_2(NAME, FILENAME) \
+      INCBIN_3(unsigned char, NAME, FILENAME)
+#  define INCBIN_3(TYPE, NAME, FILENAME) INCBIN_COMMON(TYPE, NAME, FILENAME, /* No terminator for binary data */)
+#  define INCBIN_COMMON(TYPE, NAME, FILENAME, TERMINATOR) \
+    __asm__(INCBIN_SECTION \
+            INCBIN_GLOBAL_LABELS(NAME, DATA) \
+            INCBIN_ALIGN_HOST \
+            INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) ":\n" \
+            INCBIN_MACRO " \"" FILENAME "\"\n" \
+                TERMINATOR \
+            INCBIN_GLOBAL_LABELS(NAME, END) \
+            INCBIN_ALIGN_BYTE \
+            INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) ":\n" \
+                INCBIN_BYTE "1\n" \
+            INCBIN_GLOBAL_LABELS(NAME, SIZE) \
+            INCBIN_ALIGN_HOST \
+            INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(SIZE) ":\n" \
+                INCBIN_INT INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) " - " \
+                           INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) "\n" \
+            INCBIN_ALIGN_HOST \
+            ".text\n" \
+    ); \
+    INCBIN_EXTERN(TYPE, NAME)
+#endif
+
+/**
+ * @brief Include a textual file into the current translation unit.
+ * 
+ * This behaves the same as INCBIN except it produces char compatible arrays
+ * and implicitly adds a null-terminator byte, thus the size of data included
+ * by this is one byte larger than that of INCBIN.
+ *
+ * Includes a textual file into the current translation unit, producing three
+ * symbols for objects that encode the data and size respectively.
+ *
+ * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
+ * "Data", as well as "End" and "Size" after. An example is provided below.
+ *
+ * @param NAME The name to associate with this binary data (as an identifier.)
+ * @param FILENAME The file to include (as a string literal.)
+ *
+ * @code
+ * INCTXT(Readme, "readme.txt");
+ *
+ * // Now you have the following symbols:
+ * // const char <prefix>Readme<data>[];
+ * // const char *const <prefix>Readme<end>;
+ * // const unsigned int <prefix>Readme<size>;
+ * @endcode
+ *
+ * @warning This must be used in global scope
+ * @warning The identifiers may be different if INCBIN_STYLE is not default
+ *
+ * To externally reference the data included by this in another translation unit
+ * please @see INCBIN_EXTERN.
+ */
+#if defined(_MSC_VER)
+#  define INCTXT(NAME, FILENAME) \
+     INCBIN_EXTERN(NAME)
+#else
+#  define INCTXT(NAME, FILENAME) \
+     INCBIN_COMMON(char, NAME, FILENAME, INCBIN_BYTE "0\n")
+#endif
+
+#endif
--- a/SmartCrop/intelligentroi.cpp
+++ b/SmartCrop/intelligentroi.cpp
@ -0,0 +1,128 @@
+//
+// SmartCrop - A tool for content aware croping of images
+// Copyright (C) 2024 Carl Philipp Klemm
+//
+// This file is part of SmartCrop.
+//
+// SmartCrop is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// SmartCrop is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with SmartCrop.  If not, see <http://www.gnu.org/licenses/>.
+//
+
+#include "intelligentroi.h"
+
+#include <opencv2/imgproc.hpp>
+
+#include "utils.h"
+#include "log.h"
+
+//
+// Ordering used to sort points that should be included in the crop:
+// points with a higher priority come first, points of equal priority
+// are ordered by their distance to center, closest first.
+//
+bool InteligentRoi::compPointPrio(const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b, const cv::Point2i& center)
+{
+	const bool samePriority = a.second == b.second;
+	if(!samePriority)
+		return a.second > b.second;
+
+	const double distanceOfA = pointDist(a.first, center);
+	const double distanceOfB = pointDist(b.first, center);
+	return distanceOfA < distanceOfB;
+}
+
+//
+// Moves rect, without changing its size, so that point lies on or inside its border.
+// rect is left untouched if pointInRect() already reports the point as inside.
+//
+void InteligentRoi::slideRectToPoint(cv::Rect& rect, const cv::Point2i& point)
+{
+	if(pointInRect(point, rect))
+		return;
+
+	const int rightEdge = rect.x+rect.width;
+	const int bottomEdge = rect.y+rect.height;
+
+	// horizontal shift
+	if(point.x < rect.x)
+		rect.x = point.x;
+	else if(point.x > rightEdge)
+		rect.x = point.x-rect.width;
+
+	// vertical shift
+	if(point.y < rect.y)
+		rect.y = point.y;
+	else if(point.y > bottomEdge)
+		rect.y = point.y-rect.height;
+}
+
+//
+// Finds a square crop, with the side length of the shorter image side, that
+// contains as many of the points in mustInclude as possible.
+//
+// incompleate: set to true if points had to be dropped because they do not all fit
+// imageSize: size of the image to crop
+// mustInclude: points to include with their priority, lowest ranked points are dropped first
+//
+// Returns the crop rectangle, cut to the image borders.
+//
+cv::Rect InteligentRoi::maxRect(bool& incompleate, const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude)
+{
+	incompleate = false;
+	const int diameter = std::min(imageSize.height, imageSize.width);
+	const cv::Point2i center(imageSize.width/2, imageSize.height/2);
+
+	// start out with a square in the middle of the image
+	cv::Rect candiate(center.x-diameter/2, center.y-diameter/2, diameter, diameter);
+
+	// best ranked points first, so the worst ranked one is always at the back
+	auto ranksHigher = [&center](const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b)
+	{
+		return compPointPrio(a, b, center);
+	};
+	std::sort(mustInclude.begin(), mustInclude.end(), ranksHigher);
+
+	// drop the worst ranked points until the remaining ones fit into the square
+	for(;;)
+	{
+		const cv::Rect includeRect = rectFromPoints(mustInclude);
+		const bool fits = includeRect.width-2 <= diameter && includeRect.height-2 <= diameter;
+		if(fits)
+			break;
+
+		incompleate = true;
+		slideRectToPoint(candiate, mustInclude.back().first);
+		mustInclude.pop_back();
+		Log(Log::DEBUG)<<"cant fill";
+		for(const std::pair<cv::Point2i, int>& remaining : mustInclude)
+			Log(Log::DEBUG)<<remaining.first<<' '<<pointDist(remaining.first, center)<<' '<<remaining.second;
+	}
+
+	for(const std::pair<cv::Point2i, int>& kept : mustInclude)
+		slideRectToPoint(candiate, kept.first);
+
+	// cut the candidate to the borders of the image
+	candiate.x = std::max(candiate.x, 0);
+	candiate.y = std::max(candiate.y, 0);
+	candiate.width = std::min(candiate.width, imageSize.width-candiate.x);
+	candiate.height = std::min(candiate.height, imageSize.height-candiate.y);
+
+	return candiate;
+}
+
+//
+// Looks up the class id yolo uses for "person" so that
+// detections of persons can be treated specially later.
+//
+InteligentRoi::InteligentRoi(const Yolo& yolo):
+personId(yolo.getClassForStr("person"))
+{
+}
+
+//
+// Calculates the crop rectangle for an image from the objects detected in it.
+//
+// out: receives the crop rectangle
+// detections: the objects detected in the image
+// imageSize: size of the image
+//
+// Returns true if not all detections could be fit into the crop rectangle.
+//
+bool InteligentRoi::getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize)
+{
+	std::vector<std::pair<cv::Point2i, int>> corners;
+
+	for(const Yolo::Detection& detection : detections)
+	{
+		const auto& box = detection.box;
+		const int priority = detection.priority;
+		const bool isPerson = detection.class_id == personId;
+
+		if(isPerson)
+		{
+			// for persons the upper edge of the box is ranked above the lower edge,
+			// with the middle of the upper edge ranked highest
+			corners.push_back({box.tl()+cv::Point2i(box.width/2, 0), priority+2});
+			corners.push_back({box.tl(), priority+1});
+			corners.push_back({box.br(), priority});
+			corners.push_back({box.tl()+cv::Point2i(box.width, 0), priority+1});
+			corners.push_back({box.br()+cv::Point2i(0-box.width, 0), priority});
+		}
+		else
+		{
+			// all four corners of other objects are ranked the same
+			corners.push_back({box.tl(), priority});
+			corners.push_back({box.br(), priority});
+			corners.push_back({box.tl()+cv::Point2i(box.width, 0), priority});
+			corners.push_back({box.br()+cv::Point2i(0-box.width, 0), priority});
+		}
+	}
+
+	bool incompleate;
+	out = maxRect(incompleate, imageSize, corners);
+	return incompleate;
+}
--- a/SmartCrop/intelligentroi.h
+++ b/SmartCrop/intelligentroi.h
@ -0,0 +1,37 @@
+/* * SmartCrop - A tool for content aware croping of images
+ * Copyright (C) 2024 Carl Philipp Klemm
+ *
+ * This file is part of SmartCrop.
+ *
+ * SmartCrop is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * SmartCrop is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with SmartCrop.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <opencv2/imgproc.hpp>
+
+#include "yolo.h"
+
+// Computes a crop rectangle for an image from a list of Yolo detections.
+class InteligentRoi
+{
+private:
+	// class id that the Yolo instance passed to the constructor reports for "person"
+	int personId;
+	// NOTE(review): presumably orders two prioritised points relative to center; the definition is not visible here
+	static bool compPointPrio(const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b, const cv::Point2i& center);
+	// NOTE(review): presumably moves rect so that it contains point; the definition is not visible here
+	static void slideRectToPoint(cv::Rect& rect, const cv::Point2i& point);
+	// Computes the rectangle for the given image size and (point, priority) pairs,
+	// the result is clamped to the image. incompleate is an output flag.
+	static cv::Rect maxRect(bool& incompleate, const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude = {});
+
+public:
+	// yolo is only used to look up the class id of "person"
+	InteligentRoi(const Yolo& yolo);
+	// Writes the crop rectangle for detections into out and returns the
+	// "incompleate" flag reported by maxRect().
+	bool getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize);
+};
--- a/SmartCrop/log.cpp
+++ b/SmartCrop/log.cpp
@ -0,0 +1,63 @@
+/**
+* Lubricant Detecter
+* Copyright (C) 2021 Carl Klemm
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License
+* version 3 as published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the
+* Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+* Boston, MA  02110-1301, USA.
+*/
+
+#include "log.h"
+
+// Creates a message of level type. If Log::headers is set, the message
+// starts with the "[LABEL] " prefix of its level.
+// endlineI: whether the destructor terminates the message with a newline
+Log::Log(Level type, bool endlineI): msglevel(type), endline(endlineI)
+{
+	if(!headers)
+		return;
+
+	*this<<("["+getLabel(type)+"] ");
+}
+
+// Terminates the message with a newline if something was printed and the
+// message was created with endline enabled.
+Log::~Log()
+{
+	if(opened && endline)
+	{
+		// operator<< writes ERROR messages to std::cerr and everything else to
+		// std::cout, the newline has to end up on the same stream as the text
+		if(msglevel == ERROR)
+			std::cerr<<'\n';
+		else
+			std::cout<<'\n';
+	}
+	opened = false;
+}
+
+
+// Returns the five character label used in message headers for level,
+// or an empty string for a value that is not a known Level.
+std::string Log::getLabel(Level level)
+{
+	switch(level)
+	{
+		case DEBUG:
+			return "DEBUG";
+		case INFO:
+			return "INFO ";
+		case WARN:
+			return "WARN ";
+		case ERROR:
+			return "ERROR";
+	}
+	return std::string();
+}
+
+// message headers ("[LABEL] ") are off by default
+bool Log::headers = false;
+// by default only messages of level WARN and above are printed
+Log::Level Log::level = WARN;
--- a/SmartCrop/log.h
+++ b/SmartCrop/log.h
@ -0,0 +1,64 @@
+/**
+* eisgenerator
+* Copyright (C) 2021 Carl Klemm
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License
+* version 3 as published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the
+* Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+* Boston, MA  02110-1301, USA.
+*/
+
+#pragma once
+#include <iostream>
+#include <string>
+
+// Minimal stream style logger: a temporary Log object collects one message
+// via operator<< and ends it when it is destroyed.
+class Log
+{
+public:
+
+	// severity of a message, in ascending order
+	enum Level
+	{
+		DEBUG,
+		INFO,
+		WARN,
+		ERROR
+	};
+
+private:
+	// set once something has been printed by this object
+	bool opened = false;
+	// level of the message carried by this object
+	Level msglevel = DEBUG;
+	// whether the destructor finishes the message with a newline
+	bool endline = true;
+
+	std::string getLabel(Level level);
+
+public:
+
+	// when set, every message is prefixed with "[LABEL] "
+	static bool headers;
+	// messages below this level are discarded
+	static Level level;
+
+	Log() {}
+	Log(Level type, bool endlineI = true);
+	~Log();
+
+	// Prints msg if the level of this message is at least Log::level.
+	// ERROR messages go to std::cerr, everything else to std::cout.
+	template<class T> Log &operator<<(const T &msg)
+	{
+		if(msglevel < level)
+			return *this;
+
+		std::ostream& sink = (msglevel == ERROR) ? std::cerr : std::cout;
+		sink<<msg;
+		opened = true;
+		return *this;
+	}
+};
--- a/SmartCrop/main.cpp
+++ b/SmartCrop/main.cpp
@ -0,0 +1,460 @@
+//
+// SmartCrop - A tool for content aware croping of images
+// Copyright (C) 2024 Carl Philipp Klemm
+//
+// This file is part of SmartCrop.
+//
+// SmartCrop is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// SmartCrop is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with SmartCrop.  If not, see <http://www.gnu.org/licenses/>.
+//
+
+#include <filesystem>
+#include <iostream>
+#include <opencv2/core.hpp>
+#include <opencv2/core/types.hpp>
+#include <opencv2/imgproc.hpp>
+#include <opencv2/highgui.hpp>
+#include <algorithm>
+#include <execution>
+#include <string>
+#include <vector>
+#include <numeric>
+
+#include "yolo.h"
+#include "log.h"
+#include "options.h"
+#include "utils.h"
+#include "intelligentroi.h"
+#include "seamcarving.h"
+#include "facerecognizer.h"
+
+// Finds the detection whose horizontal extent [box.x, box.x+box.width]
+// contains x. If several detections qualify, the one reaching furthest to
+// the right wins (the first one on ties).
+//
+// ignore: optional detection that is skipped during the search
+// returns a pointer into detections or nullptr if no box contains x
+const Yolo::Detection* pointInDetectionHoriz(int x, const std::vector<Yolo::Detection>& detections, const Yolo::Detection* ignore = nullptr)
+{
+	const Yolo::Detection* best = nullptr;
+	for(const Yolo::Detection& candidate : detections)
+	{
+		if(&candidate == ignore)
+			continue;
+
+		const bool containsX = candidate.box.x <= x && x <= candidate.box.x+candidate.box.width;
+		if(!containsX)
+			continue;
+
+		if(best == nullptr || candidate.box.br().x > best->box.br().x)
+			best = &candidate;
+	}
+	return best;
+}
+
+// Moves x to the end of the horizontal region that starts at x.
+//
+// If x is not inside any detection, the region is free space: x is moved to
+// the left edge of the closest detection to the right of x, or to imgSizeX
+// if there is none, and false is returned.
+// If x is inside a detection, x is moved to the right edge of that detection
+// and, as long as this edge is inside another detection that reaches further
+// right, on to the end of that one. true is returned in this case.
+//
+// x:          in: start of the region, out: end of the region
+// detections: boxes that must not be resized
+// imgSizeX:   width of the image
+// returns true if the region is covered by detections (frozen)
+bool findRegionEndpointHoriz(int& x, const std::vector<Yolo::Detection>& detections, int imgSizeX)
+{
+	const Yolo::Detection* inDetection = pointInDetectionHoriz(x, detections);
+
+	Log(Log::DEBUG, false)<<__func__<<" point "<<x;
+
+	if(!inDetection)
+	{
+		// look for the box that starts closest to the right of x, the free
+		// region must end there or it would run into that box
+		const Yolo::Detection* closest = nullptr;
+		for(const Yolo::Detection& detection : detections)
+		{
+			if(detection.box.x > x && (closest == nullptr || detection.box.x < closest->box.x))
+				closest = &detection;
+		}
+		if(closest)
+			x = closest->box.x;
+		else
+			x = imgSizeX;
+
+		Log(Log::DEBUG)<<" is not in any box and will be moved to "<<x<<" where the closest box ("<<(closest ? closest->className : "null")<<") is";
+		return false;
+	}
+	else
+	{
+		x = inDetection->box.br().x;
+		Log(Log::DEBUG, false)<<" is in a box and will be moved to its end "<<x<<" where ";
+		// overlapping boxes form one frozen region, continue while the new
+		// position is covered by a box that extends beyond it
+		const Yolo::Detection* candidateDetection = pointInDetectionHoriz(x, detections, inDetection);
+		if(candidateDetection && candidateDetection->box.br().x > x)
+		{
+			Log(Log::DEBUG)<<"it is again in a box";
+			return findRegionEndpointHoriz(x, detections, imgSizeX);
+		}
+		else
+		{
+			Log(Log::DEBUG)<<"it is not in a box";
+			return true;
+		}
+	}
+}
+
+// Cuts image into consecutive full height column ranges.
+//
+// Each returned pair holds a view into image (no pixel data is copied) and a
+// flag that is true if the range is covered by detections (frozen) and false
+// if it is free space.
+std::vector<std::pair<cv::Mat, bool>> cutImageIntoHorzRegions(cv::Mat& image, const std::vector<Yolo::Detection>& detections)
+{
+	std::vector<std::pair<cv::Mat, bool>> out;
+
+	// goes through Log so that the message honours the configured log level
+	Log(Log::DEBUG)<<__func__<<' '<<image.cols<<'x'<<image.rows;
+
+	for(int x = 0; x < image.cols; ++x)
+	{
+		int start = x;
+		bool frozen = findRegionEndpointHoriz(x, detections, image.cols);
+		// a detection box may reach beyond the image, the region may not
+		x = std::min(x, image.cols);
+
+		int width = x-start;
+		// the end column belongs to this region, the next one starts at x+1
+		if(x < image.cols)
+			++width;
+		cv::Rect rect(start, 0, width, image.rows);
+		Log(Log::DEBUG)<<__func__<<" region\t"<<rect;
+		cv::Mat slice = image(rect);
+		out.push_back({slice, frozen});
+	}
+
+	return out;
+}
+
+// Concatenates slices from left to right into one new image.
+//
+// slices must not be empty and all slices are expected to have the row count
+// and type of the first one. The bool of each pair is not used.
+cv::Mat assembleFromSlicesHoriz(const std::vector<std::pair<cv::Mat, bool>>& slices)
+{
+	assert(!slices.empty());
+
+	int cols = 0;
+	for(const std::pair<cv::Mat, bool>& slice : slices)
+		cols += slice.first.cols;
+
+	// cv::Mat takes the row count first and the column count second
+	cv::Mat image(slices[0].first.rows, cols, slices[0].first.type());
+	Log(Log::DEBUG)<<__func__<<' '<<image.size()<<' '<<cols<<' '<<slices[0].first.rows;
+
+	int col = 0;
+	for(const std::pair<cv::Mat, bool>& slice : slices)
+	{
+		cv::Rect rect(col, 0, slice.first.cols, slice.first.rows);
+		Log(Log::DEBUG)<<__func__<<' '<<rect;
+		slice.first.copyTo(image(rect));
+		// slices do not share columns, the next one starts right behind this one
+		col += slice.first.cols;
+	}
+
+	return image;
+}
+
+// Mirrors rect at the main diagonal: x and y as well as width and height
+// trade places.
+void transposeRect(cv::Rect& rect)
+{
+	std::swap(rect.x, rect.y);
+	std::swap(rect.width, rect.height);
+}
+
+// Grows image by seam carving until it has targetAspectRatio (width/height),
+// only stretching the parts of the image that are not covered by detections.
+//
+// image:             image to resize, replaced by the result on success
+// detections:        detections for image (taken by value, the copy is filtered and possibly transposed)
+// targetAspectRatio: wanted ratio of cols to rows
+// returns false if there are not enough unfrozen columns or if carving a slice fails
+bool seamCarveResize(cv::Mat& image, std::vector<Yolo::Detection> detections, double targetAspectRatio = 1.0)
+{
+	// detections with a priority below 3 do not protect their area from being resized
+	detections.erase(std::remove_if(detections.begin(), detections.end(), [](const Yolo::Detection& detection){return detection.priority < 3;}), detections.end());
+
+	double aspectRatio = image.cols/static_cast<double>(image.rows);
+
+	Log(Log::DEBUG)<<"Image size "<<image.size()<<" aspect ratio "<<aspectRatio<<" target aspect ratio "<<targetAspectRatio;
+
+	// an image that is too wide has to gain rows, everything else gains columns
+	bool vertical = false;
+	if(aspectRatio > targetAspectRatio)
+		vertical = true;
+
+	// number of columns (or rows if vertical) that have to be added
+	int requiredLines = 0;
+	if(!vertical)
+		requiredLines = image.rows*targetAspectRatio - image.cols;
+	else
+		requiredLines = image.cols/targetAspectRatio - image.rows;
+
+	Log(Log::DEBUG)<<__func__<<' '<<requiredLines<<" lines are required in "<<(vertical ? "vertical" : "horizontal")<<" direction";
+
+	// the code below only works on columns, so the vertical case is handled
+	// by transposing the image and the boxes; every return path transposes back
+	if(vertical)
+	{
+		cv::transpose(image, image);
+		for(Yolo::Detection& detection : detections)
+			transposeRect(detection.box);
+	}
+
+	std::vector<std::pair<cv::Mat, bool>> slices = cutImageIntoHorzRegions(image, detections);
+	Log(Log::DEBUG)<<"Image has "<<slices.size()<<" slices:";
+	// sum of the widths of all slices that are allowed to be stretched
+	int totalResizableSize = 0;
+	for(const std::pair<cv::Mat, bool>& slice : slices)
+	{
+		Log(Log::DEBUG)<<"a "<<(slice.second ? "frozen" : "unfrozen")<<" slice of size "<<slice.first.cols;
+		if(!slice.second)
+			totalResizableSize += slice.first.cols;
+	}
+
+	if(totalResizableSize < requiredLines+1)
+	{
+		Log(Log::WARN)<<"Unable to seam carve as there are only "<<totalResizableSize<<" unfrozen cols";
+		if(vertical)
+			cv::transpose(image, image);
+		return false;
+	}
+
+	// distribute the seams over the unfrozen slices proportional to their
+	// width, the conversion to int truncates
+	std::vector<int> seamsForSlice(slices.size(), 0);
+	for(size_t i = 0; i < slices.size(); ++i)
+	{
+		if(!slices[i].second)
+			seamsForSlice[i] = (static_cast<double>(slices[i].first.cols)/totalResizableSize)*requiredLines;
+	}
+
+	// whatever the truncation above left over is given to the last unfrozen slice
+	int residual = requiredLines - std::accumulate(seamsForSlice.begin(), seamsForSlice.end(), decltype(seamsForSlice)::value_type(0));;
+	for(ssize_t i = slices.size()-1; i >= 0; --i)
+	{
+		if(!slices[i].second)
+		{
+			seamsForSlice[i] += residual;
+			break;
+		}
+	}
+
+	for(size_t i = 0; i < slices.size(); ++i)
+	{
+		if(seamsForSlice[i] != 0)
+		{
+			// grow = true: the slice gets wider by seamsForSlice[i] columns
+			bool ret = SeamCarving::strechImage(slices[i].first, seamsForSlice[i], true);
+			if(!ret)
+			{
+				if(vertical)
+					transpose(image, image);
+				return false;
+			}
+		}
+	}
+
+	image = assembleFromSlicesHoriz(slices);
+
+	if(vertical)
+		cv::transpose(image, image);
+
+	return true;
+}
+
+// Draws every detection (box plus a filled label with class name, the first
+// four characters of the confidence and the priority) and the crop rectangle
+// rect (in red, BGR 0,0,255) into image.
+void drawDebugInfo(cv::Mat &image, const cv::Rect& rect, const std::vector<Yolo::Detection>& detections)
+{
+	const int font = cv::FONT_HERSHEY_DUPLEX;
+
+	for(const Yolo::Detection& detection : detections)
+	{
+		const cv::Rect& box = detection.box;
+		cv::rectangle(image, box, detection.color, 3);
+
+		std::string label = detection.className;
+		label += ' ';
+		label += std::to_string(detection.confidence).substr(0, 4);
+		label += ' ';
+		label += std::to_string(detection.priority);
+
+		// filled background for the label, placed above the upper left corner of the box
+		const cv::Size labelSize = cv::getTextSize(label, font, 1, 1, 0);
+		const cv::Rect textBox(box.x, box.y - 40, labelSize.width + 10, labelSize.height + 20);
+		cv::rectangle(image, textBox, detection.color, cv::FILLED);
+
+		const cv::Point textOrigin(box.x + 5, box.y - 10);
+		cv::putText(image, label, textOrigin, font, 1, cv::Scalar(0, 0, 0), 1, 0);
+	}
+
+	cv::rectangle(image, rect, cv::Scalar(0, 0, 255), 8);
+}
+
+// Shrinks image in place so that its longer side is twice the longer side of
+// targetSize, keeping the aspect ratio. Images that are not larger than that
+// are left untouched.
+static void reduceSize(cv::Mat& image, const cv::Size& targetSize)
+{
+	const int longTargetSize = std::max(targetSize.width, targetSize.height)*2;
+	if(std::max(image.cols, image.rows) <= longTargetSize)
+		return;
+
+	// the longer side is set to longTargetSize, the other one is scaled by the same ratio
+	const bool landscape = image.cols > image.rows;
+	const double ratio = static_cast<double>(longTargetSize)/(landscape ? image.cols : image.rows);
+
+	cv::Size newSize;
+	if(landscape)
+		newSize = cv::Size(longTargetSize, static_cast<int>(image.rows*ratio));
+	else
+		newSize = cv::Size(static_cast<int>(image.cols*ratio), longTargetSize);
+
+	cv::resize(image, image, newSize, 0, 0, ratio < 1 ? cv::INTER_AREA : cv::INTER_CUBIC);
+}
+
+// Processes a single image: load, detect objects, optionally look for the
+// focus person, crop and/or seam carve to the target aspect ratio, scale to
+// config.targetSize and save to config.outputDir under the input file name.
+//
+// path:            image to process
+// config:          program configuration
+// yolo:            detector used for this image
+// recognizer:      optional face recognizer, may be nullptr
+// reconizerMutex:  guards every use of recognizer
+// debugOutputPath: directory that receives the debug image if config.debug is set
+void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yolo, FaceRecognizer* recognizer,
+	std::mutex& reconizerMutex, const std::filesystem::path& debugOutputPath)
+{
+	InteligentRoi intRoi(yolo);
+	cv::Mat image = cv::imread(path);
+	if(!image.data)
+	{
+		Log(Log::WARN)<<"could not load image "<<path<<" skipping";
+		return;
+	}
+
+	reduceSize(image, config.targetSize);
+
+	std::vector<Yolo::Detection> detections = yolo.runInference(image);
+
+	Log(Log::DEBUG)<<"Got "<<detections.size()<<" detections for "<<path;
+
+	// Face detections are collected here and appended after the loop:
+	// pushing into detections while iterating over it may reallocate the
+	// vector and invalidate the reference the loop is working on.
+	std::vector<Yolo::Detection> faces;
+	for(Yolo::Detection& detection : detections)
+	{
+		bool hasmatch = false;
+		if(recognizer && detection.className == "person")
+		{
+			cv::Mat person = image(detection.box);
+			// the lock is released even if isMatch throws
+			std::unique_lock<std::mutex> lock(reconizerMutex);
+			FaceRecognizer::Detection match = recognizer->isMatch(person);
+			lock.unlock();
+			if(match.person >= 0)
+			{
+				detection.priority += 10;
+				hasmatch = true;
+				faces.push_back({0, "Face", match.confidence, 20, {255, 0, 0}, match.rect});
+			}
+		}
+		Log(Log::DEBUG)<<detection.class_id<<": "<<detection.className<<" at "<<detection.box<<" with prio "<<detection.priority<<(hasmatch ? " has match" : "");
+	}
+
+	for(const Yolo::Detection& face : faces)
+	{
+		Log(Log::DEBUG)<<face.class_id<<": "<<face.className<<" at "<<face.box<<" with prio "<<face.priority;
+		detections.push_back(face);
+	}
+
+	cv::Rect crop;
+	bool incompleate = intRoi.getCropRectangle(crop, detections, image.size());
+
+	if(config.seamCarving && incompleate)
+	{
+		bool ret = seamCarveResize(image, detections, config.targetSize.aspectRatio());
+		// the image changed, the old detections do not fit it anymore
+		if(ret && image.size().aspectRatio() != config.targetSize.aspectRatio())
+		{
+			detections = yolo.runInference(image);
+		}
+	}
+
+	cv::Mat croppedImage;
+
+	if(image.size().aspectRatio() != config.targetSize.aspectRatio() && incompleate)
+	{
+		intRoi.getCropRectangle(crop, detections, image.size());
+
+		if(config.debug)
+		{
+			cv::Mat debugImage = image.clone();
+			drawDebugInfo(debugImage, crop, detections);
+			bool ret = cv::imwrite(debugOutputPath/path.filename(), debugImage);
+			if(!ret)
+				Log(Log::WARN)<<"could not save debug image to "<<debugOutputPath/path.filename()<<" skipping";
+		}
+
+		croppedImage = image(crop);
+	}
+	else if(!incompleate)
+	{
+		croppedImage = image(crop);
+	}
+	else
+	{
+		// seam carving already produced the target aspect ratio
+		croppedImage = image;
+	}
+
+	cv::Mat resizedImage;
+	cv::resize(croppedImage, resizedImage, config.targetSize, 0, 0, cv::INTER_CUBIC);
+	bool ret = cv::imwrite(config.outputDir/path.filename(), resizedImage);
+	if(!ret)
+		Log(Log::WARN)<<"could not save image to "<<config.outputDir/path.filename()<<" skipping";
+}
+
+// Body of one worker thread: creates a Yolo instance for this thread and
+// runs pipeline() on every image of its share, one after the other.
+void threadFn(const std::vector<std::filesystem::path>& images, const Config& config, FaceRecognizer* recognizer,
+		std::mutex& reconizerMutex, const std::filesystem::path& debugOutputPath)
+{
+	Yolo yolo(config.modelPath, {640, 480}, config.classesPath, false);
+	for(const std::filesystem::path& imagePath : images)
+	{
+		pipeline(imagePath, config, yolo, recognizer, reconizerMutex, debugOutputPath);
+	}
+}
+
+// Splits vec into at most parts consecutive chunks whose sizes differ by at
+// most one element, the larger chunks come first. No empty chunks are
+// created: a vector with fewer elements than parts yields one chunk per
+// element and an empty vector yields no chunks.
+//
+// parts: number of chunks wanted, 0 is treated as 1
+template<typename T>
+std::vector<std::vector<T>> splitVector(const std::vector<T>& vec, size_t parts)
+{
+	std::vector<std::vector<T>> out;
+
+	// callers pass std::thread::hardware_concurrency() which may be 0,
+	// without this the division below would divide by zero
+	if(parts == 0)
+		parts = 1;
+
+	const size_t chunks = std::min(parts, vec.size());
+	out.reserve(chunks);
+
+	const size_t length = vec.size()/parts;
+	size_t remain = vec.size() % parts;
+
+	size_t begin = 0;
+	for(size_t i = 0; i < chunks; ++i)
+	{
+		size_t end = begin + length;
+		// the first vec.size() % parts chunks take one extra element
+		if(remain > 0)
+		{
+			++end;
+			--remain;
+		}
+		out.emplace_back(vec.begin() + begin, vec.begin() + end);
+		begin = end;
+	}
+
+	return out;
+}
+
+// Entry point: parses the command line, collects the input images, prepares
+// the output directories and the optional face recognizer and processes the
+// images on one worker thread per chunk of the image list.
+// Returns 0 on success and 1 if the setup fails.
+int main(int argc, char* argv[])
+{
+	Log::level = Log::INFO;
+
+	Config config;
+	// a non zero result means that an option could not be parsed, config is
+	// not trustworthy in that case
+	if(argp_parse(&argp, argc, argv, 0, 0, &config) != 0)
+	{
+		Log(Log::ERROR)<<"could not parse the command line";
+		return 1;
+	}
+
+	if(config.outputDir.empty())
+	{
+		Log(Log::ERROR)<<"a output path \"-o\" is required";
+		return 1;
+	}
+
+	if(config.imagePaths.empty())
+	{
+		Log(Log::ERROR)<<"at least one input image or directory is required";
+		return 1;
+	}
+
+	std::vector<std::filesystem::path> imagePaths;
+
+	for(const std::filesystem::path& path : config.imagePaths)
+		getImageFiles(path, imagePaths);
+
+	Log(Log::DEBUG)<<"Images:";
+	for(const::std::filesystem::path& path: imagePaths)
+		Log(Log::DEBUG)<<path;
+
+	if(imagePaths.empty())
+	{
+		Log(Log::ERROR)<<"no image was found\n";
+		return 1;
+	}
+
+	if(!std::filesystem::exists(config.outputDir))
+	{
+		if(!std::filesystem::create_directory(config.outputDir))
+		{
+			Log(Log::ERROR)<<"could not create directory at "<<config.outputDir;
+			return 1;
+		}
+	}
+
+	std::filesystem::path debugOutputPath(config.outputDir/"debug");
+	if(config.debug)
+	{
+		if(!std::filesystem::exists(debugOutputPath))
+			std::filesystem::create_directory(debugOutputPath);
+	}
+
+	FaceRecognizer* recognizer = nullptr;
+	std::mutex recognizerMutex;
+	if(!config.focusPersonImage.empty())
+	{
+		cv::Mat personImage = cv::imread(config.focusPersonImage);
+		if(personImage.empty())
+		{
+			Log(Log::ERROR)<<"Could not load image from "<<config.focusPersonImage;
+			return 1;
+		}
+		recognizer = new FaceRecognizer();
+		recognizer->addReferances({personImage});
+		recognizer->setThreshold(config.threshold);
+	}
+
+	// hardware_concurrency() returns 0 if the value can not be determined,
+	// use a single worker in that case
+	const size_t workerCount = std::max(1u, std::thread::hardware_concurrency());
+
+	std::vector<std::thread> threads;
+	std::vector<std::vector<std::filesystem::path>> imagePathParts = splitVector(imagePaths, workerCount);
+
+	for(size_t i = 0; i < imagePathParts.size(); ++i)
+		threads.push_back(std::thread(threadFn, imagePathParts[i], std::ref(config),  recognizer, std::ref(recognizerMutex), std::ref(debugOutputPath)));
+
+	for(std::thread& thread : threads)
+		thread.join();
+
+	// all workers are done, nobody uses the recognizer anymore
+	delete recognizer;
+
+	return 0;
+}
--- a/SmartCrop/options.h
+++ b/SmartCrop/options.h
@ -0,0 +1,117 @@
+/* * SmartCrop - A tool for content aware croping of images
+ * Copyright (C) 2024 Carl Philipp Klemm
+ *
+ * This file is part of SmartCrop.
+ *
+ * SmartCrop is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * SmartCrop is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with SmartCrop.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+#include <string>
+#include <vector>
+#include <argp.h>
+#include <iostream>
+#include <filesystem>
+#include <opencv2/core/types.hpp>
+#include "log.h"
+
+// Globals read by argp: the version string and the bug report address
+const char *argp_program_version = "AIImagePreprocesses";
+const char *argp_program_bug_address = "<carl@uvos.xyz>";
+// description and non option argument synopsis, passed to argp via the argp struct of this header
+static char doc[] = "Application that trainsforms images into formats, sizes and aspect ratios required for ai training";
+static char args_doc[] = "FILE(S)";
+
+// Command line options, each entry is {long name, short key, argument name, flags, help text}.
+// The short key is what parse_opt() switches on. The table ends with {0}.
+static struct argp_option options[] =
+{
+  {"verbose",		'v', 0,				0,	"Show debug messages" },
+  {"quiet", 		'q', 0,				0,	"only output data" },
+  {"model", 		'm', "[FILENAME]",	0,	"YoloV8 model to use for detection" },
+  {"classes", 		'c', "[FILENAME]",	0,	"classes text file to use" },
+  {"out",	 		'o', "[DIRECTORY]",	0,	"directory whre images are to be saved" },
+  {"debug", 		'd', 0,				0,	"output debug images" },
+  {"seam-carving", 	's', 0,				0,	"use seam carving to change image aspect ratio instead of croping"},
+  {"size", 			'z', "[PIXELS]",	0,	"target output size, default: 512"},
+  {"focus-person",	'f', "[FILENAME]",	0,	"a file name to an image of a person that the crop should focus on"},
+  {"person-threshold",	't', "[NUMBER]",	0,	"the threshold at witch to consider a person matched, defaults to 0.363"},
+  {0}
+};
+
+// Program configuration, filled by parse_opt() from the command line.
+struct Config
+{
+	// non option arguments: input images or directories
+	std::vector<std::filesystem::path> imagePaths;
+	// -m: YoloV8 model file
+	std::filesystem::path modelPath;
+	// -c: classes text file
+	std::filesystem::path classesPath;
+	// -o: directory the results are saved to
+	std::filesystem::path outputDir;
+	// -f: image of the person the crop should focus on, empty if not given
+	std::filesystem::path focusPersonImage;
+	// -s: use seam carving to change the aspect ratio
+	bool seamCarving = false;
+	// -d: write debug images
+	bool debug = false;
+	// -t: threshold at which a person is considered matched
+	double threshold = 0.363;
+	// -z: output size, the option sets width and height to the same value
+	cv::Size targetSize = cv::Size(512, 512);
+};
+
+// argp parser callback: stores the option identified by key in the Config
+// that was passed to argp_parse() as input.
+//
+// key:   short option key or one of the ARGP_KEY_ constants
+// arg:   argument of the option, may be null for options without argument
+// state: argp parser state, state->input points to the Config
+// returns 0 on success, ARGP_ERR_UNKNOWN for keys that are not handled here
+// and EINVAL if a numeric argument is not a valid number
+static error_t parse_opt (int key, char *arg, struct argp_state *state)
+{
+	Config *config = reinterpret_cast<Config*>(state->input);
+	try
+	{
+		switch (key)
+		{
+		case 'q':
+			Log::level = Log::ERROR;
+			break;
+		case 'v':
+			Log::level = Log::DEBUG;
+			break;
+		case 'm':
+			config->modelPath = arg;
+			break;
+		case 'c':
+			config->classesPath = arg;
+			break;
+		case 'd':
+			config->debug = true;
+			break;
+		case 'o':
+			config->outputDir.assign(arg);
+			break;
+		case 's':
+			config->seamCarving = true;
+			break;
+		case 'f':
+			config->focusPersonImage = arg;
+			break;
+		case 't':
+			// std::stod reports garbage by throwing, std::atof would
+			// silently turn it into 0
+			config->threshold = std::stod(arg);
+			break;
+		case 'z':
+		{
+			int x = std::stoi(arg);
+			// an image can not be scaled to an empty or negative size
+			if(x <= 0)
+				throw std::invalid_argument("size must be positive");
+			config->targetSize = cv::Size(x, x);
+			break;
+		}
+		case ARGP_KEY_ARG:
+			config->imagePaths.push_back(arg);
+			break;
+		default:
+			return ARGP_ERR_UNKNOWN;
+		}
+	}
+	// std::logic_error covers both std::invalid_argument and
+	// std::out_of_range, no exception may escape into the C code of argp
+	catch(const std::logic_error&)
+	{
+		std::cout<<arg<<" passed for argument -"<<static_cast<char>(key)<<" is not a valid number.\n";
+		// argp expects an errno value here, ARGP_KEY_ERROR is a key and not an error code
+		return EINVAL;
+	}
+	return 0;
+}
+
+static struct argp argp = {options, parse_opt, args_doc, doc};
--- a/SmartCrop/readfile.h
+++ b/SmartCrop/readfile.h
@ -0,0 +1,35 @@
+/* * SmartCrop - A tool for content aware croping of images
+ * Copyright (C) 2024 Carl Philipp Klemm
+ *
+ * This file is part of SmartCrop.
+ *
+ * SmartCrop is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * SmartCrop is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with SmartCrop.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+#include <string>
+#include <filesystem>
+#include <fstream>
+#include <stdexcept>
+#include <sstream>
+
+// Returns the complete content of the file at path.
+// Throws std::runtime_error if the file can not be opened.
+inline std::string readFile(const std::filesystem::path& path)
+{
+	std::ifstream input(path);
+	if(input.is_open())
+	{
+		std::stringstream content;
+		content<<input.rdbuf();
+		return content.str();
+	}
+	throw std::runtime_error(std::string("could not open file ") + path.string());
+}
--- a/SmartCrop/seamcarving.cpp
+++ b/SmartCrop/seamcarving.cpp
@ -0,0 +1,376 @@
+//
+// SmartCrop - A tool for content aware croping of images
+// Copyright (C) 2024 Carl Philipp Klemm
+//
+// This file is part of SmartCrop.
+//
+// SmartCrop is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// SmartCrop is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with SmartCrop.  If not, see <http://www.gnu.org/licenses/>.
+//
+
+#include "seamcarving.h"
+
+#include <opencv2/imgcodecs.hpp>
+#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/imgproc.hpp>
+#include <iostream>
+#include <filesystem>
+#include <cfloat>
+#include <vector>
+#include "log.h"
+
+// Changes the width of image by seams columns using seam carving.
+//
+// The seams are determined by repeatedly removing the least important
+// vertical path from a working copy of image. With grow == false the shrunk
+// working copy becomes the result; with grow == true the recorded seams are
+// inserted one after another into a copy of the original image instead.
+//
+// image:     image to resize, only replaced if the function succeeds
+// seams:     number of columns to remove or add
+// grow:      true to widen the image, false to narrow it
+// seamsVect: optional, every seam found is appended to it
+// returns false if no seam could be computed or the working copy ran empty
+bool SeamCarving::strechImage(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect)
+{
+	cv::Mat carved = image.clone();
+	assert(!carved.empty());
+	std::vector<std::vector<int>> foundSeams;
+
+	for(int n = 0; n < seams; n++)
+	{
+		// energy of every pixel, then the accumulated cost of the cheapest
+		// vertical path ending in every pixel
+		const cv::Mat energy = computeGradientMagnitude(carved);
+		const cv::Mat pathCost = computePathIntensityMat(energy);
+		if(pathCost.rows == 0 && pathCost.cols == 0)
+			return false;
+
+		const std::vector<int> seam = getLeastImportantPath(pathCost);
+		foundSeams.push_back(seam);
+		if(seamsVect != nullptr)
+			seamsVect->push_back(seam);
+
+		carved = removeLeastImportantPath(carved, seam);
+		if(carved.rows == 0 || carved.cols == 0)
+			return false;
+	}
+
+	if(!grow)
+	{
+		image = carved;
+		return true;
+	}
+
+	cv::Mat grown = image.clone();
+	for(const std::vector<int>& seam : foundSeams)
+		grown = addLeastImportantPath(grown, seam);
+	image = grown;
+	return true;
+}
+
+// Same as strechImage() but for rows: the image is transposed, resized in
+// column direction and transposed back, also if the resize failed.
+bool SeamCarving::strechImageVert(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect)
+{
+	cv::transpose(image, image);
+	const bool success = strechImage(image, seams, grow, seamsVect);
+	cv::transpose(image, image);
+
+	return success;
+}
+
+// Runs strechImage() on image and additionally produces seamsImage, a copy
+// of the unmodified input with all seams that were found drawn into it.
+// If strechImage() fails, false is returned and seamsImage is left as a
+// plain copy of the input.
+bool SeamCarving::strechImageWithSeamsImage(cv::Mat& image, cv::Mat& seamsImage, int seams, bool grow)
+{
+	seamsImage = image.clone();
+
+	std::vector<std::vector<int>> foundSeams;
+	if(!SeamCarving::strechImage(image, seams, grow, &foundSeams))
+		return false;
+
+	for(const std::vector<int>& seam : foundSeams)
+		seamsImage = drawSeam(seamsImage, seam);
+
+	return true;
+}
+
+// Computes an energy image for img: the sum of the absolute responses of a
+// 3x3 derivative kernel and its transpose applied to the grayscale image.
+cv::Mat SeamCarving::GetEnergyImg(const cv::Mat &img)
+{
+	// find partial derivative of x-axis and y-axis seperately
+	// sum up the partial derivates
+	// The kernel needs all nine coefficients: the Mat below reads 3x3 floats
+	// from this array.
+	float pd[] = {1, 2, 1, 0, 0, 0, -1, -2, -1};
+	cv::Mat xFilter(3, 3, CV_32FC1, pd);
+	cv::Mat yFilter = xFilter.t();
+	cv::Mat grayImg;
+	cv::cvtColor(img, grayImg, cv::COLOR_RGBA2GRAY);
+	cv::Mat dxImg;
+	cv::Mat dyImg;
+
+	// NOTE(review): ddepth 0 is CV_8U, negative filter responses are
+	// presumably saturated to 0 before cv::abs sees them; confirm whether a
+	// signed or float depth is wanted here
+	cv::filter2D(grayImg, dxImg, 0, xFilter);
+	cv::filter2D(grayImg, dyImg, 0, yFilter);
+	cv::Mat absDxImg = cv::abs(dxImg);
+	cv::Mat absDyImg = cv::abs(dyImg);
+
+	cv::Mat energyImg;
+	cv::add(absDxImg, absDyImg, energyImg);
+	return energyImg;
+}
+
+// Returns the per pixel gradient magnitude sqrt(dx^2 + dy^2) of the
+// grayscale version of frame as a CV_32FC1 matrix. The derivatives are
+// computed with the Sobel operator.
+cv::Mat SeamCarving::computeGradientMagnitude(const cv::Mat &frame)
+{
+	cv::Mat gray;
+	cv::cvtColor(frame, gray, cv::COLOR_RGBA2GRAY);
+
+	cv::Mat derivative = cv::Mat(gray.size(), CV_16SC1);
+	cv::Mat derivativeFloat = cv::Mat(gray.size(), CV_32FC1);
+	cv::Mat magnitude = cv::Mat::zeros(gray.size(), CV_32FC1);
+
+	// first pass: derivative in x direction, second pass: in y direction
+	for(int pass = 0; pass < 2; ++pass)
+	{
+		const int dx = (pass == 0) ? 1 : 0;
+		const int dy = 1 - dx;
+		cv::Sobel(gray, derivative, CV_16SC1, dx, dy);
+		derivative.convertTo(derivativeFloat, CV_32FC1);
+		cv::accumulateSquare(derivativeFloat, magnitude);
+	}
+
+	cv::sqrt(magnitude, magnitude);
+	return magnitude;
+}
+
+// Returns currIndex if start lies in [0, end), otherwise FLT_MAX so that the
+// value never wins a minimum search.
+float SeamCarving::intensity(float currIndex, int start, int end)
+{
+	const bool inRange = start >= 0 && start < end;
+	return inRange ? currIndex : FLT_MAX;
+}
+
+// Builds the accumulated energy map used to find the cheapest vertical seam.
+//
+// The first row is a copy of the first row of rawEnergyMap. Every other
+// element is its own energy plus the smallest accumulated value of the up to
+// three neighbours (left, centre, right) in the row above.
+//
+// rawEnergyMap: per pixel energy, read as float elements
+// returns a CV_32FC1 matrix of the same size or an empty matrix if
+// rawEnergyMap has no elements
+cv::Mat SeamCarving::computePathIntensityMat(const cv::Mat &rawEnergyMap)
+{
+	if(rawEnergyMap.total() == 0)
+	{
+		return cv::Mat();
+	}
+
+	cv::Mat pathIntensityMap = cv::Mat(rawEnergyMap.size(), CV_32FC1);
+
+	//First row of intensity paths is the same as the energy map
+	rawEnergyMap.row(0).copyTo(pathIntensityMap.row(0));
+
+	const int lastCol = pathIntensityMap.cols - 1;
+
+	//The rest of them use the DP calculation using the minimum of the 3 pixels above them + their own intensity.
+	for(int row = 1; row < pathIntensityMap.rows; row++)
+	{
+		for(int col = 0; col < pathIntensityMap.cols; col++)
+		{
+			// Neighbours outside of the matrix are not read at all: the
+			// column is checked before the element is accessed.
+			float minIntensity = pathIntensityMap.at<float>(row-1, col);
+			if(col > 0)
+				minIntensity = std::min(minIntensity, pathIntensityMap.at<float>(row-1, col-1));
+			if(col < lastCol)
+				minIntensity = std::min(minIntensity, pathIntensityMap.at<float>(row-1, col+1));
+
+			pathIntensityMap.at<float>(row, col) = rawEnergyMap.at<float>(row, col) + minIntensity;
+		}
+	}
+	return pathIntensityMap;
+}
+
+// Traces the cheapest vertical seam through an accumulated energy map.
+//
+// importanceMap: map as produced by computePathIntensityMat(), read as float elements
+// returns one column index per row (index = row) or an empty vector if
+// importanceMap has no elements
+std::vector<int> SeamCarving::getLeastImportantPath(const cv::Mat &importanceMap)
+{
+	if(importanceMap.total() == 0)
+	{
+		return std::vector<int>();
+	}
+
+	//The seam ends at the column with the smallest value in the last row (the first one on ties).
+	float minImportance = importanceMap.at<float>(importanceMap.rows - 1, 0);
+	int minCol = 0;
+	for (int col = 1; col < importanceMap.cols; col++)
+	{
+		float currPixel =importanceMap.at<float>(importanceMap.rows - 1, col);
+		if(currPixel < minImportance)
+		{
+			minCol = col;
+			minImportance = currPixel;
+		}
+	}
+
+	std::vector<int> leastEnergySeam(importanceMap.rows);
+	leastEnergySeam[importanceMap.rows-1] = minCol;
+	for(int row = importanceMap.rows - 2; row >= 0; row--)
+	{
+		// Neighbours outside of the matrix count as FLT_MAX, the column is
+		// checked before the element is accessed.
+		const float p1 = minCol > 0 ? importanceMap.at<float>(row, minCol-1) : FLT_MAX;
+		const float p2 = importanceMap.at<float>(row, minCol);
+		const float p3 = minCol + 1 < importanceMap.cols ? importanceMap.at<float>(row, minCol+1) : FLT_MAX;
+		//Adjust the min column for path following, the seam only moves if a diagonal neighbour is strictly cheaper
+		if(p1 < p2 && p1 < p3)
+		{
+			minCol -= 1;
+		}
+		else if(p3 < p1 && p3 < p2)
+		{
+			minCol += 1;
+		}
+		leastEnergySeam[row] = minCol;
+	}
+
+	return leastEnergySeam;
+}
+
+// Returns a copy of original that is one column narrower: in every row the
+// pixel at the column given by seam (index = row) is removed via removePixel().
+cv::Mat SeamCarving::removeLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam)
+{
+	// same height, one column less
+	const cv::Size sourceSize = original.size();
+	cv::Mat shrunk = cv::Mat(cv::Size(sourceSize.width-1, sourceSize.height), original.type());
+
+	int row = 0;
+	for(const int seamCol : seam)
+	{
+		removePixel(original, shrunk, row, seamCol);
+		++row;
+	}
+	return shrunk;
+}
+
+// Copies one row of original into outputMat without the pixel at minCol and
+// blends the removed pixel into its former neighbours.
+//
+// The pixels left of minCol keep their column, the pixels right of it move
+// one column to the left. Afterwards the former right and left neighbour are
+// each replaced by the average of themselves and the removed pixel.
+//
+// All offsets are counted in bytes with one byte per channel, so this is
+// only correct for matrices with 8 bit channels. Any channel count works.
+//
+// original:  source image
+// outputMat: destination, one column narrower than original
+// row:       row to process
+// minCol:    column of the pixel to remove
+void SeamCarving::removePixel(const cv::Mat &original, cv::Mat &outputMat, int row, int minCol)
+{
+	const int width = original.cols;
+	const int channels = original.channels();
+	// row pointers instead of offsets from data: this also works for
+	// matrices whose rows are not stored back to back
+	const unsigned char *src = original.ptr<unsigned char>(row);
+	unsigned char *dst = outputMat.ptr<unsigned char>(row);
+
+	const int removedOffset = minCol * channels;
+
+	// everything left of the removed pixel
+	memcpy(dst, src, removedOffset);
+	// everything right of the removed pixel, shifted left by one pixel
+	memcpy(dst + removedOffset, src + removedOffset + channels, (width - 1 - minCol) * channels);
+
+	const unsigned char *removed = src + removedOffset;
+
+	if(minCol + 1 < width)
+	{
+		// the former right neighbour now sits at minCol in the output
+		const unsigned char *right = removed + channels;
+		unsigned char *target = dst + removedOffset;
+		for(int c = 0; c < channels; ++c)
+			target[c] = (unsigned char)((removed[c] + right[c]) / 2);
+	}
+
+	if(minCol - 1 >= 0)
+	{
+		const unsigned char *left = removed - channels;
+		unsigned char *target = dst + removedOffset - channels;
+		for(int c = 0; c < channels; ++c)
+			target[c] = (unsigned char)((removed[c] + left[c]) / 2);
+	}
+}
+
+// Returns a copy of `original` that is one column wider. For every entry of
+// `seam` (entry i belongs to row i) the per-row work is delegated to addPixel().
+cv::Mat SeamCarving::addLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam)
+{
+	// same height and type as the input, one extra column
+	cv::Mat grown(original.rows, original.cols + 1, original.type());
+
+	size_t row = 0;
+	for(const int seamCol : seam)
+	{
+		addPixel(original, grown, row, seamCol);
+		++row;
+	}
+	return grown;
+}
+
+// Copies one row of `original` into `outputMat` while inserting one extra
+// pixel directly right of column `minCol`, then averages the seam pixel into
+// its left and right neighbours.
+//
+// original:  source image; samples are read as 8-bit unsigned values
+// outputMat: destination, expected to be one column wider than `original`
+//            and of the same type
+// row:       row to process
+// minCol:    column of the seam pixel in `original`, expected in [0, original.cols)
+//
+// The inserted pixel is a copy of the right neighbour of the seam. When the
+// seam lies on the last column there is no right neighbour, so the seam pixel
+// itself is duplicated instead of reading past the end of the row.
+// Rows are addressed through cv::Mat::ptr so matrices with padded rows (ROIs)
+// work, and the blend runs over every channel instead of assuming exactly three.
+void SeamCarving::addPixel(const cv::Mat &original, cv::Mat &outputMat, int row, int minCol)
+{
+	const int width = original.cols;
+	const int channels = original.channels();
+	const unsigned char *srcRow = original.ptr<unsigned char>(row);
+	unsigned char *dstRow = outputMat.ptr<unsigned char>(row);
+
+	const int leftPixel = minCol - 1;
+	const int rightPixel = minCol + 1;
+
+	// columns [0, minCol] keep their position
+	const size_t leadingBytes = static_cast<size_t>(minCol + 1) * channels;
+	memcpy(dstRow, srcRow, leadingBytes);
+
+	// the new pixel at output column minCol + 1
+	const int insertSource = rightPixel < width ? rightPixel : minCol;
+	memcpy(dstRow + leadingBytes, srcRow + static_cast<size_t>(insertSource) * channels, channels);
+
+	// columns [minCol + 1, width) move one column to the right
+	const size_t trailingBytes = static_cast<size_t>(width - 1 - minCol) * channels;
+	memcpy(dstRow + leadingBytes + channels, srcRow + leadingBytes, trailingBytes);
+
+	const size_t seamOffset = static_cast<size_t>(minCol) * channels;
+	const unsigned char *seamPixel = srcRow + seamOffset;
+
+	if(rightPixel < width)
+	{
+		const unsigned char *right = srcRow + static_cast<size_t>(rightPixel) * channels;
+		unsigned char *out = dstRow + seamOffset;
+		for(int c = 0; c < channels; ++c)
+			out[c] = (unsigned char)((seamPixel[c] + right[c]) / 2);
+	}
+
+	if(leftPixel >= 0)
+	{
+		const size_t leftOffset = static_cast<size_t>(leftPixel) * channels;
+		const unsigned char *left = srcRow + leftOffset;
+		unsigned char *out = dstRow + leftOffset;
+		for(int c = 0; c < channels; ++c)
+			out[c] = (unsigned char)((seamPixel[c] + left[c]) / 2);
+	}
+}
+
+// Returns a copy of `frame` in which the seam pixel of every row is painted
+// with the channel values (0, 255, 0).
+//
+// frame: image accessed as cv::Vec3b, i.e. three 8-bit channels
+// seam:  seam[row] is the column to mark in that row
+//
+// Each row is touched exactly once. Rows without a seam entry and entries that
+// point outside the frame are left unchanged instead of being accessed out of
+// bounds.
+cv::Mat SeamCarving::drawSeam(const cv::Mat &frame, const std::vector<int> &seam)
+{
+	cv::Mat retMat = frame.clone();
+	const cv::Vec3b seamColor(0, 255, 0);
+	for(int row = 0; row < frame.rows && static_cast<size_t>(row) < seam.size(); row++)
+	{
+		const int col = seam[row];
+		if(col < 0 || col >= frame.cols)
+			continue;
+		retMat.at<cv::Vec3b>(row, col) = seamColor;
+	}
+	return retMat;
+}
--- a/SmartCrop/seamcarving.h
+++ b/SmartCrop/seamcarving.h
@ -0,0 +1,43 @@
+/* * SmartCrop - A tool for content aware cropping of images
+ * Copyright (C) 2024 Carl Philipp Klemm
+ *
+ * This file is part of SmartCrop.
+ *
+ * SmartCrop is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * SmartCrop is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with SmartCrop.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <opencv2/core/core.hpp>
+#include <vector>
+
+// Collection of static helpers for seam-carving based image resizing.
+// All members are static; the class carries no state.
+class SeamCarving
+{
+private:
+	// NOTE(review): the next four helpers are implemented outside the part of
+	// seamcarving.cpp reviewed here; judging by their names they build the energy /
+	// cumulative path-cost maps the seam search works on - confirm against the source.
+	static cv::Mat GetEnergyImg(const cv::Mat &img);
+	static cv::Mat computeGradientMagnitude(const cv::Mat &frame);
+	static float intensity(float currIndex, int start, int end);
+	static cv::Mat computePathIntensityMat(const cv::Mat &rawEnergyMap);
+	// Returns one column index per row describing the seam found in importanceMap.
+	static std::vector<int> getLeastImportantPath(const cv::Mat &importanceMap);
+	// Returns a copy of original that is one column narrower; seam[row] is the column dropped in that row.
+	static cv::Mat removeLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam);
+	// Per-row worker of removeLeastImportantPath: copies the row without column minCol and blends the neighbours.
+	static void removePixel(const cv::Mat &original, cv::Mat &outputMap, int row, int minCol);
+	// Returns a copy of original that is one column wider; a pixel is inserted next to seam[row] in every row.
+	static cv::Mat addLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam);
+	// Per-row worker of addLeastImportantPath: copies the row with one pixel inserted right of minCol.
+	static void addPixel(const cv::Mat &original, cv::Mat &outputMat, int row, int minCol);
+	// Returns a copy of frame with the seam pixels painted (0, 255, 0); frame is accessed as cv::Vec3b.
+	static cv::Mat drawSeam(const cv::Mat &frame, const std::vector<int> &seam);
+
+public:
+	// NOTE(review): the public entry points are implemented outside the reviewed part of
+	// the file. Presumably `seams` is the number of seams to add (grow == true) or remove,
+	// `image` is modified in place and the bool signals success - TODO confirm.
+	static bool strechImage(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect = nullptr);
+	static bool strechImageVert(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect = nullptr);
+	static bool strechImageWithSeamsImage(cv::Mat& image, cv::Mat& seamsImage, int seams, bool grow);
+};
--- a/SmartCrop/tokenize.cpp
+++ b/SmartCrop/tokenize.cpp
@ -0,0 +1,46 @@
+//
+// SmartCrop - A tool for content aware cropping of images
+// Copyright (C) 2024 Carl Philipp Klemm
+//
+// This file is part of SmartCrop.
+//
+// SmartCrop is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// SmartCrop is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with SmartCrop.  If not, see <http://www.gnu.org/licenses/>.
+//
+
+#include "tokenize.h"
+
+
+// Splits `str` at every `delim` that is neither inside a region enclosed by
+// `ignoreBraket` characters nor directly preceded by `escapeChar`.
+// Bracket and escape characters are kept in the tokens. The final token is
+// only emitted when the input does not end inside an open bracket region.
+std::vector<std::string> tokenizeBinaryIgnore(const std::string& str, const char delim, const char ignoreBraket, const char escapeChar)
+{
+	std::vector<std::string> result;
+	std::string current;
+	bool bracketOpen = false;
+
+	for(std::string::size_type pos = 0; pos < str.size(); ++pos)
+	{
+		const char ch = str[pos];
+		const bool escaped = pos > 0 && str[pos-1] == escapeChar;
+
+		if(ch != delim || bracketOpen || escaped)
+		{
+			current += ch;
+		}
+		else
+		{
+			result.push_back(current);
+			current.clear();
+		}
+
+		// the bracket state only changes after the character itself was handled
+		if(ch == ignoreBraket)
+			bracketOpen = !bracketOpen;
+	}
+
+	if(bracketOpen)
+		return result;
+
+	result.push_back(current);
+	return result;
+}
--- a/SmartCrop/tokenize.h
+++ b/SmartCrop/tokenize.h
@ -0,0 +1,26 @@
+/* * SmartCrop - A tool for content aware cropping of images
+ * Copyright (C) 2024 Carl Philipp Klemm
+ *
+ * This file is part of SmartCrop.
+ *
+ * SmartCrop is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * SmartCrop is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with SmartCrop.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+// Splits str into tokens at every occurrence of delim.
+// A delim is not treated as a separator while inside a region enclosed by a pair of
+// ignoreBraket characters, or when the character directly before it equals escapeChar;
+// bracket and escape characters stay part of the returned tokens.
+// If str ends inside an unterminated bracket region the trailing token is not returned.
+std::vector<std::string> tokenizeBinaryIgnore(const std::string& str, const char delim, const char ignoreBraket = '\0',
+											  const char escapeChar = '\0');
--- a/SmartCrop/utils.cpp
+++ b/SmartCrop/utils.cpp
@ -0,0 +1,80 @@
+//
+// SmartCrop - A tool for content aware cropping of images
+// Copyright (C) 2024 Carl Philipp Klemm
+//
+// This file is part of SmartCrop.
+//
+// SmartCrop is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// SmartCrop is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with SmartCrop.  If not, see <http://www.gnu.org/licenses/>.
+//
+
+#include "utils.h"
+
+#include <filesystem>
+#include <vector>
+#include <opencv2/imgproc.hpp>
+
+// True when `path` names an existing regular file whose extension is exactly
+// ".png", ".jpg" or ".jpeg" (the comparison is case sensitive).
+bool isImagePath(const std::filesystem::path& path)
+{
+	if(!std::filesystem::is_regular_file(path))
+		return false;
+
+	const std::filesystem::path extension = path.extension();
+	return extension == ".png" || extension == ".jpg" || extension == ".jpeg";
+}
+
+// Appends image files reachable from `path` to `paths`: `path` itself when it
+// is an image file, otherwise every image file found by walking the directory
+// tree below it. Anything else is ignored.
+void getImageFiles(const std::filesystem::path& path, std::vector<std::filesystem::path>& paths)
+{
+	if(isImagePath(path))
+	{
+		paths.push_back(path);
+		return;
+	}
+
+	if(!std::filesystem::is_directory(path))
+		return;
+
+	for(const std::filesystem::directory_entry& entry : std::filesystem::directory_iterator(path))
+	{
+		const std::filesystem::path& entryPath = entry.path();
+		if(std::filesystem::is_directory(entryPath))
+			getImageFiles(entryPath, paths); // descend into sub directories
+		else if(isImagePath(entryPath))
+			paths.push_back(entryPath);
+	}
+}
+
+// Returns the axis aligned bounding rectangle of the given points.
+//
+// points: list of (point, int) pairs; only the point part is used
+//
+// The rectangle spans from the smallest to the largest x / y coordinate, so its
+// width and height are the coordinate differences (a single point yields a 0x0 rect).
+// An empty list yields an empty cv::Rect() instead of overflowing while
+// subtracting the sentinel extremes.
+cv::Rect rectFromPoints(const std::vector<std::pair<cv::Point, int>>& points)
+{
+	if(points.empty())
+		return cv::Rect();
+
+	// seed the extremes with the first point, no numeric sentinels needed
+	int left = points.front().first.x;
+	int right = left;
+	int top = points.front().first.y;
+	int bottom = top;
+
+	for(const std::pair<cv::Point, int>& point : points)
+	{
+		left = point.first.x < left ? point.first.x : left;
+		right = point.first.x > right ? point.first.x : right;
+
+		top = point.first.y < top ? point.first.y : top;
+		bottom = point.first.y > bottom ? point.first.y : bottom;
+	}
+
+	return cv::Rect(left, top, right-left, bottom-top);
+}
+
+// Euclidean distance between two integer points.
+double pointDist(const cv::Point2i& pointA, const cv::Point2i& pointB)
+{
+	const cv::Vec2i delta(pointA.x - pointB.x, pointA.y - pointB.y);
+	return cv::norm(delta);
+}
+
+// True when `point` lies inside `rect`; all four edges count as inside,
+// including x == rect.x + rect.width and y == rect.y + rect.height.
+bool pointInRect(const cv::Point2i& point, const cv::Rect& rect)
+{
+	if(point.x < rect.x || point.x > rect.x+rect.width)
+		return false;
+
+	return point.y >= rect.y && point.y <= rect.y+rect.height;
+}
--- a/SmartCrop/utils.h
+++ b/SmartCrop/utils.h
@ -0,0 +1,34 @@
+/* * SmartCrop - A tool for content aware cropping of images
+ * Copyright (C) 2024 Carl Philipp Klemm
+ *
+ * This file is part of SmartCrop.
+ *
+ * SmartCrop is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * SmartCrop is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with SmartCrop.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <filesystem>
+#include <vector>
+#include <opencv2/imgproc.hpp>
+
+// True when path is an existing regular file with the extension ".png", ".jpg" or ".jpeg" (case sensitive).
+bool isImagePath(const std::filesystem::path& path);
+
+// Appends path to paths when it is an image file; when it is a directory, appends every
+// image file found in it and, recursively, in its sub directories.
+void getImageFiles(const std::filesystem::path& path, std::vector<std::filesystem::path>& paths);
+
+// Bounding rectangle of the point parts of the given pairs; the int part of each pair is not used.
+cv::Rect rectFromPoints(const std::vector<std::pair<cv::Point, int>>& points);
+
+// Euclidean distance between pointA and pointB.
+double pointDist(const cv::Point2i& pointA, const cv::Point2i& pointB);
+
+// True when point lies within rect, with all four edges (including x+width and y+height) counted as inside.
+bool pointInRect(const cv::Point2i& point, const cv::Rect& rect);
--- a/SmartCrop/yolo.cpp
+++ b/SmartCrop/yolo.cpp
@ -0,0 +1,278 @@
+//
+// SmartCrop - A tool for content aware cropping of images
+// Copyright (C) 2024 Carl Philipp Klemm
+//
+// This file is part of SmartCrop.
+//
+// SmartCrop is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// SmartCrop is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with SmartCrop.  If not, see <http://www.gnu.org/licenses/>.
+//
+
+#include <opencv2/dnn/dnn.hpp>
+#include <algorithm>
+#include <string>
+#include <stdexcept>
+
+#include "yolo.h"
+#include "readfile.h"
+#include "tokenize.h"
+#include "log.h"
+
+#define INCBIN_PREFIX r
+#include "incbin.h"
+
+// Embed the default class list and the default yolo model from WEIGHT_DIR into the binary.
+// With INCBIN_PREFIX set to `r` the data is referenced below as rdefaultClassesData,
+// rdefaultModelData and rdefaultModelSize.
+INCTXT(defaultClasses, WEIGHT_DIR "/classes.txt");
+INCBIN(defaultModel, WEIGHT_DIR "/yolov8x.onnx");
+
+// Sets up the detector.
+//
+// onnxModelPath:      ONNX model to load; when empty the model embedded in the binary is used
+// modelInputShape:    input size images are resized to before inference
+// classesTxtFilePath: class list file; when empty the embedded class list is used
+// runWithOCl:         selects the OpenCL target, otherwise the OpenCV backend on the CPU
+Yolo::Yolo(const std::filesystem::path &onnxModelPath, const cv::Size &modelInputShape,
+		const std::filesystem::path& classesTxtFilePath, bool runWithOCl)
+{
+	modelPath = onnxModelPath;
+	modelShape = modelInputShape;
+
+	// class table: external file when one is given, embedded defaults otherwise
+	if(!classesTxtFilePath.empty())
+	{
+		const std::string classesStr = readFile(classesTxtFilePath);
+		loadClasses(classesStr);
+	}
+	else
+	{
+		Log(Log::INFO)<<"Using builtin classes";
+		loadClasses(rdefaultClassesData);
+	}
+
+	// network: same fallback scheme as for the class table
+	if(modelPath.empty())
+	{
+		Log(Log::INFO)<<"Using builtin yolo model";
+		net = cv::dnn::readNetFromONNX((const char*)rdefaultModelData, rdefaultModelSize);
+	}
+	else
+	{
+		net = cv::dnn::readNetFromONNX(modelPath);
+	}
+
+	net.setPreferableBackend(runWithOCl ? cv::dnn::DNN_BACKEND_DEFAULT : cv::dnn::DNN_BACKEND_OPENCV);
+	net.setPreferableTarget(runWithOCl ? cv::dnn::DNN_TARGET_OPENCL : cv::dnn::DNN_TARGET_CPU);
+}
+
+// Converts one raw prediction (centre x, centre y, width, height in model input
+// coordinates) into a rectangle anchored at its top left corner and scaled by the given factors.
+static cv::Rect decodeYoloBox(const float *data, float xFactor, float yFactor)
+{
+	const float x = data[0];
+	const float y = data[1];
+	const float w = data[2];
+	const float h = data[3];
+
+	const int left = int((x - 0.5 * w) * xFactor);
+	const int top = int((y - 0.5 * h) * yFactor);
+	const int width = int(w * xFactor);
+	const int height = int(h * yFactor);
+
+	return cv::Rect(left, top, width, height);
+}
+
+// Runs the network on `input` and returns the detections that survive
+// thresholding and non-maximum suppression.
+//
+// input: image to analyse; it is padded to a square first when letterboxing is
+//        enabled and the model input shape is square
+//
+// Returns one Detection per kept box with class id, class name, priority,
+// confidence, a random display color and the box clamped to the size of `input`.
+// An empty list is returned when the network yields no usable output tensor.
+std::vector<Yolo::Detection> Yolo::runInference(const cv::Mat &input)
+{
+	cv::Mat modelInput = input;
+	if (letterBoxForSquare && modelShape.width == modelShape.height)
+		modelInput = formatToSquare(modelInput);
+
+	cv::Mat blob;
+	cv::dnn::blobFromImage(modelInput, blob, 1.0/255.0, modelShape, cv::Scalar(), true, false);
+	net.setInput(blob);
+
+	std::vector<cv::Mat> outputs;
+	net.forward(outputs, net.getUnconnectedOutLayersNames());
+
+	// size[1] and size[2] are read below, so a 3 dimensional output is required
+	if(outputs.empty() || outputs[0].dims < 3)
+	{
+		Log(Log::WARN)<<"yolo network produced no usable output";
+		return {};
+	}
+
+	int rows = outputs[0].size[1];
+	int dimensions = outputs[0].size[2];
+
+	bool yolov8 = false;
+	// yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
+	// yolov8 has an output of shape (batchSize, 84,  8400) (Num classes + box[x,y,w,h])
+	if (dimensions > rows) // Check if the shape[2] is more than shape[1] (yolov8)
+	{
+		yolov8 = true;
+		rows = outputs[0].size[2];
+		dimensions = outputs[0].size[1];
+
+		// bring the tensor into one-prediction-per-row layout
+		outputs[0] = outputs[0].reshape(1, dimensions);
+		cv::transpose(outputs[0], outputs[0]);
+	}
+	float *data = (float *)outputs[0].data;
+
+	// class scores follow the box (yolov8) or the box plus the objectness value (yolov5)
+	const int scoreOffset = yolov8 ? 4 : 5;
+	// never read more scores than one prediction row actually holds
+	const int scoreCount = std::min(static_cast<int>(classes.size()), dimensions - scoreOffset);
+	if(scoreCount <= 0)
+		return {};
+
+	float x_factor = modelInput.cols / modelShape.width;
+	float y_factor = modelInput.rows / modelShape.height;
+
+	std::vector<int> class_ids;
+	std::vector<float> confidences;
+	std::vector<cv::Rect> boxes;
+
+	for (int i = 0; i < rows; ++i, data += dimensions)
+	{
+		// yolov5 only: rows with a low objectness value are skipped right away
+		if(!yolov8 && !(data[4] >= modelConfidenceThreshold))
+			continue;
+
+		cv::Mat scores(1, scoreCount, CV_32FC1, data + scoreOffset);
+		cv::Point class_id;
+		double maxClassScore;
+		cv::minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);
+
+		if(!(maxClassScore > modelScoreThreshold))
+			continue;
+
+		// yolov8 reports the best class score, yolov5 the objectness value
+		confidences.push_back(yolov8 ? static_cast<float>(maxClassScore) : data[4]);
+		class_ids.push_back(class_id.x);
+		boxes.push_back(decodeYoloBox(data, x_factor, y_factor));
+	}
+
+	std::vector<int> nms_result;
+	cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nms_result);
+
+	// one generator for all detections instead of seeding a new one per box
+	std::random_device rd;
+	std::mt19937 gen(rd());
+	std::uniform_int_distribution<int> dis(100, 255);
+
+	std::vector<Yolo::Detection> detections{};
+	detections.reserve(nms_result.size());
+	for(unsigned long i = 0; i < nms_result.size(); ++i)
+	{
+		int idx = nms_result[i];
+
+		Yolo::Detection result;
+		result.class_id = class_ids[idx];
+		result.confidence = confidences[idx];
+		result.color = cv::Scalar(dis(gen),
+		                          dis(gen),
+		                          dis(gen));
+
+		result.className = classes[result.class_id].first;
+		result.priority = classes[result.class_id].second;
+		clampBox(boxes[idx], input.size());
+		result.box = boxes[idx];
+		detections.push_back(result);
+	}
+
+	return detections;
+}
+
+
+// Shrinks `box` in place so it does not reach outside an image of the given size.
+//
+// box:  rectangle to restrict
+// size: dimensions of the image the box has to fit into
+//
+// Parts left of / above the origin and parts beyond size.width / size.height
+// are cut off. A box that lies completely outside the image ends up with a
+// width and/or height of 0 rather than a negative extent.
+void Yolo::clampBox(cv::Rect& box, const cv::Size& size)
+{
+	const int left = std::max(box.x, 0);
+	const int top = std::max(box.y, 0);
+	const int right = std::min(box.x + box.width, size.width);
+	const int bottom = std::min(box.y + box.height, size.height);
+
+	box.x = left;
+	box.y = top;
+	box.width = std::max(right - left, 0);
+	box.height = std::max(bottom - top, 0);
+}
+
+// Replaces the class table with the entries parsed from `classesStr`.
+//
+// classesStr: one class per line in the form `name[, priority]`; the name may
+//             be wrapped in double quotes, which are stripped
+//
+// Lines shorter than two characters are skipped. The priority defaults to -1
+// when it is missing or cannot be parsed as an int (a warning is logged in the
+// latter case). The position of an entry in the table is its class id.
+void Yolo::loadClasses(const std::string& classesStr)
+{
+	std::vector<std::string> candidateClasses = tokenizeBinaryIgnore(classesStr, '\n', '"', '\\');
+	classes.clear();
+	for(const std::string& instance : candidateClasses)
+	{
+		if(instance.size() < 2)
+			continue;
+
+		std::vector<std::string> tokens = tokenizeBinaryIgnore(instance, ',', '"', '\\');
+
+		// the tokenizer returns nothing for a line ending inside an open quote;
+		// keep the raw line as name so the following class ids do not shift
+		if(tokens.empty())
+		{
+			Log(Log::WARN)<<"unable to parse class line "<<instance;
+			tokens.push_back(instance);
+		}
+
+		// strip the optional surrounding quotes from the name itself
+		std::string& name = tokens[0];
+		if(!name.empty() && name.front() == '"')
+			name.erase(name.begin());
+		if(!name.empty() && name.back() == '"')
+			name.pop_back();
+
+		int priority = -1;
+		if(tokens.size() > 1)
+		{
+			try
+			{
+				priority = std::stoi(tokens[1]);
+			}
+			catch(const std::logic_error& err)
+			{
+				// std::stoi reports std::invalid_argument or std::out_of_range, both logic errors
+				Log(Log::WARN)<<"unable to get priority for class "<<name<<' '<<err.what();
+			}
+		}
+		classes.push_back({name, priority});
+	}
+}
+
+// Pads `source` with zeros on the right / bottom so the result is square.
+//
+// source: image to pad
+//
+// Returns a new image whose side length is the larger of the source width and
+// height, with `source` copied into its top left corner. The canvas uses the
+// type of `source`; with a fixed 8-bit 3-channel canvas any other input type
+// would not be copied into it and the result would stay all zeros.
+cv::Mat Yolo::formatToSquare(const cv::Mat &source)
+{
+	const int side = std::max(source.cols, source.rows);
+	cv::Mat result = cv::Mat::zeros(side, side, source.type());
+	source.copyTo(result(cv::Rect(0, 0, source.cols, source.rows)));
+	return result;
+}
+
+// Looks up the id (index in the class table) of the first class named `str`.
+// Returns -1 when no class carries that name.
+int Yolo::getClassForStr(const std::string& str) const
+{
+	int index = 0;
+	for(const std::pair<std::string, int>& entry : classes)
+	{
+		if(entry.first == str)
+			return index;
+		++index;
+	}
+	return -1;
+}
--- a/SmartCrop/yolo.h
+++ b/SmartCrop/yolo.h
@ -0,0 +1,65 @@
+/* * SmartCrop - A tool for content aware cropping of images
+ * Copyright (C) 2024 Carl Philipp Klemm
+ *
+ * This file is part of SmartCrop.
+ *
+ * SmartCrop is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * SmartCrop is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with SmartCrop.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <fstream>
+#include <vector>
+#include <string>
+#include <random>
+#include <filesystem>
+#include <opencv2/imgproc.hpp>
+#include <opencv2/opencv.hpp>
+#include <opencv2/dnn.hpp>
+
+// Object detector running a yolo ONNX network through OpenCV's dnn module.
+class Yolo
+{
+public:
+	// One detected object as returned by runInference().
+	struct Detection
+	{
+		int class_id = 0;        // index into the class table
+		std::string className;   // name stored for class_id in the class table
+		float confidence = 0.0;  // score the detection was kept with
+		int priority = -1;       // priority stored for the class, -1 when none was given
+		cv::Scalar color;        // random color assigned per detection
+		cv::Rect box;            // bounding box, clamped to the size of the inference input
+	};
+
+private:
+	// minimum objectness value for a yolov5 prediction to be considered
+	static constexpr float modelConfidenceThreshold = 0.25;
+	// minimum best class score of a prediction; also passed to NMSBoxes as score threshold
+	static constexpr float modelScoreThreshold = 0.45;
+	// overlap threshold passed to NMSBoxes
+	static constexpr float modelNMSThreshold = 0.50;
+
+	std::string modelPath;                              // path given to the constructor, empty for the builtin model
+	std::vector<std::pair<std::string, int>> classes;   // (name, priority) per class id
+	cv::Size2f modelShape;                              // network input size
+	bool letterBoxForSquare = true;                     // pad the input to a square when modelShape is square
+	cv::dnn::Net net;
+
+	// Parses `name[, priority]` lines into the class table.
+	void loadClasses(const std::string& classes);
+	// NOTE(review): no definition of this method is present in yolo.cpp - possibly a leftover declaration.
+	void loadOnnxNetwork(const std::filesystem::path& path);
+	// Pads source with zeros to a square, keeping it in the top left corner.
+	cv::Mat formatToSquare(const cv::Mat &source);
+	// Restricts box to the area of an image of the given size.
+	static void clampBox(cv::Rect& box, const cv::Size& size);
+
+public:
+	// Empty paths select the model / class list embedded in the binary.
+	// runWithOCl selects the OpenCL target, otherwise the CPU is used.
+	Yolo(const std::filesystem::path &onnxModelPath = "", const cv::Size& modelInputShape = {640, 480},
+		const std::filesystem::path& classesTxtFilePath = "", bool runWithOCl = true);
+	// Runs the network on input and returns the detections kept after non-maximum suppression.
+	std::vector<Detection> runInference(const cv::Mat &input);
+	// Returns the class id for the given class name or -1 when it is unknown.
+	int getClassForStr(const std::string& str) const;
+};
--- a/Weights/classes.txt
+++ b/Weights/classes.txt
@ -0,0 +1,80 @@
+person, 10
+bicycle, 4
+car, 3
+motorcycle, 4
+airplane, 4
+bus, 4
+train, 4
+truck, 3
+boat, 4
+traffic light, 1
+fire hydrant, 1
+stop sign, 1
+parking meter, 1
+bench, 2
+bird, 5
+cat, 6
+dog, 5
+horse, 4
+sheep, 5
+cow, 4
+elephant, 5
+bear, 5
+zebra, 5
+giraffe, 5
+backpack, 3
+umbrella, 3
+handbag, 3
+tie, 3
+suitcase, 2
+frisbee, 3
+skis, 3
+snowboard, 3
+sports ball, 3
+kite, 4
+baseball bat, 3
+baseball glove, 3
+skateboard, 3
+surfboard, 3
+tennis racket, 3
+bottle, 2
+wine glass, 2
+cup, 2
+fork, 1
+knife, 1
+spoon, 1
+bowl, 1
+banana, 1
+apple, 1
+sandwich, 1
+orange, 1
+broccoli, 1
+carrot, 1
+hot dog, 1
+pizza, 1
+donut, 2
+cake, 2
+chair, 1
+couch, 1
+potted plant, 1
+bed, 1
+dining table, 1
+toilet, 1
+tv, 1
+laptop, 1
+mouse, 1
+remote, 1
+keyboard, 1
+cell phone, 1
+microwave, 1
+oven, 1
+toaster, 1
+sink, 1
+refrigerator, 1
+book, 1
+clock, 1
+vase, 1
+scissors, 1
+teddy bear, 1
+hair drier, 1
+toothbrush, 1
--- a/Weights/face_detection_yunet_2023mar.onnx
+++ b/Weights/face_detection_yunet_2023mar.onnx
--- a/Weights/face_recognition_sface_2021dec.onnx
+++ b/Weights/face_recognition_sface_2021dec.onnx