Compare commits

...

11 commits

39 changed files with 1111 additions and 110 deletions

@@ -1,15 +1,7 @@
cmake_minimum_required(VERSION 3.6)
project(AIImagePrepross)
find_package(OpenCV REQUIRED)
project(ImageAiUtils)
set(CMAKE_CXX_STANDARD 17)
set(WEIGHT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/Weights)
set(SRC_FILES main.cpp yolo.cpp tokenize.cpp log.cpp seamcarving.cpp utils.cpp intelligentroi.cpp facerecognizer.cpp)
add_executable(${PROJECT_NAME} ${SRC_FILES})
target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} -ltbb)
target_include_directories(${PROJECT_NAME} PRIVATE ${OpenCV_INCLUDE_DIRS})
target_compile_options(${PROJECT_NAME} PRIVATE -s -g -Wall)
install(TARGETS ${PROJECT_NAME} RUNTIME DESTINATION bin)
add_subdirectory(SmartCrop)

@@ -0,0 +1,105 @@
import warnings
from deepdanbooru_onnx import DeepDanbooru
from PIL import Image
import argparse
import cv2
import os
from multiprocessing import Process, Queue
import json
from tqdm import tqdm
image_ext_ocv = [".bmp", ".jpeg", ".jpg", ".png"]
def find_image_files(path: str) -> list[str]:
    paths = list()
    for root, dirs, files in os.walk(path):
        for filename in files:
            name, extension = os.path.splitext(filename)
            if extension.lower() in image_ext_ocv:
                paths.append(os.path.join(root, filename))
    return paths
def image_loader(paths: list[str]):
    for path in paths:
        imagebgr = cv2.imread(path)
        # check the load result before converting; cv2.imread returns None on failure
        if imagebgr is None:
            print(f"Warning: could not load {path}")
            continue
        image = cv2.cvtColor(imagebgr, cv2.COLOR_BGR2RGB)
        yield Image.fromarray(image), path
def pipeline(queue: Queue, image_paths: list[str], device: int):
    danbooru = DeepDanbooru()
    for path in image_paths:
        imageprompt = ""
        tags = danbooru(path)
        for tag in tags:
            imageprompt = imageprompt + ", " + tag
        queue.put({"file_name": path, "text": imageprompt})
def split_list(input_list, count):
    target_length = int(len(input_list) / count)
    for i in range(0, count - 1):
        yield input_list[i * target_length: (i + 1) * target_length]
    yield input_list[(count - 1) * target_length: len(input_list)]
def save_meta(meta_file, meta, reldir, common_description):
    meta["file_name"] = os.path.relpath(meta["file_name"], reldir)
    if common_description is not None:
        meta["text"] = common_description + meta["text"]
    meta_file.write(json.dumps(meta) + '\n')
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="A script to tag images via DeepDanbooru")
    parser.add_argument('--batch', '-b', default=4, type=int, help="Batch size to use for inference")
    parser.add_argument('--common_description', '-c', help="An optional description that will be prepended to the AI-generated one")
    parser.add_argument('--image_dir', '-i', help="A directory containing the images to tag")
    args = parser.parse_args()

    nparallel = 2
    image_paths = find_image_files(args.image_dir)
    image_path_chunks = list(split_list(image_paths, nparallel))

    print(f"Will use {nparallel} processes to create tags")

    queue = Queue()
    processes = list()
    for i in range(0, nparallel):
        processes.append(Process(target=pipeline, args=(queue, image_path_chunks[i], i)))
        processes[-1].start()

    progress = tqdm(desc="Generating tags", total=len(image_paths))
    done = False
    with open(os.path.join(args.image_dir, "metadata.jsonl"), mode='w') as output_file:
        while not done:
            if not queue.empty():
                meta = queue.get()
                save_meta(output_file, meta, args.image_dir, args.common_description)
                progress.update()
            # finished once every worker process has exited
            done = all(not process.is_alive() for process in processes)
        while not queue.empty():
            meta = queue.get()
            save_meta(output_file, meta, args.image_dir, args.common_description)
            progress.update()
    for process in processes:
        process.join()
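Each line save_meta writes to metadata.jsonl is a self-contained JSON object holding a file_name relative to the image directory and the generated tag text. A minimal sketch of reading the result back (load_metadata is an illustrative helper, not part of the script):

```python
import json
import os

def load_metadata(image_dir: str) -> list[dict]:
    # one {"file_name": ..., "text": ...} object per line
    with open(os.path.join(image_dir, "metadata.jsonl")) as f:
        return [json.loads(line) for line in f]
```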

@@ -0,0 +1,3 @@
from .deepdanbooru_onnx import DeepDanbooru
from .deepdanbooru_onnx import process_image
__version__ = '0.0.8'

@@ -0,0 +1,244 @@
import onnxruntime as ort
from PIL import Image
import numpy as np
import os
from tqdm import tqdm
import requests
import hashlib
from typing import Iterator, Union
import shutil
from pathlib import Path
def process_image(image: Image.Image) -> np.ndarray:
    """
    Convert an image to a numpy array.
    :param image: the image to convert
    :return: the numpy array
    """
    image = image.convert("RGB").resize((512, 512))
    image = np.array(image).astype(np.float32) / 255
    # rearrange into the NHWC layout (1, 512, 512, 3) the model expects
    image = image.transpose((2, 0, 1)).reshape(1, 3, 512, 512).transpose((0, 2, 3, 1))
    return image
def download(url: str, save_path: str, md5: str, length: int) -> bool:
    """
    Download a file from url to save_path and verify its md5.
    :param url: the url of the file to download
    :param save_path: the path to save the file
    :param md5: the expected md5 of the file
    :param length: the length of the file in bytes
    :return: True if the file was downloaded and its md5 matches, False otherwise
    """
    try:
        response = requests.get(url=url, stream=True)
        with open(save_path, "wb") as f:
            with tqdm.wrapattr(
                response.raw, "read", total=length, desc="Downloading"
            ) as r_raw:
                shutil.copyfileobj(r_raw, f)
        return hashlib.md5(open(save_path, "rb").read()).hexdigest() == md5
    except Exception as e:
        print(e)
        return False
def download_model():
    """
    Download the model and tags file from the server.
    :return: the path to the model and tags file
    """
    model_url = (
        "https://huggingface.co/chinoll/deepdanbooru/resolve/main/deepdanbooru.onnx"
    )
    tags_url = "https://huggingface.co/chinoll/deepdanbooru/resolve/main/tags.txt"
    model_md5 = "16be4e40ebcc0b1d1915bbf31f00969f"
    tags_md5 = "a3f764de985cdeba89f1d232a4204402"
    model_length = 643993025
    tags_length = 133810

    home = str(Path.home()) + "/.deepdanbooru_onnx/"
    if not os.path.exists(home):
        os.mkdir(home)

    model_name = "deepdanbooru.onnx"
    tags_name = "tags.txt"

    model_path = home + model_name
    tags_path = home + tags_name
    if os.path.exists(model_path):
        if hashlib.md5(open(model_path, "rb").read()).hexdigest() != model_md5:
            os.remove(model_path)
            if not download(model_url, model_path, model_md5, model_length):
                raise ValueError("Model download failed")
    else:
        if not download(model_url, model_path, model_md5, model_length):
            raise ValueError("Model download failed")

    if os.path.exists(tags_path):
        if hashlib.md5(open(tags_path, "rb").read()).hexdigest() != tags_md5:
            os.remove(tags_path)
            if not download(tags_url, tags_path, tags_md5, tags_length):
                raise ValueError("Tags download failed")
    else:
        if not download(tags_url, tags_path, tags_md5, tags_length):
            raise ValueError("Tags download failed")
    return model_path, tags_path
class DeepDanbooru:
    def __init__(
        self,
        mode: str = "auto",
        model_path: Union[str, None] = None,
        tags_path: Union[str, None] = None,
        threshold: Union[float, int] = 0.6,
        pin_memory: bool = False,
        batch_size: int = 1,
    ):
        """
        Initialize the DeepDanbooru class.
        :param mode: the mode of the model, "cpu", "gpu", "tensorrt" or "auto"
        :param model_path: the path to the model file
        :param tags_path: the path to the tags file
        :param threshold: the score threshold below which tags are discarded
        :param pin_memory: whether to preprocess all images up front and keep them in memory
        :param batch_size: the batch size of the model
        """
        providers = {
            "cpu": "CPUExecutionProvider",
            "gpu": "CUDAExecutionProvider",
            "tensorrt": "TensorrtExecutionProvider",
            "auto": (
                "CUDAExecutionProvider"
                if "CUDAExecutionProvider" in ort.get_available_providers()
                else "CPUExecutionProvider"
            ),
        }

        if not (isinstance(threshold, float) or isinstance(threshold, int)):
            raise TypeError("threshold must be float or int")
        if threshold < 0 or threshold > 1:
            raise ValueError("threshold must be between 0 and 1")
        if mode not in providers:
            raise ValueError(
                "Mode not supported. Please choose from: cpu, gpu, tensorrt, auto"
            )
        if providers[mode] not in ort.get_available_providers():
            raise ValueError(
                f"Your device does not support {mode}. Please choose from: cpu"
            )
        if model_path is not None and not os.path.exists(model_path):
            raise FileNotFoundError("Model file not found")
        if tags_path is not None and not os.path.exists(tags_path):
            raise FileNotFoundError("Tags file not found")

        if model_path is None or tags_path is None:
            model_path, tags_path = download_model()

        self.session = ort.InferenceSession(model_path, providers=[providers[mode]])
        self.tags = [i.replace("\n", "") for i in open(tags_path, "r").readlines()]

        self.input_name = self.session.get_inputs()[0].name
        self.output_name = [output.name for output in self.session.get_outputs()]
        self.threshold = threshold
        self.pin_memory = pin_memory
        self.batch_size = batch_size
        self.mode = mode
        self.cache = {}

    def __str__(self) -> str:
        return f"DeepDanbooru(mode={self.mode}, threshold={self.threshold}, pin_memory={self.pin_memory}, batch_size={self.batch_size})"

    def __repr__(self) -> str:
        return self.__str__()
    def from_image_inference(self, image: Image.Image) -> dict:
        image = process_image(image)
        return self.predict(image)

    def from_ndarray_inference(self, image: np.ndarray) -> dict:
        if image.shape != (1, 512, 512, 3):
            raise ValueError(f"Image must be {(1, 512, 512, 3)}")
        return self.predict(image)

    def from_file_inference(self, image: str) -> dict:
        return self.from_image_inference(Image.open(image))

    def from_list_inference(self, image: Union[list, tuple]) -> Iterator[dict]:
        if self.pin_memory:
            image = [process_image(Image.open(i)) for i in image]
        for i in [
            image[i : i + self.batch_size]
            for i in range(0, len(image), self.batch_size)
        ]:
            imagelist = i
            bs = len(i)
            _imagelist, idx, hashlist = [], [], []
            for j in range(len(i)):
                img = Image.open(i[j]) if not self.pin_memory else imagelist[j]
                image_hash = hashlib.md5(np.array(img).astype(np.uint8)).hexdigest()
                hashlist.append(image_hash)
                if image_hash in self.cache:
                    continue
                if not self.pin_memory:
                    _imagelist.append(process_image(img))
                else:
                    _imagelist.append(imagelist[j])
                idx.append(j)

            imagelist = _imagelist
            if len(imagelist) != 0:
                _image = np.vstack(imagelist)
                results = self.inference(_image)
                results_idx = 0
            else:
                results = []

            for i in range(bs):
                image_tag = {}
                if i in idx:
                    hash = hashlist[i]
                    for tag, score in zip(self.tags, results[results_idx]):
                        if score >= self.threshold:
                            image_tag[tag] = score
                    results_idx += 1
                    self.cache[hash] = image_tag
                    yield image_tag
                else:
                    yield self.cache[hashlist[i]]

    def inference(self, image):
        return self.session.run(self.output_name, {self.input_name: image})[0]

    def predict(self, image):
        result = self.inference(image)
        image_tag = {}
        for tag, score in zip(self.tags, result[0]):
            if score >= self.threshold:
                image_tag[tag] = score
        return image_tag

    def __call__(self, image) -> Union[dict, Iterator[dict]]:
        if isinstance(image, str):
            return self.from_file_inference(image)
        elif isinstance(image, np.ndarray):
            return self.from_ndarray_inference(image)
        elif isinstance(image, list) or isinstance(image, tuple):
            return self.from_list_inference(image)
        elif isinstance(image, Image.Image):
            return self.from_image_inference(image)
        else:
            raise ValueError("Image must be a file path, a numpy array, or a list/tuple")
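As the __call__ dispatch above shows, the class accepts a file path, a PIL image, a preprocessed (1, 512, 512, 3) array, or a list/tuple of paths. A minimal usage sketch (file names are hypothetical):

```python
from deepdanbooru_onnx import DeepDanbooru

danbooru = DeepDanbooru(mode="auto", threshold=0.6)

# a single path returns one {tag: score} dict
tags = danbooru("example.jpg")
print(sorted(tags, key=tags.get, reverse=True)[:10])

# a list of paths returns a generator that yields one dict per image
for image_tags in danbooru(["a.jpg", "b.jpg"]):
    print(len(image_tags), "tags")
```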

LLavaTagger/README.md Normal file
@@ -0,0 +1,21 @@
# LLavaTagger
LLavaTagger is a Python script that tags images based on a given prompt using the [LLaVA](https://llava-vl.github.io/) multimodal LLM. LLavaTagger supports using any number of GPUs in DDP parallel for this task.
## How to use
First create a Python venv and install the required packages into it:
$ python -m venv venv
$ source venv/bin/activate
$ pip install -r requirements.txt
Then run LLavaTagger for instance like so:
$ python LLavaTagger.py --common_description "an image of a cat, " --prompt "describe the cat in 10 to 20 words" --batch 8 --quantize --image_dir ~/cat_images
By default LLavaTagger will run in parallel on all available GPUs; if this is undesirable, use the ROCR_VISIBLE_DEVICES= or CUDA_VISIBLE_DEVICES= environment variable to hide unwanted GPUs.
LLavaTagger will then create a meta.jsonl in the image directory suitable to be used by the training scripts of [diffusers](https://github.com/huggingface/diffusers) to train Stable Diffusion (XL). If other formats are desired, ../utils contains scripts to transform the metadata into other formats, for instance for use with [kohya](https://github.com/bmaltais/kohya_ss).
If editing the created tags is desired, [QImageTagger](https://uvos.xyz/git/uvos/QImageTagger) can be used for this purpose.
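For reference, a caption dataset in this layout can be loaded with the datasets library's imagefolder builder — a minimal sketch, assuming the metadata file follows the metadata.jsonl naming that loader expects:

```python
from datasets import load_dataset

# loads the images plus the per-image "text" column from the metadata file
dataset = load_dataset("imagefolder", data_dir="~/cat_images", split="train")
print(dataset[0]["text"])
```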

@@ -0,0 +1,11 @@
accelerate==0.29.0
bitsandbytes
huggingface-hub==0.22.2
ninja==1.11.1.1
safetensors==0.4.2
tokenizers==0.15.2
transformers
torch
opencv-python
numpy
tqdm

@@ -0,0 +1,174 @@
#!/usr/bin/env python3
# PersonDatasetAssembler - A tool to assemble images of a specific person from a
# directory of images or from a video file
# Copyright (C) 2024 Carl Philipp Klemm
#
# This file is part of PersonDatasetAssembler.
#
# PersonDatasetAssembler is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# PersonDatasetAssembler is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with PersonDatasetAssembler. If not, see <http://www.gnu.org/licenses/>.
import argparse
import os
from typing import Iterator
import cv2
import numpy
from tqdm import tqdm
from wand.exceptions import BlobError
from wand.image import Image
image_ext_ocv = [".bmp", ".jpeg", ".jpg", ".png"]
image_ext_wand = [".dng", ".arw"]
class LoadException(Exception):
    pass
def find_image_files(path: str) -> list[str]:
    paths = list()
    for root, dirs, files in os.walk(path):
        for filename in files:
            name, extension = os.path.splitext(filename)
            extension = extension.lower()
            if extension in image_ext_ocv or extension in image_ext_wand:
                paths.append(os.path.join(root, filename))
    return paths
def image_loader(paths: list[str]) -> Iterator[numpy.ndarray]:
    for path in paths:
        name, extension = os.path.splitext(path)
        extension = extension.lower()
        if extension in image_ext_ocv:
            image = cv2.imread(path)
            if image is None:
                print(f"Warning: could not load {path}")
            else:
                yield image
        elif extension in image_ext_wand:
            try:
                image = Image(filename=path)
            except BlobError as e:
                print(f"Warning: could not load {path}, {e}")
                continue
            # wand exposes the decoded image via the numpy array interface; convert to BGR for OpenCV
            yield cv2.cvtColor(numpy.array(image), cv2.COLOR_RGB2BGR)
def extract_video_images(video: cv2.VideoCapture, interval: int = 0):
    ret = True
    frame_counter = 0
    while ret:
        video.set(cv2.CAP_PROP_POS_FRAMES, frame_counter)
        ret, frame = video.read()
        if ret:
            yield frame
        frame_counter += interval
def contains_face_match(detector: cv2.FaceDetectorYN, recognizer: cv2.FaceRecognizerSF, image: numpy.ndarray, referance_features: list, thresh: float) -> tuple[float, bool]:
    detector.setInputSize([image.shape[1], image.shape[0]])
    faces = detector.detect(image)[1]
    if faces is None:
        return 0, False
    for face in faces:
        cropped_image = recognizer.alignCrop(image, face)
        features = recognizer.feature(cropped_image)
        score_accum = 0.0
        for referance in referance_features:
            score_accum += recognizer.match(referance, features, 0)  # 0 == FR_COSINE
        score = score_accum / len(referance_features)
        if score > thresh:
            return score, True
    return 0, False
def process_referance(detector: cv2.FaceDetectorYN, recognizer: cv2.FaceRecognizerSF, referance_path: str) -> list:
    images = list()
    out = list()

    if os.path.isfile(referance_path):
        image = cv2.imread(referance_path)
        if image is None:
            print(f"Could not load image from {referance_path}")
        else:
            images.append(image)
    elif os.path.isdir(referance_path):
        filenames = find_image_files(referance_path)
        images = list(image_loader(filenames))

    for image in images:
        detector.setInputSize([image.shape[1], image.shape[0]])
        faces = detector.detect(image)[1]
        if faces is None:
            print("Unable to find a face in reference image")
            exit(1)
        image = recognizer.alignCrop(image, faces[0])
        features = recognizer.feature(image)
        out.append(features)

    return out
if __name__ == "__main__":
    parser = argparse.ArgumentParser("Script to assemble a dataset of images of a specific person")
    parser.add_argument('--out', '-o', default="out", help="place to put the dataset")
    parser.add_argument('--input', '-i', required=True, help="directory or video file to get images from")
    parser.add_argument('--skip', '-s', default=0, type=int, help="skip n frames between samples when grabbing from a video file")
    parser.add_argument('--referance', '-r', required=True, help="reference image or directory of images of the person to be found")
    parser.add_argument('--match_model', '-m', required=True, help="Path to the onnx recognition model to be used")
    parser.add_argument('--detect_model', '-d', required=True, help="Path to the onnx detection model to be used")
    parser.add_argument('--threshold', '-t', default=0.362, type=float, help="match threshold to use")
    parser.add_argument('--invert', '-n', action='store_true', help="output files that DON'T match")
    args = parser.parse_args()

    recognizer = cv2.FaceRecognizerSF.create(model=args.match_model, config="", backend_id=cv2.dnn.DNN_BACKEND_DEFAULT, target_id=cv2.dnn.DNN_TARGET_CPU)
    detector = cv2.FaceDetectorYN.create(model=args.detect_model, config="", input_size=[320, 320],
        score_threshold=0.6, nms_threshold=0.3, top_k=5000, backend_id=cv2.dnn.DNN_BACKEND_DEFAULT, target_id=cv2.dnn.DNN_TARGET_CPU)

    referance_features = process_referance(detector, recognizer, args.referance)
    if len(referance_features) < 1:
        print(f"Could not load any reference image(s) from {args.referance}")
        exit(1)

    if os.path.isfile(args.input):
        video = cv2.VideoCapture(args.input)
        if not video.isOpened():
            print(f"Unable to open {args.input} as a video file")
            exit(1)
        image_generator = extract_video_images(video, args.skip + 1)
        total_images = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) / (args.skip + 1)
    elif os.path.isdir(args.input):
        image_filenames = find_image_files(args.input)
        image_generator = image_loader(image_filenames)
        total_images = len(image_filenames)
    else:
        print(f"{args.input} is not a video file nor is it a directory")
        exit(1)

    os.makedirs(args.out, exist_ok=True)

    progress = tqdm(total=int(total_images), desc="0.00")
    counter = 0
    for image in image_generator:
        if image.shape[0] > 512:
            aspect = image.shape[0] / image.shape[1]
            resized = cv2.resize(image, (int(512 / aspect), 512), 0, 0, cv2.INTER_AREA)
        else:
            resized = image
        score, match = contains_face_match(detector, recognizer, resized, referance_features, args.threshold)
        if (match and not args.invert) or (not match and args.invert):
            filename = f"{counter:04}.png"
            cv2.imwrite(os.path.join(args.out, filename), image)
            counter += 1
        progress.set_description(f"{score:1.2f}")
        progress.update()

@@ -0,0 +1,20 @@
# PersonDatasetAssembler
PersonDatasetAssembler is a Python script that finds images of a specific person, specified by a reference image, in a directory of images or in a video file. PersonDatasetAssembler also supports raw images.
## How to use
First create a Python venv and install the required packages into it:
$ python -m venv venv
$ source venv/bin/activate
$ pip install -r requirements.txt
Then run PersonDatasetAssembler for instance like so:
$ python PersonDatasetAssembler.py --referance someperson.jpg --match_model ../Weights/face_recognition_sface_2021dec.onnx --detect_model ../Weights/face_detection_yunet_2023mar.onnx --input ~/Photos --out imagesOfSomePerson
Or to extract images from a video:
$ python PersonDatasetAssembler.py --referance someperson.jpg --match_model ../Weights/face_recognition_sface_2021dec.onnx --detect_model ../Weights/face_detection_yunet_2023mar.onnx -i ~/SomeVideo.mkv --out imagesOfSomePerson
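Under the hood, the script averages the cosine similarity between SFace feature vectors and compares it against --threshold. A minimal sketch of computing that score for two single-face images, useful when tuning the threshold (image paths are placeholders; the model files are the same ones passed above):

```python
import cv2

detector = cv2.FaceDetectorYN.create(model="face_detection_yunet_2023mar.onnx", config="", input_size=[320, 320])
recognizer = cv2.FaceRecognizerSF.create(model="face_recognition_sface_2021dec.onnx", config="")

def face_features(path: str):
    image = cv2.imread(path)
    detector.setInputSize([image.shape[1], image.shape[0]])
    faces = detector.detect(image)[1]
    return recognizer.feature(recognizer.alignCrop(image, faces[0]))

# cosine similarity; with the default settings, scores above 0.362 count as a match
score = recognizer.match(face_features("a.jpg"), face_features("b.jpg"), cv2.FaceRecognizerSF_FR_COSINE)
print(score)
```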

@@ -0,0 +1,4 @@
numpy==1.26.4
opencv-python==4.10.0.82
tqdm==4.66.4
Wand==0.6.13

README.md Normal file
@@ -0,0 +1,35 @@
# SDImagePreprocess
This repo contains a collection of high performance tools intended to ease the creation of datasets for image generation AI training, such as Stable Diffusion.
## Included tools
This repo contains the following tools:
### SmartCrop
SmartCrop is an application that performs content-aware cropping using [seam carving](https://en.wikipedia.org/wiki/Seam_carving) and resizing to bring a directory of images into the desired size and aspect ratio for training. SmartCrop is configurable to prioritize specific items or specific persons in the images provided.
#### Content detected in image:
![Content found in image](SmartCrop/images/IMGP3692.jpg)
#### Cropped image based on content:
![Cropped image](SmartCrop/images/IMGP3692C.jpg)
### PersonDatasetAssembler
PersonDatasetAssembler is a Python script that finds images of a specific person, specified by a reference image, in a directory of images or in a video file. PersonDatasetAssembler also supports raw images.
### LLavaTagger
LLavaTagger is a Python script that tags images based on a given prompt using the [LLaVA](https://llava-vl.github.io/) multimodal LLM. LLavaTagger supports using any number of GPUs in DDP parallel for this task.
### DanbooruTagger
DanbooruTagger is a Python script of dubious utility that tags images using the [DeepDanbooru](https://github.com/KichangKim/DeepDanbooru) convolutional network.
## License
All files in this repo are licensed GPL v3, see LICENSE

SmartCrop/CMakeLists.txt Normal file
@@ -0,0 +1,16 @@
cmake_minimum_required(VERSION 3.6)
find_package(OpenCV REQUIRED)
set(CMAKE_CXX_STANDARD 17)
set(SRC_FILES main.cpp yolo.cpp tokenize.cpp log.cpp seamcarving.cpp utils.cpp intelligentroi.cpp facerecognizer.cpp)
add_executable(smartcrop ${SRC_FILES})
target_link_libraries(smartcrop ${OpenCV_LIBS} -ltbb)
target_include_directories(smartcrop PRIVATE ${OpenCV_INCLUDE_DIRS})
target_compile_options(smartcrop PRIVATE -s -g -Wall)
message(WARNING ${WEIGHT_DIR})
target_compile_definitions(smartcrop PUBLIC WEIGHT_DIR="${WEIGHT_DIR}")
install(TARGETS smartcrop RUNTIME DESTINATION bin)

SmartCrop/README.md Normal file
@@ -0,0 +1,50 @@
# SmartCrop
SmartCrop is an application that performs content-aware cropping using [seam carving](https://en.wikipedia.org/wiki/Seam_carving) and resizing to bring a directory of images into the desired size and aspect ratio for training. SmartCrop is configurable to prioritize specific items or specific persons in the images provided.
## Requirements
* [cmake](https://cmake.org/) 3.6 or later
* [opencv](https://opencv.org/) 4.8 or later
* A C++17 capable compiler and standard library like gcc or llvm/clang
* git is required to get the source
## Building
The steps to build this application are:
$ git clone https://uvos.xyz/git/uvos/SDImagePreprocess.git
$ cd SDImagePreprocess
$ mkdir build
$ cd build
$ cmake ..
$ make
The binary can then be found in build/SmartCrop and can optionally be installed with:
$ sudo make install
## Basic usage
To process all images in the directory ~/images and output the images into ~/processedImages:
$ smartcrop --out processedImages ~/images/*
To also focus on the person in the image ~/person.jpg:
$ smartcrop --out processedImages --focus-person ~/person.jpg ~/images/*
To also enable seam carving:
$ smartcrop --out processedImages --focus-person ~/person.jpg --seam-carving ~/images/*
See smartcrop --help for more options
## Example
#### Content detected in image:
![Content found in image](images/IMGP3692.jpg)
#### Cropped image based on content:
![Cropped image](images/IMGP3692C.jpg)

@@ -1,11 +1,31 @@
//
// SmartCrop - A tool for content aware cropping of images
// Copyright (C) 2024 Carl Philipp Klemm
//
// This file is part of SmartCrop.
//
// SmartCrop is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// SmartCrop is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
//
#include "facerecognizer.h"
#include <filesystem>
#define INCBIN_PREFIX r
#include "incbin.h"
INCBIN(defaultRecognizer, "../face_recognition_sface_2021dec.onnx");
INCBIN(defaultDetector, "../face_detection_yunet_2023mar.onnx");
INCBIN(defaultRecognizer, WEIGHT_DIR "/face_recognition_sface_2021dec.onnx");
INCBIN(defaultDetector, WEIGHT_DIR "/face_detection_yunet_2023mar.onnx");
#include <opencv2/dnn/dnn.hpp>
#include <opencv2/core.hpp>
@@ -106,28 +126,35 @@ void FaceRecognizer::clearReferances()
referanceFeatures.clear();
}
std::pair<int, double> FaceRecognizer::isMatch(const cv::Mat& input, bool alone)
FaceRecognizer::Detection FaceRecognizer::isMatch(const cv::Mat& input, bool alone)
{
cv::Mat faces = detectFaces(input);
if(alone && faces.rows > 1)
return {-2, 0};
Detection bestMatch;
bestMatch.confidence = 0;
bestMatch.person = -1;
std::pair<int, double> bestMatch = {-1, 0};
if(alone && faces.rows > 1)
{
bestMatch.person = -2;
return bestMatch;
}
for(int i = 0; i < faces.rows; ++i)
{
cv::Mat face;
recognizer->alignCrop(input, faces.row(0), face);
recognizer->alignCrop(input, faces.row(i), face);
cv::Mat features;
recognizer->feature(face, features);
features = features.clone();
for(size_t referanceIndex = 0; referanceIndex < referanceFeatures.size(); ++referanceIndex)
{
double score = recognizer->match(referanceFeatures[referanceIndex], features, cv::FaceRecognizerSF::FR_COSINE);
if(score > threshold && score > bestMatch.second)
if(score > threshold && score > bestMatch.confidence)
{
bestMatch = {referanceIndex, score};
bestMatch.confidence = score;
bestMatch.person = referanceIndex;
bestMatch.rect = cv::Rect(faces.at<int>(i, 0), faces.at<int>(i, 1), faces.at<int>(i, 2), faces.at<int>(i, 3));
}
}
}

@@ -1,3 +1,22 @@
/*
 * SmartCrop - A tool for content aware cropping of images
* Copyright (C) 2024 Carl Philipp Klemm
*
* This file is part of SmartCrop.
*
* SmartCrop is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SmartCrop is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <exception>
#include <opencv2/core/mat.hpp>
@@ -11,6 +30,13 @@ class FaceRecognizer
{
public:
struct Detection
{
int person;
float confidence;
cv::Rect rect;
};
class LoadException : public std::exception
{
private:
@@ -33,7 +59,7 @@ private:
public:
FaceRecognizer(std::filesystem::path recognizerPath = "", const std::filesystem::path& detectorPath = "", const std::vector<cv::Mat>& referances = std::vector<cv::Mat>());
cv::Mat detectFaces(const cv::Mat& input);
std::pair<int, double> isMatch(const cv::Mat& input, bool alone = false);
Detection isMatch(const cv::Mat& input, bool alone = false);
bool addReferances(const std::vector<cv::Mat>& referances);
void setThreshold(double threashold);
double getThreshold();

@@ -1,3 +1,22 @@
/*
 * SmartCrop - A tool for content aware cropping of images
* Copyright (C) 2024 Carl Philipp Klemm
*
* This file is part of SmartCrop.
*
* SmartCrop is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SmartCrop is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file incbin.h
* @author Dale Weiler

@@ -1,3 +1,23 @@
//
// SmartCrop - A tool for content aware cropping of images
// Copyright (C) 2024 Carl Philipp Klemm
//
// This file is part of SmartCrop.
//
// SmartCrop is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// SmartCrop is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
//
#include "intelligentroi.h"
#include <opencv2/imgproc.hpp>

@@ -0,0 +1,37 @@
/*
 * SmartCrop - A tool for content aware cropping of images
* Copyright (C) 2024 Carl Philipp Klemm
*
* This file is part of SmartCrop.
*
* SmartCrop is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SmartCrop is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <opencv2/imgproc.hpp>
#include "yolo.h"
class InteligentRoi
{
private:
    int personId;
    static bool compPointPrio(const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b, const cv::Point2i& center);
    static void slideRectToPoint(cv::Rect& rect, const cv::Point2i& point);
    static cv::Rect maxRect(bool& incompleate, const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude = {});

public:
    InteligentRoi(const Yolo& yolo);
    bool getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize);
};

@@ -1,3 +1,23 @@
//
// SmartCrop - A tool for content aware croping of images
// Copyright (C) 2024 Carl Philipp Klemm
//
// This file is part of SmartCrop.
//
// SmartCrop is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// SmartCrop is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
//
#include <filesystem>
#include <iostream>
#include <opencv2/core.hpp>
@@ -243,12 +263,12 @@ static void reduceSize(cv::Mat& image, const cv::Size& targetSize)
if(image.cols > image.rows)
{
double ratio = static_cast<double>(longTargetSize)/image.cols;
cv::resize(image, image, {longTargetSize, static_cast<int>(image.rows*ratio)}, 0, 0, cv::INTER_CUBIC);
cv::resize(image, image, {longTargetSize, static_cast<int>(image.rows*ratio)}, 0, 0, ratio < 1 ? cv::INTER_AREA : cv::INTER_CUBIC);
}
else
{
double ratio = static_cast<double>(longTargetSize)/image.rows;
cv::resize(image, image, {static_cast<int>(image.cols*ratio), longTargetSize}, 0, 0, cv::INTER_CUBIC);
cv::resize(image, image, {static_cast<int>(image.cols*ratio), longTargetSize}, 0, 0, ratio < 1 ? cv::INTER_AREA : cv::INTER_CUBIC);
}
}
}
@@ -276,12 +296,13 @@ void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yol
{
cv::Mat person = image(detection.box);
reconizerMutex.lock();
std::pair<int, double> match = recognizer->isMatch(person);
FaceRecognizer::Detection match = recognizer->isMatch(person);
reconizerMutex.unlock();
if(match.first >= 0)
if(match.person >= 0)
{
detection.priority += 10;
hasmatch = true;
detections.push_back({0, "Face", match.confidence, 20, {255, 0, 0}, match.rect});
}
}
Log(Log::DEBUG)<<detection.class_id<<": "<<detection.className<<" at "<<detection.box<<" with prio "<<detection.priority<<(hasmatch ? " has match" : "");
@@ -429,7 +450,7 @@ int main(int argc, char* argv[])
std::vector<std::thread> threads;
std::vector<std::vector<std::filesystem::path>> imagePathParts = splitVector(imagePaths, std::thread::hardware_concurrency());
for(size_t i = 0; i < std::thread::hardware_concurrency(); ++i)
for(size_t i = 0; i < imagePathParts.size(); ++i)
threads.push_back(std::thread(threadFn, imagePathParts[i], std::ref(config), recognizer, std::ref(recognizerMutex), std::ref(debugOutputPath)));
for(std::thread& thread : threads)

@@ -1,3 +1,22 @@
/*
 * SmartCrop - A tool for content aware cropping of images
* Copyright (C) 2024 Carl Philipp Klemm
*
* This file is part of SmartCrop.
*
* SmartCrop is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SmartCrop is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <string>
#include <vector>

SmartCrop/readfile.h Normal file
@@ -0,0 +1,35 @@
/*
 * SmartCrop - A tool for content aware cropping of images
* Copyright (C) 2024 Carl Philipp Klemm
*
* This file is part of SmartCrop.
*
* SmartCrop is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SmartCrop is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <string>
#include <filesystem>
#include <fstream>
#include <stdexcept>
#include <sstream>
inline std::string readFile(const std::filesystem::path& path)
{
    std::ifstream file(path);
    if(!file.is_open())
        throw std::runtime_error(std::string("could not open file ") + path.string());
    std::stringstream ss;
    ss<<file.rdbuf();
    return ss.str();
}

@@ -1,3 +1,23 @@
//
// SmartCrop - A tool for content aware croping of images
// Copyright (C) 2024 Carl Philipp Klemm
//
// This file is part of SmartCrop.
//
// SmartCrop is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// SmartCrop is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
//
#include "seamcarving.h"
#include <opencv2/imgcodecs.hpp>

@@ -1,3 +1,22 @@
/*
 * SmartCrop - A tool for content aware cropping of images
* Copyright (C) 2024 Carl Philipp Klemm
*
* This file is part of SmartCrop.
*
* SmartCrop is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SmartCrop is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <opencv2/core/core.hpp>

SmartCrop/tokenize.cpp Normal file
@@ -0,0 +1,46 @@
//
// SmartCrop - A tool for content aware cropping of images
// Copyright (C) 2024 Carl Philipp Klemm
//
// This file is part of SmartCrop.
//
// SmartCrop is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// SmartCrop is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
//
#include "tokenize.h"
std::vector<std::string> tokenizeBinaryIgnore(const std::string& str, const char delim, const char ignoreBraket, const char escapeChar)
{
    std::vector<std::string> tokens;
    std::string token;
    bool inBaracket = false;
    for(size_t i = 0; i < str.size(); ++i)
    {
        if(str[i] == delim && !inBaracket && (i == 0 || str[i-1] != escapeChar))
        {
            tokens.push_back(token);
            token.clear();
        }
        else
        {
            token.push_back(str[i]);
        }
        if(ignoreBraket == str[i])
            inBaracket = !inBaracket;
    }
    if(!inBaracket)
        tokens.push_back(token);
    return tokens;
}

SmartCrop/tokenize.h Normal file
@@ -0,0 +1,26 @@
/*
 * SmartCrop - A tool for content aware cropping of images
* Copyright (C) 2024 Carl Philipp Klemm
*
* This file is part of SmartCrop.
*
* SmartCrop is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SmartCrop is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <string>
#include <vector>
std::vector<std::string> tokenizeBinaryIgnore(const std::string& str, const char delim, const char ignoreBraket = '\0',
const char escapeChar = '\0');

@@ -1,3 +1,23 @@
//
// SmartCrop - A tool for content aware cropping of images
// Copyright (C) 2024 Carl Philipp Klemm
//
// This file is part of SmartCrop.
//
// SmartCrop is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// SmartCrop is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
//
#include "utils.h"
#include <filesystem>

SmartCrop/utils.h Normal file
@@ -0,0 +1,34 @@
/*
 * SmartCrop - A tool for content aware cropping of images
* Copyright (C) 2024 Carl Philipp Klemm
*
* This file is part of SmartCrop.
*
* SmartCrop is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SmartCrop is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <filesystem>
#include <vector>
#include <opencv2/imgproc.hpp>
bool isImagePath(const std::filesystem::path& path);
void getImageFiles(const std::filesystem::path& path, std::vector<std::filesystem::path>& paths);
cv::Rect rectFromPoints(const std::vector<std::pair<cv::Point, int>>& points);
double pointDist(const cv::Point2i& pointA, const cv::Point2i& pointB);
bool pointInRect(const cv::Point2i& point, const cv::Rect& rect);

@@ -1,3 +1,23 @@
//
// SmartCrop - A tool for content aware cropping of images
// Copyright (C) 2024 Carl Philipp Klemm
//
// This file is part of SmartCrop.
//
// SmartCrop is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// SmartCrop is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
//
#include <opencv2/dnn/dnn.hpp>
#include <algorithm>
#include <string>
@@ -11,8 +31,8 @@
#define INCBIN_PREFIX r
#include "incbin.h"
INCTXT(defaultClasses, "../classes.txt");
INCBIN(defaultModel, "../yolov8x.onnx");
INCTXT(defaultClasses, WEIGHT_DIR "/classes.txt");
INCBIN(defaultModel, WEIGHT_DIR "/yolov8x.onnx");
Yolo::Yolo(const std::filesystem::path &onnxModelPath, const cv::Size &modelInputShape,
const std::filesystem::path& classesTxtFilePath, bool runWithOCl)

@@ -1,3 +1,22 @@
/*
 * SmartCrop - A tool for content aware cropping of images
* Copyright (C) 2024 Carl Philipp Klemm
*
* This file is part of SmartCrop.
*
* SmartCrop is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SmartCrop is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <fstream>

@@ -1,18 +0,0 @@
#pragma once
#include <opencv2/imgproc.hpp>
#include "yolo.h"
class InteligentRoi
{
private:
    int personId;
    static bool compPointPrio(const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b, const cv::Point2i& center);
    static void slideRectToPoint(cv::Rect& rect, const cv::Point2i& point);
    static cv::Rect maxRect(bool& incompleate, const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude = {});

public:
    InteligentRoi(const Yolo& yolo);
    bool getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize);
};

@@ -1,16 +0,0 @@
#pragma once
#include <string>
#include <filesystem>
#include <fstream>
#include <stdexcept>
#include <sstream>
inline std::string readFile(const std::filesystem::path& path)
{
    std::ifstream file(path);
    if(!file.is_open())
        throw std::runtime_error(std::string("could not open file ") + path.string());
    std::stringstream ss;
    ss<<file.rdbuf();
    return ss.str();
}

@@ -1,26 +0,0 @@
#include "tokenize.h"
std::vector<std::string> tokenizeBinaryIgnore(const std::string& str, const char delim, const char ignoreBraket, const char escapeChar)
{
    std::vector<std::string> tokens;
    std::string token;
    bool inBaracket = false;
    for(size_t i = 0; i < str.size(); ++i)
    {
        if(str[i] == delim && !inBaracket && (i == 0 || str[i-1] != escapeChar))
        {
            tokens.push_back(token);
            token.clear();
        }
        else
        {
            token.push_back(str[i]);
        }
        if(ignoreBraket == str[i])
            inBaracket = !inBaracket;
    }
    if(!inBaracket)
        tokens.push_back(token);
    return tokens;
}

@@ -1,7 +0,0 @@
#pragma once
#include <string>
#include <vector>
std::vector<std::string> tokenizeBinaryIgnore(const std::string& str, const char delim, const char ignoreBraket = '\0',
const char escapeChar = '\0');

utils.h
@@ -1,15 +0,0 @@
#pragma once
#include <filesystem>
#include <vector>
#include <opencv2/imgproc.hpp>
bool isImagePath(const std::filesystem::path& path);
void getImageFiles(const std::filesystem::path& path, std::vector<std::filesystem::path>& paths);
cv::Rect rectFromPoints(const std::vector<std::pair<cv::Point, int>>& points);
double pointDist(const cv::Point2i& pointA, const cv::Point2i& pointB);
bool pointInRect(const cv::Point2i& point, const cv::Rect& rect);

Binary file not shown.