initial commit

Commit cd1e2756bc

39 changed files with 4163 additions and 0 deletions

CMakeLists.txt (new file, 7 additions)
@@ -0,0 +1,7 @@
cmake_minimum_required(VERSION 3.6)
project(ImageAiUtils)

set(CMAKE_CXX_STANDARD 17)
set(WEIGHT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/Weights)

add_subdirectory(SmartCrop)
DanbooruTagger/DanbooruTagger.py (new file, 105 additions)
@@ -0,0 +1,105 @@
import warnings
from deepdanbooru_onnx import DeepDanbooru
from PIL import Image
import argparse
import cv2
import os
from multiprocessing import Process, Queue
import json
from tqdm import tqdm


image_ext_ocv = [".bmp", ".jpeg", ".jpg", ".png"]


def find_image_files(path: str) -> list[str]:
    """Recursively collect all images below path that OpenCV can read."""
    paths = list()
    for root, dirs, files in os.walk(path):
        for filename in files:
            name, extension = os.path.splitext(filename)
            if extension.lower() in image_ext_ocv:
                paths.append(os.path.join(root, filename))
    return paths


def image_loader(paths: list[str]):
    """Yield (PIL image, path) pairs, skipping files OpenCV cannot decode."""
    for path in paths:
        imagebgr = cv2.imread(path)
        if imagebgr is None:
            print(f"Warning: could not load {path}")
        else:
            image = cv2.cvtColor(imagebgr, cv2.COLOR_BGR2RGB)
            yield Image.fromarray(image), path


def pipeline(queue: Queue, image_paths: list[str], device: int):
    """Tag every image in image_paths and push the result onto queue."""
    danbooru = DeepDanbooru()

    for path in image_paths:
        imageprompt = ""
        tags = danbooru(path)
        for tag in tags:
            imageprompt = imageprompt + ", " + tag

        queue.put({"file_name": path, "text": imageprompt})


def split_list(input_list, count):
    """Split input_list into count roughly equal chunks."""
    target_length = int(len(input_list) / count)
    for i in range(0, count - 1):
        yield input_list[i * target_length: (i + 1) * target_length]
    yield input_list[(count - 1) * target_length: len(input_list)]


def save_meta(meta_file, meta, reldir, common_description):
    """Write one JSON line with the path relative to reldir and its tag text."""
    meta["file_name"] = os.path.relpath(meta["file_name"], reldir)
    if common_description is not None:
        meta["text"] = common_description + meta["text"]
    meta_file.write(json.dumps(meta) + '\n')


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="A script to tag images via DeepDanbooru")
    parser.add_argument('--batch', '-b', default=4, type=int, help="Batch size to use for inference")
    parser.add_argument('--common_description', '-c', help="An optional description that will be prepended to the AI-generated one")
    parser.add_argument('--image_dir', '-i', help="A directory containing the images to tag")
    args = parser.parse_args()

    nparallel = 2

    image_paths = find_image_files(args.image_dir)
    image_path_chunks = list(split_list(image_paths, nparallel))

    print(f"Will use {nparallel} processes to create tags")

    queue = Queue()
    processes = list()
    for i in range(0, nparallel):
        processes.append(Process(target=pipeline, args=(queue, image_path_chunks[i], i)))
        processes[-1].start()

    progress = tqdm(desc="Generating tags", total=len(image_paths))
    exit = False
    with open(os.path.join(args.image_dir, "metadata.jsonl"), mode='w') as output_file:
        while not exit:
            if not queue.empty():
                meta = queue.get()
                save_meta(output_file, meta, args.image_dir, args.common_description)
                progress.update()
            exit = True
            for process in processes:
                if process.is_alive():
                    exit = False
                    break

        while not queue.empty():
            meta = queue.get()
            save_meta(output_file, meta, args.image_dir, args.common_description)
            progress.update()

    for process in processes:
        process.join()
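The script above writes one JSON object per image to metadata.jsonl inside --image_dir, with the fields set in save_meta(). As a minimal sketch of consuming that output (assuming the script has already been run, and "dataset" stands in for whatever directory was passed as --image_dir):

import json
import os

image_dir = "dataset"  # hypothetical; the directory that was passed as --image_dir

with open(os.path.join(image_dir, "metadata.jsonl")) as f:
    for line in f:
        meta = json.loads(line)
        # "file_name" is relative to image_dir, "text" is the comma-separated tag string
        print(meta["file_name"], "->", meta["text"])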
							
								
								
									
DanbooruTagger/deepdanbooru_onnx/__init__.py (new file, 3 additions)
@@ -0,0 +1,3 @@
from .deepdanbooru_onnx import DeepDanbooru
from .deepdanbooru_onnx import process_image
__version__ = '0.0.8'
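The package re-exports DeepDanbooru and process_image at the top level, so both can be imported directly from deepdanbooru_onnx; a minimal sketch:

import deepdanbooru_onnx
from deepdanbooru_onnx import DeepDanbooru, process_image

print(deepdanbooru_onnx.__version__)  # '0.0.8'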
										
(two binary files not shown)
							
								
								
									
DanbooruTagger/deepdanbooru_onnx/deepdanbooru_onnx.py (new file, 244 additions)
@@ -0,0 +1,244 @@
import onnxruntime as ort
from PIL import Image
import numpy as np
import os
from tqdm import tqdm
import requests
import hashlib
from typing import List, Union
import shutil
from pathlib import Path


def process_image(image: Image.Image) -> np.ndarray:
    """
    Convert an image to a numpy array.
    :param image: the image to convert
    :return: the numpy array
    """

    image = image.convert("RGB").resize((512, 512))
    image = np.array(image).astype(np.float32) / 255
    image = image.transpose((2, 0, 1)).reshape(1, 3, 512, 512).transpose((0, 2, 3, 1))
    return image


def download(url: str, save_path: str, md5: str, length: int) -> bool:
    """
    Download a file from url to save_path and verify it.
    If the md5 of the downloaded file matches, return True; otherwise return False.
    :param url: the url of the file to download
    :param save_path: the path to save the file
    :param md5: the md5 of the file
    :param length: the length of the file in bytes
    :return: True if the file is downloaded successfully, False otherwise
    """

    try:
        response = requests.get(url=url, stream=True)
        with open(save_path, "wb") as f:
            with tqdm.wrapattr(
                response.raw, "read", total=length, desc="Downloading"
            ) as r_raw:
                shutil.copyfileobj(r_raw, f)
        return (
            True
            if hashlib.md5(open(save_path, "rb").read()).hexdigest() == md5
            else False
        )
    except Exception as e:
        print(e)
        return False


def download_model():
    """
    Download the model and tags file from the server.
    :return: the path to the model and tags file
    """

    model_url = (
        "https://huggingface.co/chinoll/deepdanbooru/resolve/main/deepdanbooru.onnx"
    )
    tags_url = "https://huggingface.co/chinoll/deepdanbooru/resolve/main/tags.txt"
    model_md5 = "16be4e40ebcc0b1d1915bbf31f00969f"
    tags_md5 = "a3f764de985cdeba89f1d232a4204402"
    model_length = 643993025
    tags_length = 133810

    home = str(Path.home()) + "/.deepdanbooru_onnx/"
    if not os.path.exists(home):
        os.mkdir(home)

    model_name = "deepdanbooru.onnx"
    tags_name = "tags.txt"

    model_path = home + model_name
    tags_path = home + tags_name
    if os.path.exists(model_path):
        if hashlib.md5(open(model_path, "rb").read()).hexdigest() != model_md5:
            os.remove(model_path)
            if not download(model_url, model_path, model_md5, model_length):
                raise ValueError("Model download failed")

    else:
        if not download(model_url, model_path, model_md5, model_length):
            raise ValueError("Model download failed")

    if os.path.exists(tags_path):
        if hashlib.md5(open(tags_path, "rb").read()).hexdigest() != tags_md5:
            os.remove(tags_path)
            if not download(tags_url, tags_path, tags_md5, tags_length):
                raise ValueError("Tags download failed")
    else:
        if not download(tags_url, tags_path, tags_md5, tags_length):
            raise ValueError("Tags download failed")
    return model_path, tags_path


class DeepDanbooru:
    def __init__(
        self,
        mode: str = "auto",
        model_path: Union[str, None] = None,
        tags_path: Union[str, None] = None,
        threshold: Union[float, int] = 0.6,
        pin_memory: bool = False,
        batch_size: int = 1,
    ):
        """
        Initialize the DeepDanbooru class.
        :param mode: the mode of the model, "cpu" or "gpu" or "auto"
        :param model_path: the path to the model file
        :param tags_path: the path to the tags file
        :param threshold: the threshold of the model
        :param pin_memory: whether to use pin memory
        :param batch_size: the batch size of the model
        """

        # Map the user-facing mode names to ONNX Runtime execution providers.
        providers = {
            "cpu": "CPUExecutionProvider",
            "gpu": "CUDAExecutionProvider",
            "tensorrt": "TensorrtExecutionProvider",
            "auto": (
                "CUDAExecutionProvider"
                if "CUDAExecutionProvider" in ort.get_available_providers()
                else "CPUExecutionProvider"
            ),
        }

        if not (isinstance(threshold, float) or isinstance(threshold, int)):
            raise TypeError("threshold must be float or int")
        if threshold < 0 or threshold > 1:
            raise ValueError("threshold must be between 0 and 1")
        if mode not in providers:
            raise ValueError(
                "Mode not supported. Please choose from: cpu, gpu, tensorrt"
            )
        if providers[mode] not in ort.get_available_providers():
            raise ValueError(
                f"Your device does not support mode '{mode}'. Please choose from: cpu"
            )
        if model_path is not None and not os.path.exists(model_path):
            raise FileNotFoundError("Model file not found")
        if tags_path is not None and not os.path.exists(tags_path):
            raise FileNotFoundError("Tags file not found")

        if model_path is None or tags_path is None:
            model_path, tags_path = download_model()

        self.session = ort.InferenceSession(model_path, providers=[providers[mode]])
        self.tags = [i.replace("\n", "") for i in open(tags_path, "r").readlines()]

        self.input_name = self.session.get_inputs()[0].name
        self.output_name = [output.name for output in self.session.get_outputs()]
        self.threshold = threshold
        self.pin_memory = pin_memory
        self.batch_size = batch_size
        self.mode = mode
        self.cache = {}

    def __str__(self) -> str:
        return f"DeepDanbooru(mode={self.mode}, threshold={self.threshold}, pin_memory={self.pin_memory}, batch_size={self.batch_size})"

    def __repr__(self) -> str:
        return self.__str__()

    def from_image_inference(self, image: Image.Image) -> dict:
        image = process_image(image)
        return self.predict(image)

    def from_ndarray_inferece(self, image: np.ndarray) -> dict:
        if image.shape != (1, 512, 512, 3):
            raise ValueError(f"Image must be {(1, 512, 512, 3)}")
        return self.predict(image)

    def from_file_inference(self, image: str) -> dict:
        return self.from_image_inference(Image.open(image))

    def from_list_inference(self, image: Union[list, tuple]) -> List[dict]:
        if self.pin_memory:
            image = [process_image(Image.open(i)) for i in image]
        for i in [
            image[i : i + self.batch_size]
            for i in range(0, len(image), self.batch_size)
        ]:
            imagelist = i
            bs = len(i)
            _imagelist, idx, hashlist = [], [], []
            for j in range(len(i)):
                img = Image.open(i[j]) if not self.pin_memory else imagelist[j]
                # Hash each image so repeated inputs can be answered from the cache.
                image_hash = hashlib.md5(np.array(img).astype(np.uint8)).hexdigest()
                hashlist.append(image_hash)
                if image_hash in self.cache:
                    continue
                if not self.pin_memory:
                    _imagelist.append(process_image(img))
                else:
                    _imagelist.append(imagelist[j])
                idx.append(j)

            imagelist = _imagelist
            if len(imagelist) != 0:
                _image = np.vstack(imagelist)
                results = self.inference(_image)
                results_idx = 0
            else:
                results = []

            for i in range(bs):
                image_tag = {}
                if i in idx:
                    hash = hashlist[i]
                    for tag, score in zip(self.tags, results[results_idx]):
                        if score >= self.threshold:
                            image_tag[tag] = score
                    results_idx += 1
                    self.cache[hash] = image_tag
                    yield image_tag
                else:
                    yield self.cache[hashlist[i]]

    def inference(self, image):
        return self.session.run(self.output_name, {self.input_name: image})[0]

    def predict(self, image):
        result = self.inference(image)
        image_tag = {}
        for tag, score in zip(self.tags, result[0]):
            if score >= self.threshold:
                image_tag[tag] = score
        return image_tag

    def __call__(self, image) -> Union[dict, List[dict]]:
        if isinstance(image, str):
            return self.from_file_inference(image)
        elif isinstance(image, np.ndarray):
            return self.from_ndarray_inferece(image)
        elif isinstance(image, list) or isinstance(image, tuple):
            return self.from_list_inference(image)
        elif isinstance(image, Image.Image):
            return self.from_image_inference(image)
        else:
            raise ValueError("Image must be a file path or a numpy array or list/tuple")
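DeepDanbooru.__call__ dispatches on the argument type: a file path, a PIL image, or a preprocessed (1, 512, 512, 3) array returns a single tag-to-score dict, while a list or tuple of paths returns a generator with one dict per image. A minimal usage sketch (the image file names are hypothetical; the first run downloads the model and tag list into ~/.deepdanbooru_onnx/):

from deepdanbooru_onnx import DeepDanbooru

danbooru = DeepDanbooru(threshold=0.6)

# Single image: a file path (or a PIL.Image) returns one {tag: score} dict.
tags = danbooru("example.jpg")
print(tags)

# Batch: a list of paths returns a generator, one tag dict per input image.
for image_tags in danbooru(["a.jpg", "b.jpg"]):
    print(sorted(image_tags, key=image_tags.get, reverse=True)[:5])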
							
								
								
									
LICENSE (new file, 674 additions)
@@ -0,0 +1,674 @@
                    GNU GENERAL PUBLIC LICENSE
                       Version 3, 29 June 2007

 Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
 Everyone is permitted to copy and distribute verbatim copies
 of this license document, but changing it is not allowed.

[The remainder of the file is the unmodified, standard text of the GNU General Public License, Version 3.]
|  | SUCH DAMAGES. | ||||||
|  | 
 | ||||||
|  |   17. Interpretation of Sections 15 and 16. | ||||||
|  | 
 | ||||||
|  |   If the disclaimer of warranty and limitation of liability provided | ||||||
|  | above cannot be given local legal effect according to their terms, | ||||||
|  | reviewing courts shall apply local law that most closely approximates | ||||||
|  | an absolute waiver of all civil liability in connection with the | ||||||
|  | Program, unless a warranty or assumption of liability accompanies a | ||||||
|  | copy of the Program in return for a fee. | ||||||
|  | 
 | ||||||
|  |                      END OF TERMS AND CONDITIONS | ||||||
|  | 
 | ||||||
|  |             How to Apply These Terms to Your New Programs | ||||||
|  | 
 | ||||||
|  |   If you develop a new program, and you want it to be of the greatest | ||||||
|  | possible use to the public, the best way to achieve this is to make it | ||||||
|  | free software which everyone can redistribute and change under these terms. | ||||||
|  | 
 | ||||||
|  |   To do so, attach the following notices to the program.  It is safest | ||||||
|  | to attach them to the start of each source file to most effectively | ||||||
|  | state the exclusion of warranty; and each file should have at least | ||||||
|  | the "copyright" line and a pointer to where the full notice is found. | ||||||
|  | 
 | ||||||
|  |     <one line to give the program's name and a brief idea of what it does.> | ||||||
|  |     Copyright (C) <year>  <name of author> | ||||||
|  | 
 | ||||||
|  |     This program is free software: you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation, either version 3 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  | 
 | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  | 
 | ||||||
|  |     You should have received a copy of the GNU General Public License | ||||||
|  |     along with this program.  If not, see <https://www.gnu.org/licenses/>. | ||||||
|  | 
 | ||||||
|  | Also add information on how to contact you by electronic and paper mail. | ||||||
|  | 
 | ||||||
|  |   If the program does terminal interaction, make it output a short | ||||||
|  | notice like this when it starts in an interactive mode: | ||||||
|  | 
 | ||||||
|  |     <program>  Copyright (C) <year>  <name of author> | ||||||
|  |     This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. | ||||||
|  |     This is free software, and you are welcome to redistribute it | ||||||
|  |     under certain conditions; type `show c' for details. | ||||||
|  | 
 | ||||||
|  | The hypothetical commands `show w' and `show c' should show the appropriate | ||||||
|  | parts of the General Public License.  Of course, your program's commands | ||||||
|  | might be different; for a GUI interface, you would use an "about box". | ||||||
|  | 
 | ||||||
|  |   You should also get your employer (if you work as a programmer) or school, | ||||||
|  | if any, to sign a "copyright disclaimer" for the program, if necessary. | ||||||
|  | For more information on this, and how to apply and follow the GNU GPL, see | ||||||
|  | <https://www.gnu.org/licenses/>. | ||||||
|  | 
 | ||||||
|  |   The GNU General Public License does not permit incorporating your program | ||||||
|  | into proprietary programs.  If your program is a subroutine library, you | ||||||
|  | may consider it more useful to permit linking proprietary applications with | ||||||
|  | the library.  If this is what you want to do, use the GNU Lesser General | ||||||
|  | Public License instead of this License.  But first, please read | ||||||
|  | <https://www.gnu.org/licenses/why-not-lgpl.html>. | ||||||
							
								
								
									
										142
									
								
								LLavaTagger/LLavaTagger.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										142
									
								
								LLavaTagger/LLavaTagger.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,142 @@ | ||||||
|  | import warnings | ||||||
|  | warnings.simplefilter(action='ignore') | ||||||
|  | from transformers import AutoProcessor, LlavaForConditionalGeneration, BitsAndBytesConfig, logging | ||||||
|  | import argparse | ||||||
|  | import cv2 | ||||||
|  | import torch | ||||||
|  | import os | ||||||
|  | import numpy | ||||||
|  | from typing import Iterator | ||||||
|  | from torch.multiprocessing import Process, Queue | ||||||
|  | import json | ||||||
|  | from tqdm import tqdm | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | image_ext_ocv = [".bmp", ".jpeg", ".jpg", ".png"] | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def find_image_files(path: str) -> list[str]: | ||||||
|  | 	paths = list() | ||||||
|  | 	for root, dirs, files in os.walk(path): | ||||||
|  | 		for filename in files: | ||||||
|  | 			name, extension = os.path.splitext(filename) | ||||||
|  | 			if extension.lower() in image_ext_ocv: | ||||||
|  | 				paths.append(os.path.join(root, filename)) | ||||||
|  | 	return paths | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def image_loader(paths: list[str]) -> Iterator[numpy.ndarray]: | ||||||
|  | 	for path in paths: | ||||||
|  | 		name, extension = os.path.splitext(path) | ||||||
|  | 		extension = extension.lower() | ||||||
|  | 		imagebgr = cv2.imread(path) | ||||||
|  | 		if imagebgr is None: | ||||||
|  | 			print(f"Warning: could not load {path}") | ||||||
|  | 		else: | ||||||
|  | 			image = cv2.cvtColor(imagebgr, cv2.COLOR_BGR2RGB) | ||||||
|  | 			yield image, path | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def pipeline(queue: Queue, image_paths: list[str], prompt: str, device: torch.device, model_name_or_path: str, batch_size: int): | ||||||
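|  | 	# The model is loaded with 4-bit NF4 quantization to keep VRAM usage low; | ||||||
|  | 	# attn_implementation="flash_attention_2" assumes the flash-attn package is installed. | ||||||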
|  | 	model = LlavaForConditionalGeneration.from_pretrained(model_name_or_path, torch_dtype=torch.float16, low_cpu_mem_usage=None, | ||||||
|  | 		quantization_config=BitsAndBytesConfig( | ||||||
|  | 			load_in_4bit=True, | ||||||
|  | 			bnb_4bit_compute_dtype=torch.float16, | ||||||
|  | 			bnb_4bit_use_double_quant=False, | ||||||
|  | 			bnb_4bit_quant_type='nf4', | ||||||
|  | 			), device_map=device, attn_implementation="flash_attention_2") | ||||||
|  | 	processor = AutoProcessor.from_pretrained(model_name_or_path) | ||||||
|  | 	image_generator = image_loader(image_paths) | ||||||
|  | 
 | ||||||
|  | 	stop = False | ||||||
|  | 	finished_count = 0 | ||||||
|  | 	while not stop: | ||||||
|  | 		prompts = list() | ||||||
|  | 		images = list() | ||||||
|  | 		filenames = list() | ||||||
|  | 		for i in range(0, batch_size): | ||||||
|  | 			image, filename = next(image_generator, (None, None)) | ||||||
|  | 			if image is None: | ||||||
|  | 				stop = True | ||||||
|  | 				break | ||||||
|  | 
 | ||||||
|  | 			filenames.append(filename) | ||||||
|  | 			images.append(image) | ||||||
|  | 			prompts.append(prompt) | ||||||
|  | 
 | ||||||
|  | 		if len(images) == 0: | ||||||
|  | 			break | ||||||
|  | 
 | ||||||
|  | 		inputs = processor(text=prompts, images=images, return_tensors="pt").to(model.device) | ||||||
|  | 		generate_ids = model.generate(**inputs, max_new_tokens=100, min_new_tokens=3, length_penalty=1.0, do_sample=False, temperature=1.0, top_k=50, top_p=1.0) | ||||||
|  | 		decodes = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False) | ||||||
|  | 		finished_count += len(images) | ||||||
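|  | 		# The decoded text begins with the prompt (minus the literal "<image>" token), | ||||||
|  | 		# so strip that prefix and keep only the model's answer. | ||||||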
|  | 		for i, decoded in enumerate(decodes): | ||||||
|  | 			trim = len(prompt) - len("<image>") | ||||||
|  | 			queue.put({"file_name": filenames[i], "text": decoded[trim:].strip()}) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def split_list(input_list, count): | ||||||
|  | 	target_length = int(len(input_list) / count) | ||||||
|  | 	for i in range(0, count - 1): | ||||||
|  | 		yield input_list[i * target_length: (i + 1) * target_length] | ||||||
|  | 	yield input_list[(count - 1) * target_length: len(input_list)] | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def save_meta(meta_file, meta, reldir, common_description): | ||||||
|  | 	meta["file_name"] = os.path.relpath(meta["file_name"], reldir) | ||||||
|  | 	if common_description is not None: | ||||||
|  | 		meta["text"] = common_description + meta["text"] | ||||||
|  | 	meta_file.write(json.dumps(meta) + '\n') | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | if __name__ == "__main__": | ||||||
|  | 	parser = argparse.ArgumentParser("A script to tag images via llava") | ||||||
|  | 	parser.add_argument('--model', '-m', default="llava-hf/llava-1.5-13b-hf", help="model to use") | ||||||
|  | 	parser.add_argument('--quantize', '-q', action='store_true', help="load quantized") | ||||||
|  | 	parser.add_argument('--prompt', '-p', default="Please describe this image in 10 to 20 words.", help="Prompt to use on each image") | ||||||
|  | 	parser.add_argument('--batch', '-b', default=4, type=int, help="Batch size to use for inference") | ||||||
|  | 	parser.add_argument('--common_description', '-c', help="An optional description that will be prepended to the AI generated one") | ||||||
|  | 	parser.add_argument('--image_dir', '-i', required=True, help="A directory containing the images to tag") | ||||||
|  | 	args = parser.parse_args() | ||||||
|  | 
 | ||||||
|  | 	prompt = "USER: <image>\n" + args.prompt + "\nASSISTANT: " | ||||||
|  | 	os.environ["BITSANDBYTES_NOWELCOME"] = "1" | ||||||
|  | 
 | ||||||
|  | 	image_paths = find_image_files(args.image_dir) | ||||||
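|  | 	# Spawn one worker process per visible GPU; each worker gets its own chunk of the image list. | ||||||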
|  | 	image_path_chunks = list(split_list(image_paths, torch.cuda.device_count())) | ||||||
|  | 
 | ||||||
|  | 	print(f"Will use {torch.cuda.device_count()} processes to create tags") | ||||||
|  | 
 | ||||||
|  | 	logging.set_verbosity_error() | ||||||
|  | 	warnings.filterwarnings("ignore") | ||||||
|  | 	torch.multiprocessing.set_start_method('spawn') | ||||||
|  | 
 | ||||||
|  | 	queue = Queue() | ||||||
|  | 	processies = list() | ||||||
|  | 	for i in range(0, torch.cuda.device_count()): | ||||||
|  | 		processies.append(Process(target=pipeline, args=(queue, image_path_chunks[i], prompt, torch.device(i), args.model, args.batch))) | ||||||
|  | 		processies[-1].start() | ||||||
|  | 
 | ||||||
|  | 	progress = tqdm(desc="Generating tags", total=len(image_paths)) | ||||||
|  | 	exit = False | ||||||
|  | 	with open(os.path.join(args.image_dir, "metadata.jsonl"), mode='w') as output_file: | ||||||
|  | 		while not exit: | ||||||
|  | 			if not queue.empty(): | ||||||
|  | 				meta = queue.get() | ||||||
|  | 				save_meta(output_file, meta, args.image_dir, args.common_description) | ||||||
|  | 				progress.update() | ||||||
|  | 			exit = True | ||||||
|  | 			for process in processies: | ||||||
|  | 				if process.is_alive(): | ||||||
|  | 					exit = False | ||||||
|  | 					break | ||||||
|  | 
 | ||||||
|  | 		while not queue.empty(): | ||||||
|  | 			meta = queue.get() | ||||||
|  | 			save_meta(output_file, meta, args.image_dir, args.common_description) | ||||||
|  | 			progress.update() | ||||||
|  | 
 | ||||||
|  | 	for process in processies: | ||||||
|  | 		process.join() | ||||||
|  | 
 | ||||||
							
								
								
									
										21
									
								
								LLavaTagger/README.md
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								LLavaTagger/README.md
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,21 @@ | ||||||
|  | # LLavaTagger | ||||||
|  | 
 | ||||||
|  | LLavaTagger is a Python script that tags images based on a given prompt using the [LLaVA](https://llava-vl.github.io/) multimodal LLM. LLavaTagger supports using any number of GPUs in DDP parallel for this task. | ||||||
|  | 
 | ||||||
|  | ## How to use | ||||||
|  | 
 | ||||||
|  | First, create a Python venv and install the required packages into it: | ||||||
|  | 
 | ||||||
|  | 	$ python -m venv venv | ||||||
|  | 	$ source venv/bin/activate | ||||||
|  | 	$ pip install -r requirements.txt | ||||||
|  | 
 | ||||||
|  | Then run LLavaTagger for instance like so: | ||||||
|  | 
 | ||||||
|  | 	$ python LLavaTagger.py --common_description "a image of a cat, " --prompt "describe the cat in 10 to 20 words" --batch 8 --quantize --image_dir ~/cat_images | ||||||
|  | 
 | ||||||
|  | By default LLavaTagger will run in parallel on all available GPUs; if this is undesirable, please use the ROCR_VISIBLE_DEVICES= or CUDA_VISIBLE_DEVICES= environment variable to hide the unwanted GPUs. | ||||||
|  | 
 | ||||||
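|  | For example, to restrict LLavaTagger to the first two CUDA devices (a hypothetical invocation, using the flags described above): | ||||||
|  | 
|  | 	$ CUDA_VISIBLE_DEVICES=0,1 python LLavaTagger.py --prompt "describe the image in 10 to 20 words" --batch 8 --image_dir ~/images | ||||||
|  | 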
|  | LLavaTagger will then create a metadata.jsonl in the image directory suitable to be used by the scripts of [diffusers](https://github.com/huggingface/diffusers) to train stable diffusion (xl). If other formats are desired, ../utils contains scripts to transform the metadata into other formats, for instance for use with [kohya](https://github.com/bmaltais/kohya_ss). | ||||||
|  | 
 | ||||||
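|  | Each line of metadata.jsonl is a single JSON object pairing an image path (relative to the image directory) with its generated caption. A hypothetical line, assuming the example invocation above, might look like: | ||||||
|  | 
|  | 	{"file_name": "cat_01.jpg", "text": "a image of a cat, a ginger cat sleeping on a sunlit sofa"} | ||||||
|  | 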
|  | If editing the created tags is desired, [QImageTagger](https://uvos.xyz/git/uvos/QImageTagger) can be used for this purpose | ||||||
							
								
								
									
										11
									
								
								LLavaTagger/requirements.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								LLavaTagger/requirements.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,11 @@ | ||||||
|  | accelerate==0.29.0 | ||||||
|  | bitsandbytes | ||||||
|  | huggingface-hub==0.22.2 | ||||||
|  | ninja==1.11.1.1 | ||||||
|  | safetensors==0.4.2 | ||||||
|  | tokenizers==0.15.2 | ||||||
|  | transformers | ||||||
|  | torch | ||||||
|  | opencv-python | ||||||
|  | numpy | ||||||
|  | tqdm | ||||||
							
								
								
									
										174
									
								
								PersonDatasetAssembler/PersonDatasetAssembler.py
									
										
									
									
									
										Executable file
									
								
							
							
						
						
									
										174
									
								
								PersonDatasetAssembler/PersonDatasetAssembler.py
									
										
									
									
									
										Executable file
									
								
							|  | @ -0,0 +1,174 @@ | ||||||
|  | #!/bin/python3 | ||||||
|  | 
 | ||||||
|  | # PersonDatasetAssembler - A tool to assemble images of a specific person from a | ||||||
|  | # directory of images or from a video file | ||||||
|  | # Copyright (C) 2024 Carl Philipp Klemm | ||||||
|  | # | ||||||
|  | # This file is part of PersonDatasetAssembler. | ||||||
|  | # | ||||||
|  | # PersonDatasetAssembler is free software: you can redistribute it and/or modify | ||||||
|  | # it under the terms of the GNU General Public License as published by | ||||||
|  | # the Free Software Foundation, either version 3 of the License, or | ||||||
|  | # (at your option) any later version. | ||||||
|  | # | ||||||
|  | # PersonDatasetAssembler is distributed in the hope that it will be useful, | ||||||
|  | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | # GNU General Public License for more details. | ||||||
|  | # | ||||||
|  | # You should have received a copy of the GNU General Public License | ||||||
|  | # along with PersonDatasetAssembler.  If not, see <http://www.gnu.org/licenses/>. | ||||||
|  | 
 | ||||||
|  | import argparse | ||||||
|  | import os | ||||||
|  | from typing import Iterator | ||||||
|  | import cv2 | ||||||
|  | import numpy | ||||||
|  | from tqdm import tqdm | ||||||
|  | from wand.exceptions import BlobError | ||||||
|  | from wand.image import Image | ||||||
|  | 
 | ||||||
|  | image_ext_ocv = [".bmp", ".jpeg", ".jpg", ".png"] | ||||||
|  | image_ext_wand = [".dng", ".arw"] | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class LoadException(Exception): | ||||||
|  | 	pass | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def find_image_files(path: str) -> list[str]: | ||||||
|  | 	paths = list() | ||||||
|  | 	for root, dirs, files in os.walk(path): | ||||||
|  | 		for filename in files: | ||||||
|  | 			name, extension = os.path.splitext(filename) | ||||||
|  | 			if extension.lower() in image_ext_ocv or extension in image_ext_wand: | ||||||
|  | 				paths.append(os.path.join(root, filename)) | ||||||
|  | 	return paths | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def image_loader(paths: list[str]) -> Iterator[numpy.ndarray]: | ||||||
|  | 	for path in paths: | ||||||
|  | 		name, extension = os.path.splitext(path) | ||||||
|  | 		extension = extension.lower() | ||||||
|  | 		if extension in image_ext_ocv: | ||||||
|  | 			image = cv2.imread(path) | ||||||
|  | 			if image is None: | ||||||
|  | 				print(f"Warning: could not load {path}") | ||||||
|  | 			else: | ||||||
|  | 				yield image | ||||||
|  | 		elif extension in image_ext_wand: | ||||||
|  | 			try: | ||||||
|  | 				image = Image(filename=path) | ||||||
|  | 			except BlobError as e: | ||||||
|  | 				print(f"Warning: could not load {path}, {e}") | ||||||
|  | 				continue | ||||||
|  | 			# Wand exposes a numpy array interface (RGB); convert to BGR so raw files match the OpenCV-loaded images | ||||||
|  | 			yield cv2.cvtColor(numpy.array(image), cv2.COLOR_RGB2BGR) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def extract_video_images(video: cv2.VideoCapture, interval: int = 0): | ||||||
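|  | 	# Seek to every `interval`-th frame via CAP_PROP_POS_FRAMES rather than decoding the video sequentially. | ||||||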
|  | 	ret = True | ||||||
|  | 	frame_counter = 0 | ||||||
|  | 	while ret: | ||||||
|  | 		video.set(cv2.CAP_PROP_POS_FRAMES, frame_counter) | ||||||
|  | 		ret, frame = video.read() | ||||||
|  | 		if ret: | ||||||
|  | 			yield frame | ||||||
|  | 		frame_counter += interval | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def contains_face_match(detector: cv2.FaceDetectorYN, recognizer: cv2.FaceRecognizerSF, image: numpy.ndarray, referance_features: list, thresh: float) -> tuple[float, bool]: | ||||||
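|  | 	# For each detected face, average the cosine match score against all referance features and | ||||||
|  | 	# return (score, True) for the first face whose mean score exceeds the threshold. | ||||||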
|  | 	detector.setInputSize([image.shape[1], image.shape[0]]) | ||||||
|  | 	faces = detector.detect(image)[1] | ||||||
|  | 	if faces is None: | ||||||
|  | 		return 0, False | ||||||
|  | 	for face in faces: | ||||||
|  | 		cropped_image = recognizer.alignCrop(image, face) | ||||||
|  | 		features = recognizer.feature(cropped_image) | ||||||
|  | 		score_accum = 0.0 | ||||||
|  | 		for referance in referance_features: | ||||||
|  | 			score_accum += recognizer.match(referance, features, 0) | ||||||
|  | 		score = score_accum / len(referance_features) | ||||||
|  | 		if score > thresh: | ||||||
|  | 			return score, True | ||||||
|  | 	return 0, False | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def process_referance(detector: cv2.FaceDetectorYN, recognizer: cv2.FaceRecognizerSF, referance_path: str) -> list: | ||||||
|  | 	images = list() | ||||||
|  | 	out = list() | ||||||
|  | 
 | ||||||
|  | 	if os.path.isfile(referance_path): | ||||||
|  | 		image = cv2.imread(referance_path) | ||||||
|  | 		if image is None: | ||||||
|  | 			print(f"Could not load image from {referance_path}") | ||||||
|  | 		else: | ||||||
|  | 			images.append(image) | ||||||
|  | 	elif os.path.isdir(referance_path): | ||||||
|  | 		filenames = find_image_files(referance_path) | ||||||
|  | 		images = list(image_loader(filenames)) | ||||||
|  | 
 | ||||||
|  | 	for image in images: | ||||||
|  | 		detector.setInputSize([image.shape[1], image.shape[0]]) | ||||||
|  | 		faces = detector.detect(image)[1] | ||||||
|  | 		if faces is None: | ||||||
|  | 			print("unable to find a face in reference image") | ||||||
|  | 			exit(1) | ||||||
|  | 		image = recognizer.alignCrop(image, faces[0]) | ||||||
|  | 		features = recognizer.feature(image) | ||||||
|  | 		out.append(features) | ||||||
|  | 
 | ||||||
|  | 	return out | ||||||
|  | 
 | ||||||
|  | if __name__ == "__main__": | ||||||
|  | 	parser = argparse.ArgumentParser("Script to assemble a dataset of images of a specific person") | ||||||
|  | 	parser.add_argument('--out', '-o', default="out", help="place to put dataset") | ||||||
|  | 	parser.add_argument('--input', '-i', required=True, help="directory or video file to get images from") | ||||||
|  | 	parser.add_argument('--skip', '-s', default=0, type=int, help="skip n frames between samples when grabbing from a video file") | ||||||
|  | 	parser.add_argument('--referance', '-r', required=True, help="reference image or directory of images of the person to be found") | ||||||
|  | 	parser.add_argument('--match_model', '-m', required=True, help="Path to the onnx recognition model to be used") | ||||||
|  | 	parser.add_argument('--detect_model', '-d', required=True, help="Path to the onnx detection model to be used") | ||||||
|  | 	parser.add_argument('--threshold', '-t', default=0.362, type=float, help="match threshold to use") | ||||||
|  | 	parser.add_argument('--invert', '-n', action='store_true', help="output files that do NOT match") | ||||||
|  | 	args = parser.parse_args() | ||||||
|  | 
 | ||||||
|  | 	recognizer = cv2.FaceRecognizerSF.create(model=args.match_model, config="", backend_id=cv2.dnn.DNN_BACKEND_DEFAULT , target_id=cv2.dnn.DNN_TARGET_CPU) | ||||||
|  | 	detector = cv2.FaceDetectorYN.create(model=args.detect_model, config="", input_size=[320, 320], | ||||||
|  | 		score_threshold=0.6, nms_threshold=0.3, top_k=5000, backend_id=cv2.dnn.DNN_BACKEND_DEFAULT, target_id=cv2.dnn.DNN_TARGET_CPU) | ||||||
|  | 
 | ||||||
|  | 	referance_features = process_referance(detector, recognizer, args.referance) | ||||||
|  | 	if len(referance_features) < 1: | ||||||
|  | 		print(f"Could not load any referance image(s) from {args.referance}") | ||||||
|  | 		exit(1) | ||||||
|  | 
 | ||||||
|  | 	if os.path.isfile(args.input): | ||||||
|  | 		video = cv2.VideoCapture(args.input) | ||||||
|  | 		if not video.isOpened(): | ||||||
|  | 			print(f"Unable to open {args.input} as a video file") | ||||||
|  | 			exit(1) | ||||||
|  | 		image_generator = extract_video_images(video, args.skip + 1) | ||||||
|  | 		total_images = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) / (args.skip + 1) | ||||||
|  | 	elif os.path.isdir(args.input): | ||||||
|  | 		image_filenams = find_image_files(args.input) | ||||||
|  | 		image_generator = image_loader(image_filenams) | ||||||
|  | 		total_images = len(image_filenams) | ||||||
|  | 	else: | ||||||
|  | 		print(f"{args.input} is not a video file nor is it a directory") | ||||||
|  | 		exit(1) | ||||||
|  | 
 | ||||||
|  | 	os.makedirs(args.out, exist_ok=True) | ||||||
|  | 
 | ||||||
|  | 	progress = tqdm(total=int(total_images), desc="0.00") | ||||||
|  | 	counter = 0 | ||||||
|  | 	for image in image_generator: | ||||||
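|  | 		# Run face detection on a copy downscaled to 512 px height for speed; | ||||||
|  | 		# the original full-resolution image is what gets written to disk on a match. | ||||||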
|  | 		if image.shape[0] > 512: | ||||||
|  | 			aspect = image.shape[0] / image.shape[1] | ||||||
|  | 			resized = cv2.resize(image, (int(512 / aspect), 512), interpolation=cv2.INTER_AREA) | ||||||
|  | 		else: | ||||||
|  | 			resized = image | ||||||
|  | 		score, match = contains_face_match(detector, recognizer, resized, referance_features, args.threshold) | ||||||
|  | 		if (match and not args.invert) or (not match and args.invert): | ||||||
|  | 			filename = f"{counter:04}.png" | ||||||
|  | 			cv2.imwrite(os.path.join(args.out, filename), image) | ||||||
|  | 			counter += 1 | ||||||
|  | 		progress.set_description(f"{score:1.2f}") | ||||||
|  | 		progress.update() | ||||||
|  | 
 | ||||||
							
								
								
									
										20
									
								
								PersonDatasetAssembler/README.md
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										20
									
								
								PersonDatasetAssembler/README.md
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,20 @@ | ||||||
|  | # PersonDatasetAssembler | ||||||
|  | 
 | ||||||
|  | PersonDatasetAssembler is a Python script that finds images of a specific person, specified by a reference image, in a directory of images or in a video file. PersonDatasetAssembler also supports raw images. | ||||||
|  | 
 | ||||||
|  | ## How to use | ||||||
|  | 
 | ||||||
|  | First, create a Python venv and install the required packages into it: | ||||||
|  | 
 | ||||||
|  | 	$ python -m venv venv | ||||||
|  | 	$ source venv/bin/activate | ||||||
|  | 	$ pip install -r requirements.txt | ||||||
|  | 
 | ||||||
|  | Then run PersonDatasetAssembler for instance like so: | ||||||
|  | 
 | ||||||
|  | 	$ python PersonDatasetAssembler.py --referance someperson.jpg --match_model ../Weights/face_recognition_sface_2021dec.onnx --detect_model ../Weights/face_detection_yunet_2023mar.onnx --input ~/Photos --out imagesOfSomePerson | ||||||
|  | 
 | ||||||
|  | Or to extract images from a video: | ||||||
|  | 
 | ||||||
|  | 	$ python PersonDatasetAssembler.py --referance someperson.jpg --match_model ../Weights/face_recognition_sface_2021dec.onnx --detect_model ../Weights/face_detection_yunet_2023mar.onnx -i ~/SomeVideo.mkv --out imagesOfSomePerson | ||||||
|  | 
 | ||||||
							
								
								
									
										4
									
								
								PersonDatasetAssembler/requirements.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										4
									
								
								PersonDatasetAssembler/requirements.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,4 @@ | ||||||
|  | numpy==1.26.4 | ||||||
|  | opencv-python==4.10.0.82 | ||||||
|  | tqdm==4.66.4 | ||||||
|  | Wand==0.6.13 | ||||||
							
								
								
									
										35
									
								
								README.md
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										35
									
								
								README.md
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,35 @@ | ||||||
|  | # SDImagePreprocess | ||||||
|  | 
 | ||||||
|  | This repo contains a collection of high performance tools intended to ease the creation of datasets for image generation AI training, like stable diffusion. | ||||||
|  | 
 | ||||||
|  | ## Included tools | ||||||
|  | 
 | ||||||
|  | This repo contains the following tools: | ||||||
|  | 
 | ||||||
|  | ### SmartCrop | ||||||
|  | 
 | ||||||
|  | SmartCrop is an application that uses content aware cropping, [seam carving](https://en.wikipedia.org/wiki/Seam_carving) and resizing to bring a directory of images into the desired size and aspect ratio for training. SmartCrop is configurable to prioritize specific items or specific persons in the images provided. | ||||||
|  | 
 | ||||||
|  | #### Content detected in image: | ||||||
|  | 
 | ||||||
|  |  | ||||||
|  | 
 | ||||||
|  | #### Cropped image based on content: | ||||||
|  |  | ||||||
|  | 
 | ||||||
|  | ### PersonDatasetAssembler | ||||||
|  | 
 | ||||||
|  | PersonDatasetAssembler is a Python script that finds images of a specific person, specified by a reference image, in a directory of images or in a video file. PersonDatasetAssembler also supports raw images. | ||||||
|  | 
 | ||||||
|  | ### LLavaTagger | ||||||
|  | 
 | ||||||
|  | LLavaTagger is a Python script that tags images based on a given prompt using the [LLaVA](https://llava-vl.github.io/) multimodal LLM. LLavaTagger supports using any number of GPUs in DDP parallel for this task. | ||||||
|  | 
 | ||||||
|  | ### DanbooruTagger | ||||||
|  | 
 | ||||||
|  | DanbooruTagger is a Python script of dubious utility that tags images using the [DeepDanbooru](https://github.com/KichangKim/DeepDanbooru) convolutional network. | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | ## License | ||||||
|  | 
 | ||||||
|  | All files in this repo are licensed under the GPL v3, see LICENSE | ||||||
							
								
								
									
										16
									
								
								SmartCrop/CMakeLists.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								SmartCrop/CMakeLists.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,16 @@ | ||||||
|  | cmake_minimum_required(VERSION 3.6) | ||||||
|  | 
 | ||||||
|  | find_package(OpenCV REQUIRED) | ||||||
|  | 
 | ||||||
|  | set(CMAKE_CXX_STANDARD 17) | ||||||
|  | 
 | ||||||
|  | set(SRC_FILES main.cpp yolo.cpp tokenize.cpp log.cpp seamcarving.cpp utils.cpp intelligentroi.cpp facerecognizer.cpp) | ||||||
|  | 
 | ||||||
|  | add_executable(smartcrop ${SRC_FILES}) | ||||||
|  | target_link_libraries(smartcrop ${OpenCV_LIBS} -ltbb) | ||||||
|  | target_include_directories(smartcrop PRIVATE ${OpenCV_INCLUDE_DIRS}) | ||||||
|  | target_compile_options(smartcrop PRIVATE -s -g -Wall) | ||||||
|  | message(WARNING ${WEIGHT_DIR}) | ||||||
|  | target_compile_definitions(smartcrop PUBLIC WEIGHT_DIR="${WEIGHT_DIR}") | ||||||
|  | 
 | ||||||
|  | install(TARGETS smartcrop RUNTIME DESTINATION bin) | ||||||
							
								
								
									
										50
									
								
								SmartCrop/README.md
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										50
									
								
								SmartCrop/README.md
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,50 @@ | ||||||
|  | # SmartCrop | ||||||
|  | 
 | ||||||
|  | SmartCrop is an application that uses content aware cropping, [seam carving](https://en.wikipedia.org/wiki/Seam_carving) and resizing to bring a directory of images into the desired size and aspect ratio for training. SmartCrop is configurable to prioritize specific items or specific persons in the images provided. | ||||||
|  | 
 | ||||||
|  | ## Requirements | ||||||
|  | 
 | ||||||
|  | * [cmake](https://cmake.org/) 3.6 or later | ||||||
|  | * [opencv](https://opencv.org/) 4.8 or later | ||||||
|  | * A c++17 capable compiler and standard lib like gcc or llvm/clang | ||||||
|  | * git is required to get the source | ||||||
|  | 
 | ||||||
|  | ## Building | ||||||
|  | 
 | ||||||
|  | The steps to build this application are: | ||||||
|  | 
 | ||||||
|  | 	$ git clone https://uvos.xyz/git/uvos/SDImagePreprocess.git | ||||||
|  | 	$ cd SDImagePreprocess | ||||||
|  | 	$ mkdir build | ||||||
|  | 	$ cd build | ||||||
|  | 	$ cmake .. | ||||||
|  | 	$ make | ||||||
|  | 
 | ||||||
|  | The binary can then be found in build/SmartCrop and can optionally be installed with: | ||||||
|  | 
 | ||||||
|  | 	$ sudo make install | ||||||
|  | 
 | ||||||
|  | ## Basic usage | ||||||
|  | 
 | ||||||
|  | To process all images in the directory ~/images and output the images into processedImages: | ||||||
|  | 
 | ||||||
|  | 	$ smartcrop --out processedImages ~/images/* | ||||||
|  | 
 | ||||||
|  | To also focus on the person in the image ~/person.jpg: | ||||||
|  | 
 | ||||||
|  | 	$ smartcrop --out processedImages --focus-person ~/person.jpg ~/images/* | ||||||
|  | 
 | ||||||
|  | To also enable seam carving: | ||||||
|  | 
 | ||||||
|  | 	$ smartcrop --out processedImages --focus-person ~/person.jpg --seam-carving ~/images/* | ||||||
|  | 
 | ||||||
|  | See `smartcrop --help` for more options. | ||||||
|  | 
 | ||||||
|  | ## Example | ||||||
|  | 
 | ||||||
|  | #### Content detected in image: | ||||||
|  |  | ||||||
|  | 
 | ||||||
|  | #### Cropped image based on content: | ||||||
|  |  | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
							
								
								
									
										163
									
								
								SmartCrop/facerecognizer.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										163
									
								
								SmartCrop/facerecognizer.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,163 @@ | ||||||
|  | //
 | ||||||
|  | // SmartCrop - A tool for content aware cropping of images
 | ||||||
|  | // Copyright (C) 2024 Carl Philipp Klemm
 | ||||||
|  | //
 | ||||||
|  | // This file is part of SmartCrop.
 | ||||||
|  | //
 | ||||||
|  | // SmartCrop is free software: you can redistribute it and/or modify
 | ||||||
|  | // it under the terms of the GNU General Public License as published by
 | ||||||
|  | // the Free Software Foundation, either version 3 of the License, or
 | ||||||
|  | // (at your option) any later version.
 | ||||||
|  | //
 | ||||||
|  | // SmartCrop is distributed in the hope that it will be useful,
 | ||||||
|  | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||||
|  | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||||
|  | // GNU General Public License for more details.
 | ||||||
|  | //
 | ||||||
|  | // You should have received a copy of the GNU General Public License
 | ||||||
|  | // along with SmartCrop.  If not, see <http://www.gnu.org/licenses/>.
 | ||||||
|  | //
 | ||||||
|  | 
 | ||||||
|  | #include "facerecognizer.h" | ||||||
|  | #include <filesystem> | ||||||
|  | 
 | ||||||
|  | #define INCBIN_PREFIX r | ||||||
|  | #include "incbin.h" | ||||||
|  | 
 | ||||||
|  | INCBIN(defaultRecognizer, WEIGHT_DIR "/face_recognition_sface_2021dec.onnx"); | ||||||
|  | INCBIN(defaultDetector, WEIGHT_DIR "/face_detection_yunet_2023mar.onnx"); | ||||||
|  | 
 | ||||||
|  | #include <opencv2/dnn/dnn.hpp> | ||||||
|  | #include <opencv2/core.hpp> | ||||||
|  | #include <opencv2/highgui.hpp> | ||||||
|  | #include <fstream> | ||||||
|  | 
 | ||||||
|  | #include "log.h" | ||||||
|  | 
 | ||||||
|  | static const std::vector<unsigned char> onnx((unsigned char*)rdefaultDetectorData, ((unsigned char*)rdefaultDetectorData)+rdefaultDetectorSize); | ||||||
|  | 
 | ||||||
|  | FaceRecognizer::FaceRecognizer(std::filesystem::path recognizerPath, const std::filesystem::path& detectorPath, const std::vector<cv::Mat>& referances) | ||||||
|  | { | ||||||
|  | 	if(detectorPath.empty()) | ||||||
|  | 	{ | ||||||
|  | 		Log(Log::INFO)<<"Using builtin face detection model"; | ||||||
|  | 
 | ||||||
|  | 		detector = cv::FaceDetectorYN::create("onnx", onnx, std::vector<unsigned char>(), {320, 320}, 0.6, 0.3, 5000, cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU); | ||||||
|  | 		if(!detector) | ||||||
|  | 			throw LoadException("Unable to load detector network from built in file"); | ||||||
|  | 	} | ||||||
|  | 	else | ||||||
|  | 	{ | ||||||
|  | 		detector = cv::FaceDetectorYN::create(detectorPath, "", {320, 320}, 0.6, 0.3, 5000, cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU); | ||||||
|  | 		if(!detector) | ||||||
|  | 			throw LoadException("Unable to load detector network from "+detectorPath.string()); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	bool defaultNetwork = recognizerPath.empty(); | ||||||
|  | 
 | ||||||
|  | 	if(defaultNetwork) | ||||||
|  | 	{ | ||||||
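|  | 		// cv::FaceRecognizerSF::create() only accepts a model path, so the weights embedded via incbin | ||||||
|  | 		// are written to a temporary file first and removed again once the network has been loaded. | ||||||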
|  | 		Log(Log::INFO)<<"Using builtin face recognition model"; | ||||||
|  | 		recognizerPath = cv::tempfile("onnx"); | ||||||
|  | 		std::ofstream file(recognizerPath); | ||||||
|  | 		if(!file.is_open()) | ||||||
|  | 			throw LoadException("Unable to open temporary file at "+recognizerPath.string()); | ||||||
|  | 		Log(Log::DEBUG)<<"Using "<<recognizerPath<<" as temporary file for onnx recognition network"; | ||||||
|  | 		file.write(reinterpret_cast<const char*>(rdefaultRecognizerData), rdefaultRecognizerSize); | ||||||
|  | 		file.close(); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	recognizer = cv::FaceRecognizerSF::create(recognizerPath.string(), "", cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU); | ||||||
|  | 
 | ||||||
|  | 	if(defaultNetwork) | ||||||
|  | 		std::filesystem::remove(recognizerPath); | ||||||
|  | 
 | ||||||
|  | 	if(!recognizer) | ||||||
|  | 		throw LoadException("Unable to load recognizer network from "+recognizerPath.string()); | ||||||
|  | 
 | ||||||
|  | 	addReferances(referances); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | cv::Mat FaceRecognizer::detectFaces(const cv::Mat& input) | ||||||
|  | { | ||||||
|  | 	detector->setInputSize(input.size()); | ||||||
|  | 	cv::Mat faces; | ||||||
|  | 	detector->detect(input, faces); | ||||||
|  | 	return faces; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | bool FaceRecognizer::addReferances(const std::vector<cv::Mat>& referances) | ||||||
|  | { | ||||||
|  | 	bool ret = false; | ||||||
|  | 	for(const cv::Mat& image : referances) | ||||||
|  | 	{ | ||||||
|  | 		cv::Mat faces = detectFaces(image); | ||||||
|  | 		if(faces.empty()) | ||||||
|  | 		{ | ||||||
|  | 			Log(Log::WARN)<<"A reference image provided does not contain any face"; | ||||||
|  | 			continue; | ||||||
|  | 		} | ||||||
|  | 		assert(faces.cols == 15); | ||||||
|  | 		if(faces.rows > 1) | ||||||
|  | 			Log(Log::WARN)<<"A reference image provided contains more than one face, only the first detected face will be considered"; | ||||||
|  | 		cv::Mat cropedImage; | ||||||
|  | 		recognizer->alignCrop(image, faces.row(0), cropedImage); | ||||||
|  | 		cv::Mat features; | ||||||
|  | 		recognizer->feature(cropedImage, features); | ||||||
|  | 		referanceFeatures.push_back(features.clone()); | ||||||
|  | 		ret = true; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return ret; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void FaceRecognizer::setThreshold(double threasholdIn) | ||||||
|  | { | ||||||
|  | 	threshold = threasholdIn; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | double FaceRecognizer::getThreshold() | ||||||
|  | { | ||||||
|  | 	return threshold; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void FaceRecognizer::clearReferances() | ||||||
|  | { | ||||||
|  | 	referanceFeatures.clear(); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | FaceRecognizer::Detection FaceRecognizer::isMatch(const cv::Mat& input, bool alone) | ||||||
|  | { | ||||||
|  | 	cv::Mat faces = detectFaces(input); | ||||||
|  | 
 | ||||||
|  | 	Detection bestMatch; | ||||||
|  | 	bestMatch.confidence = 0; | ||||||
|  | 	bestMatch.person = -1; | ||||||
|  | 
 | ||||||
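|  | 	// person == -2 signals that the frame was rejected because more than one face was found | ||||||
|  | 	// while the caller required the subject to be alone in the image. | ||||||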
|  | 	if(alone && faces.rows > 1) | ||||||
|  | 	{ | ||||||
|  | 		bestMatch.person = -2; | ||||||
|  | 		return bestMatch; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	for(int i = 0; i < faces.rows; ++i) | ||||||
|  | 	{ | ||||||
|  | 		cv::Mat face; | ||||||
|  | 		recognizer->alignCrop(input, faces.row(i), face); | ||||||
|  | 		cv::Mat features; | ||||||
|  | 		recognizer->feature(face, features); | ||||||
|  | 		features = features.clone(); | ||||||
|  | 		for(size_t referanceIndex = 0; referanceIndex < referanceFeatures.size(); ++referanceIndex) | ||||||
|  | 		{ | ||||||
|  | 			double score = recognizer->match(referanceFeatures[referanceIndex], features, cv::FaceRecognizerSF::FR_COSINE); | ||||||
|  | 			if(score > threshold && score > bestMatch.confidence) | ||||||
|  | 			{ | ||||||
|  | 				bestMatch.confidence = score; | ||||||
|  | 				bestMatch.person = referanceIndex; | ||||||
|  | 				bestMatch.rect = cv::Rect(faces.at<int>(i, 0), faces.at<int>(i, 1), faces.at<int>(i, 2), faces.at<int>(i, 3)); | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return bestMatch; | ||||||
|  | } | ||||||
							
								
								
									
										67
									
								
								SmartCrop/facerecognizer.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										67
									
								
								SmartCrop/facerecognizer.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,67 @@ | ||||||
|  | /* * SmartCrop - A tool for content aware cropping of images
 | ||||||
|  |  * Copyright (C) 2024 Carl Philipp Klemm | ||||||
|  |  * | ||||||
|  |  * This file is part of SmartCrop. | ||||||
|  |  * | ||||||
|  |  * SmartCrop is free software: you can redistribute it and/or modify | ||||||
|  |  * it under the terms of the GNU General Public License as published by | ||||||
|  |  * the Free Software Foundation, either version 3 of the License, or | ||||||
|  |  * (at your option) any later version. | ||||||
|  |  * | ||||||
|  |  * SmartCrop is distributed in the hope that it will be useful, | ||||||
|  |  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |  * GNU General Public License for more details. | ||||||
|  |  * | ||||||
|  |  * You should have received a copy of the GNU General Public License | ||||||
|  |  * along with SmartCrop.  If not, see <http://www.gnu.org/licenses/>.
 | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | #pragma once | ||||||
|  | #include <exception> | ||||||
|  | #include <opencv2/core/mat.hpp> | ||||||
|  | #include <opencv2/objdetect/face.hpp> | ||||||
|  | #include <opencv2/core.hpp> | ||||||
|  | #include <vector> | ||||||
|  | #include <memory> | ||||||
|  | #include <filesystem> | ||||||
|  | 
 | ||||||
|  | class FaceRecognizer | ||||||
|  | { | ||||||
|  | public: | ||||||
|  | 
 | ||||||
|  | 	struct Detection | ||||||
|  | 	{ | ||||||
|  | 		int person; | ||||||
|  | 		float confidence; | ||||||
|  | 		cv::Rect rect; | ||||||
|  | 	}; | ||||||
|  | 
 | ||||||
|  | 	class LoadException : public std::exception | ||||||
|  | 	{ | ||||||
|  | 	private: | ||||||
|  | 		std::string message; | ||||||
|  | 	public: | ||||||
|  | 		LoadException(const std::string& msg): std::exception(), message(msg) {} | ||||||
|  | 		virtual const char* what() const throw() override | ||||||
|  | 		{ | ||||||
|  | 			return message.c_str(); | ||||||
|  | 		} | ||||||
|  | 	}; | ||||||
|  | 
 | ||||||
|  | private: | ||||||
|  | 	std::vector<cv::Mat> referanceFeatures; | ||||||
|  | 	std::shared_ptr<cv::FaceRecognizerSF> recognizer; | ||||||
|  | 	std::shared_ptr<cv::FaceDetectorYN> detector; | ||||||
|  | 
 | ||||||
|  | 	double threshold = 0.363; | ||||||
|  | 
 | ||||||
|  | public: | ||||||
|  | 	FaceRecognizer(std::filesystem::path recognizerPath = "", const std::filesystem::path& detectorPath = "", const std::vector<cv::Mat>& referances = std::vector<cv::Mat>()); | ||||||
|  | 	cv::Mat detectFaces(const cv::Mat& input); | ||||||
|  | 	Detection isMatch(const cv::Mat& input, bool alone = false); | ||||||
|  | 	bool addReferances(const std::vector<cv::Mat>& referances); | ||||||
|  | 	void setThreshold(double threashold); | ||||||
|  | 	double getThreshold(); | ||||||
|  | 	void clearReferances(); | ||||||
|  | }; | ||||||
							
								
								
									
										
											BIN
										
									
								
								SmartCrop/images/IMGP3692.jpg
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								SmartCrop/images/IMGP3692.jpg
									
										
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| After Width: | Height: | Size: 251 KiB | 
							
								
								
									
										
											BIN
										
									
								
								SmartCrop/images/IMGP3692C.jpg
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								SmartCrop/images/IMGP3692C.jpg
									
										
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| After Width: | Height: | Size: 97 KiB | 
							
								
								
									
										495
									
								
								SmartCrop/incbin.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										495
									
								
								SmartCrop/incbin.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,495 @@ | ||||||
|  | /* * SmartCrop - A tool for content aware cropping of images
 | ||||||
|  |  * Copyright (C) 2024 Carl Philipp Klemm | ||||||
|  |  * | ||||||
|  |  * This file is part of SmartCrop. | ||||||
|  |  * | ||||||
|  |  * SmartCrop is free software: you can redistribute it and/or modify | ||||||
|  |  * it under the terms of the GNU General Public License as published by | ||||||
|  |  * the Free Software Foundation, either version 3 of the License, or | ||||||
|  |  * (at your option) any later version. | ||||||
|  |  * | ||||||
|  |  * SmartCrop is distributed in the hope that it will be useful, | ||||||
|  |  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |  * GNU General Public License for more details. | ||||||
|  |  * | ||||||
|  |  * You should have received a copy of the GNU General Public License | ||||||
|  |  * along with SmartCrop.  If not, see <http://www.gnu.org/licenses/>.
 | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | /**
 | ||||||
|  |  * @file incbin.h | ||||||
|  |  * @author Dale Weiler | ||||||
|  |  * @brief Utility for including binary files | ||||||
|  |  * | ||||||
|  |  * Facilities for including binary files into the current translation unit and | ||||||
|  |  * making use from them externally in other translation units. | ||||||
|  |  */ | ||||||
|  | #ifndef INCBIN_HDR | ||||||
|  | #define INCBIN_HDR | ||||||
|  | #include <limits.h> | ||||||
|  | #if   defined(__AVX512BW__) || \ | ||||||
|  |       defined(__AVX512CD__) || \ | ||||||
|  |       defined(__AVX512DQ__) || \ | ||||||
|  |       defined(__AVX512ER__) || \ | ||||||
|  |       defined(__AVX512PF__) || \ | ||||||
|  |       defined(__AVX512VL__) || \ | ||||||
|  |       defined(__AVX512F__) | ||||||
|  | # define INCBIN_ALIGNMENT_INDEX 6 | ||||||
|  | #elif defined(__AVX__)      || \ | ||||||
|  |       defined(__AVX2__) | ||||||
|  | # define INCBIN_ALIGNMENT_INDEX 5 | ||||||
|  | #elif defined(__SSE__)      || \ | ||||||
|  |       defined(__SSE2__)     || \ | ||||||
|  |       defined(__SSE3__)     || \ | ||||||
|  |       defined(__SSSE3__)    || \ | ||||||
|  |       defined(__SSE4_1__)   || \ | ||||||
|  |       defined(__SSE4_2__)   || \ | ||||||
|  |       defined(__neon__)     || \ | ||||||
|  |       defined(__ARM_NEON)   || \ | ||||||
|  |       defined(__ALTIVEC__) | ||||||
|  | # define INCBIN_ALIGNMENT_INDEX 4 | ||||||
|  | #elif ULONG_MAX != 0xffffffffu | ||||||
|  | # define INCBIN_ALIGNMENT_INDEX 3 | ||||||
|  | # else | ||||||
|  | # define INCBIN_ALIGNMENT_INDEX 2 | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | /* Lookup table of (1 << n) where `n' is `INCBIN_ALIGNMENT_INDEX' */ | ||||||
|  | #define INCBIN_ALIGN_SHIFT_0 1 | ||||||
|  | #define INCBIN_ALIGN_SHIFT_1 2 | ||||||
|  | #define INCBIN_ALIGN_SHIFT_2 4 | ||||||
|  | #define INCBIN_ALIGN_SHIFT_3 8 | ||||||
|  | #define INCBIN_ALIGN_SHIFT_4 16 | ||||||
|  | #define INCBIN_ALIGN_SHIFT_5 32 | ||||||
|  | #define INCBIN_ALIGN_SHIFT_6 64 | ||||||
|  | 
 | ||||||
|  | /* Actual alignment value */ | ||||||
|  | #define INCBIN_ALIGNMENT \ | ||||||
|  |     INCBIN_CONCATENATE( \ | ||||||
|  |         INCBIN_CONCATENATE(INCBIN_ALIGN_SHIFT, _), \ | ||||||
|  |         INCBIN_ALIGNMENT_INDEX) | ||||||
|  | 
 | ||||||
|  | /* Stringize */ | ||||||
|  | #define INCBIN_STR(X) \ | ||||||
|  |     #X | ||||||
|  | #define INCBIN_STRINGIZE(X) \ | ||||||
|  |     INCBIN_STR(X) | ||||||
|  | /* Concatenate */ | ||||||
|  | #define INCBIN_CAT(X, Y) \ | ||||||
|  |     X ## Y | ||||||
|  | #define INCBIN_CONCATENATE(X, Y) \ | ||||||
|  |     INCBIN_CAT(X, Y) | ||||||
|  | /* Deferred macro expansion */ | ||||||
|  | #define INCBIN_EVAL(X) \ | ||||||
|  |     X | ||||||
|  | #define INCBIN_INVOKE(N, ...) \ | ||||||
|  |     INCBIN_EVAL(N(__VA_ARGS__)) | ||||||
|  | /* Variable argument count for overloading by arity */ | ||||||
|  | #define INCBIN_VA_ARG_COUNTER(_1, _2, _3, N, ...) N | ||||||
|  | #define INCBIN_VA_ARGC(...) INCBIN_VA_ARG_COUNTER(__VA_ARGS__, 3, 2, 1, 0) | ||||||
|  | 
 | ||||||
|  | /* Green Hills uses a different directive for including binary data */ | ||||||
|  | #if defined(__ghs__) | ||||||
|  | #  if (__ghs_asm == 2) | ||||||
|  | #    define INCBIN_MACRO ".file" | ||||||
|  | /* Or consider the ".myrawdata" entry in the ld file */ | ||||||
|  | #  else | ||||||
|  | #    define INCBIN_MACRO "\tINCBIN" | ||||||
|  | #  endif | ||||||
|  | #else | ||||||
|  | #  define INCBIN_MACRO ".incbin" | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | #ifndef _MSC_VER | ||||||
|  | #  define INCBIN_ALIGN \ | ||||||
|  |     __attribute__((aligned(INCBIN_ALIGNMENT))) | ||||||
|  | #else | ||||||
|  | #  define INCBIN_ALIGN __declspec(align(INCBIN_ALIGNMENT)) | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | #if defined(__arm__) || /* GNU C and RealView */ \ | ||||||
|  |     defined(__arm) || /* Diab */ \ | ||||||
|  |     defined(_ARM) /* ImageCraft */ | ||||||
|  | #  define INCBIN_ARM | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | #ifdef __GNUC__ | ||||||
|  | /* Utilize .balign where supported */ | ||||||
|  | #  define INCBIN_ALIGN_HOST ".balign " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n" | ||||||
|  | #  define INCBIN_ALIGN_BYTE ".balign 1\n" | ||||||
|  | #elif defined(INCBIN_ARM) | ||||||
|  | /*
 | ||||||
|  |  * On arm assemblers, the alignment value is calculated as (1 << n) where `n' is | ||||||
|  |  * the shift count. This is the value passed to `.align' | ||||||
|  |  */ | ||||||
|  | #  define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT_INDEX) "\n" | ||||||
|  | #  define INCBIN_ALIGN_BYTE ".align 0\n" | ||||||
|  | #else | ||||||
|  | /* We assume other inline assembler's treat `.align' as `.balign' */ | ||||||
|  | #  define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n" | ||||||
|  | #  define INCBIN_ALIGN_BYTE ".align 1\n" | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | /* INCBIN_CONST is used by incbin.c generated files */ | ||||||
|  | #if defined(__cplusplus) | ||||||
|  | #  define INCBIN_EXTERNAL extern "C" | ||||||
|  | #  define INCBIN_CONST    extern const | ||||||
|  | #else | ||||||
|  | #  define INCBIN_EXTERNAL extern | ||||||
|  | #  define INCBIN_CONST    const | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | /**
 | ||||||
|  |  * @brief Optionally override the linker section into which size and data is | ||||||
|  |  * emitted. | ||||||
|  |  *  | ||||||
|  |  * @warning If you use this facility, you might have to deal with | ||||||
|  |  * platform-specific linker output section naming on your own. | ||||||
|  |  */ | ||||||
|  | #if !defined(INCBIN_OUTPUT_SECTION) | ||||||
|  | #  if defined(__APPLE__) | ||||||
|  | #    define INCBIN_OUTPUT_SECTION ".const_data" | ||||||
|  | #  else | ||||||
|  | #    define INCBIN_OUTPUT_SECTION ".rodata" | ||||||
|  | #  endif | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | /**
 | ||||||
|  |  * @brief Optionally override the linker section into which data is emitted. | ||||||
|  |  * | ||||||
|  |  * @warning If you use this facility, you might have to deal with | ||||||
|  |  * platform-specific linker output section naming on your own. | ||||||
|  |  */ | ||||||
|  | #if !defined(INCBIN_OUTPUT_DATA_SECTION) | ||||||
|  | #  define INCBIN_OUTPUT_DATA_SECTION INCBIN_OUTPUT_SECTION | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | /**
 | ||||||
|  |  * @brief Optionally override the linker section into which size is emitted. | ||||||
|  |  * | ||||||
|  |  * @warning If you use this facility, you might have to deal with | ||||||
|  |  * platform-specific linker output section naming on your own. | ||||||
|  |  *  | ||||||
|  |  * @note This is useful for Harvard architectures where program memory cannot | ||||||
|  |  * be directly read from the program without special instructions. With this you | ||||||
|  |  * can chose to put the size variable in RAM rather than ROM. | ||||||
|  |  */ | ||||||
|  | #if !defined(INCBIN_OUTPUT_SIZE_SECTION) | ||||||
|  | #  define INCBIN_OUTPUT_SIZE_SECTION INCBIN_OUTPUT_SECTION | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | #if defined(__APPLE__) | ||||||
|  | #  include "TargetConditionals.h" | ||||||
|  | #  if defined(TARGET_OS_IPHONE) && !defined(INCBIN_SILENCE_BITCODE_WARNING) | ||||||
|  | #    warning "incbin is incompatible with bitcode. Using the library will break upload to App Store if you have bitcode enabled. Add `#define INCBIN_SILENCE_BITCODE_WARNING` before including this header to silence this warning." | ||||||
|  | #  endif | ||||||
|  | /* The directives are different for Apple branded compilers */ | ||||||
|  | #  define INCBIN_SECTION         INCBIN_OUTPUT_SECTION "\n" | ||||||
|  | #  define INCBIN_GLOBAL(NAME)    ".globl " INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n" | ||||||
|  | #  define INCBIN_INT             ".long " | ||||||
|  | #  define INCBIN_MANGLE          "_" | ||||||
|  | #  define INCBIN_BYTE            ".byte " | ||||||
|  | #  define INCBIN_TYPE(...) | ||||||
|  | #else | ||||||
|  | #  define INCBIN_SECTION         ".section " INCBIN_OUTPUT_SECTION "\n" | ||||||
|  | #  define INCBIN_GLOBAL(NAME)    ".global " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n" | ||||||
|  | #  if defined(__ghs__) | ||||||
|  | #    define INCBIN_INT           ".word " | ||||||
|  | #  else | ||||||
|  | #    define INCBIN_INT           ".int " | ||||||
|  | #  endif | ||||||
|  | #  if defined(__USER_LABEL_PREFIX__) | ||||||
|  | #    define INCBIN_MANGLE        INCBIN_STRINGIZE(__USER_LABEL_PREFIX__) | ||||||
|  | #  else | ||||||
|  | #    define INCBIN_MANGLE        "" | ||||||
|  | #  endif | ||||||
|  | #  if defined(INCBIN_ARM) | ||||||
|  | /* On arm assemblers, `@' is used as a line comment token */ | ||||||
|  | #    define INCBIN_TYPE(NAME)    ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", %object\n" | ||||||
|  | #  elif defined(__MINGW32__) || defined(__MINGW64__) | ||||||
|  | /* Mingw doesn't support this directive either */ | ||||||
|  | #    define INCBIN_TYPE(NAME) | ||||||
|  | #  else | ||||||
|  | /* It's safe to use `@' on other architectures */ | ||||||
|  | #    define INCBIN_TYPE(NAME)    ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", @object\n" | ||||||
|  | #  endif | ||||||
|  | #  define INCBIN_BYTE            ".byte " | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | /* List of style types used for symbol names */ | ||||||
|  | #define INCBIN_STYLE_CAMEL 0 | ||||||
|  | #define INCBIN_STYLE_SNAKE 1 | ||||||
|  | 
 | ||||||
|  | /**
 | ||||||
|  |  * @brief Specify the prefix to use for symbol names. | ||||||
|  |  * | ||||||
|  |  * @note By default this is "g". | ||||||
|  |  * | ||||||
|  |  * @code | ||||||
|  |  * #define INCBIN_PREFIX incbin | ||||||
|  |  * #include "incbin.h" | ||||||
|  |  * INCBIN(Foo, "foo.txt"); | ||||||
|  |  * | ||||||
|  |  * // Now you have the following symbols instead:
 | ||||||
|  |  * // const unsigned char incbinFoo<data>[];
 | ||||||
|  |  * // const unsigned char *const incbinFoo<end>;
 | ||||||
|  |  * // const unsigned int incbinFoo<size>;
 | ||||||
|  |  * @endcode | ||||||
|  |  */ | ||||||
|  | #if !defined(INCBIN_PREFIX) | ||||||
|  | #  define INCBIN_PREFIX g | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | /**
 | ||||||
|  |  * @brief Specify the style used for symbol names. | ||||||
|  |  * | ||||||
|  |  * Possible options are | ||||||
|  |  * - INCBIN_STYLE_CAMEL "CamelCase" | ||||||
|  |  * - INCBIN_STYLE_SNAKE "snake_case" | ||||||
|  |  * | ||||||
|  |  * @note By default this is INCBIN_STYLE_CAMEL | ||||||
|  |  * | ||||||
|  |  * @code | ||||||
|  |  * #define INCBIN_STYLE INCBIN_STYLE_SNAKE | ||||||
|  |  * #include "incbin.h" | ||||||
|  |  * INCBIN(foo, "foo.txt"); | ||||||
|  |  * | ||||||
|  |  * // Now you have the following symbols:
 | ||||||
|  |  * // const unsigned char <prefix>foo_data[];
 | ||||||
|  |  * // const unsigned char *const <prefix>foo_end;
 | ||||||
|  |  * // const unsigned int <prefix>foo_size;
 | ||||||
|  |  * @endcode | ||||||
|  |  */ | ||||||
|  | #if !defined(INCBIN_STYLE) | ||||||
|  | #  define INCBIN_STYLE INCBIN_STYLE_CAMEL | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | /* Style lookup tables */ | ||||||
|  | #define INCBIN_STYLE_0_DATA Data | ||||||
|  | #define INCBIN_STYLE_0_END End | ||||||
|  | #define INCBIN_STYLE_0_SIZE Size | ||||||
|  | #define INCBIN_STYLE_1_DATA _data | ||||||
|  | #define INCBIN_STYLE_1_END _end | ||||||
|  | #define INCBIN_STYLE_1_SIZE _size | ||||||
|  | 
 | ||||||
|  | /* Style lookup: returning identifier */ | ||||||
|  | #define INCBIN_STYLE_IDENT(TYPE) \ | ||||||
|  |     INCBIN_CONCATENATE( \ | ||||||
|  |         INCBIN_STYLE_, \ | ||||||
|  |         INCBIN_CONCATENATE( \ | ||||||
|  |             INCBIN_EVAL(INCBIN_STYLE), \ | ||||||
|  |             INCBIN_CONCATENATE(_, TYPE))) | ||||||
|  | 
 | ||||||
|  | /* Style lookup: returning string literal */ | ||||||
|  | #define INCBIN_STYLE_STRING(TYPE) \ | ||||||
|  |     INCBIN_STRINGIZE( \ | ||||||
|  |         INCBIN_STYLE_IDENT(TYPE)) \ | ||||||
|  | 
 | ||||||
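|  | /* For example, with INCBIN_STYLE == INCBIN_STYLE_SNAKE, INCBIN_STYLE_IDENT(DATA) expands to | ||||||
|  |  * the token `_data' and INCBIN_STYLE_STRING(DATA) to the string "_data". */ | ||||||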
|  | /* Generate the global labels by indirectly invoking the macro with our style
 | ||||||
|  |  * type and concatenating the name against them. */ | ||||||
|  | #define INCBIN_GLOBAL_LABELS(NAME, TYPE) \ | ||||||
|  |     INCBIN_INVOKE( \ | ||||||
|  |         INCBIN_GLOBAL, \ | ||||||
|  |         INCBIN_CONCATENATE( \ | ||||||
|  |             NAME, \ | ||||||
|  |             INCBIN_INVOKE( \ | ||||||
|  |                 INCBIN_STYLE_IDENT, \ | ||||||
|  |                 TYPE))) \ | ||||||
|  |     INCBIN_INVOKE( \ | ||||||
|  |         INCBIN_TYPE, \ | ||||||
|  |         INCBIN_CONCATENATE( \ | ||||||
|  |             NAME, \ | ||||||
|  |             INCBIN_INVOKE( \ | ||||||
|  |                 INCBIN_STYLE_IDENT, \ | ||||||
|  |                 TYPE))) | ||||||
|  | 
 | ||||||
|  | /**
 | ||||||
|  |  * @brief Externally reference binary data included in another translation unit. | ||||||
|  |  * | ||||||
|  |  * Produces three external symbols that reference the binary data included in | ||||||
|  |  * another translation unit. | ||||||
|  |  * | ||||||
|  |  * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with | ||||||
|  |  * "Data", as well as "End" and "Size" after. An example is provided below. | ||||||
|  |  * | ||||||
|  |  * @param TYPE Optional array type. Omitting this picks a default of `unsigned char`. | ||||||
|  |  * @param NAME The name given for the binary data | ||||||
|  |  * | ||||||
|  |  * @code | ||||||
|  |  * INCBIN_EXTERN(Foo); | ||||||
|  |  * | ||||||
|  |  * // Now you have the following symbols:
 | ||||||
|  |  * // extern const unsigned char <prefix>Foo<data>[];
 | ||||||
|  |  * // extern const unsigned char *const <prefix>Foo<end>;
 | ||||||
|  |  * // extern const unsigned int <prefix>Foo<size>;
 | ||||||
|  |  * @endcode | ||||||
|  |  *  | ||||||
|  |  * You may optionally specify a custom data type as the first argument. | ||||||
|  |  * @code | ||||||
|  |  * INCBIN_EXTERN(custom_type, Foo); | ||||||
|  |  *  | ||||||
|  |  * // Now you have the following symbols:
 | ||||||
|  |  * // extern const custom_type <prefix>Foo<data>[];
 | ||||||
|  |  * // extern const custom_type *const <prefix>Foo<end>;
 | ||||||
|  |  * // extern const unsigned int <prefix>Foo<size>;
 | ||||||
|  |  * @endcode | ||||||
|  |  */ | ||||||
|  | #define INCBIN_EXTERN(...) \ | ||||||
|  |     INCBIN_CONCATENATE(INCBIN_EXTERN_, INCBIN_VA_ARGC(__VA_ARGS__))(__VA_ARGS__) | ||||||
|  | #define INCBIN_EXTERN_1(NAME, ...) \ | ||||||
|  |     INCBIN_EXTERN_2(unsigned char, NAME) | ||||||
|  | #define INCBIN_EXTERN_2(TYPE, NAME) \ | ||||||
|  |     INCBIN_EXTERNAL const INCBIN_ALIGN TYPE \ | ||||||
|  |         INCBIN_CONCATENATE( \ | ||||||
|  |             INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \ | ||||||
|  |             INCBIN_STYLE_IDENT(DATA))[]; \ | ||||||
|  |     INCBIN_EXTERNAL const INCBIN_ALIGN TYPE *const \ | ||||||
|  |     INCBIN_CONCATENATE( \ | ||||||
|  |         INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \ | ||||||
|  |         INCBIN_STYLE_IDENT(END)); \ | ||||||
|  |     INCBIN_EXTERNAL const unsigned int \ | ||||||
|  |         INCBIN_CONCATENATE( \ | ||||||
|  |             INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \ | ||||||
|  |             INCBIN_STYLE_IDENT(SIZE)) | ||||||
|  | 
 | ||||||
|  | /**
 | ||||||
|  |  * @brief Externally reference textual data included in another translation unit. | ||||||
|  |  * | ||||||
|  |  * Produces three external symbols that reference the textual data included in | ||||||
|  |  * another translation unit. | ||||||
|  |  * | ||||||
|  |  * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with | ||||||
|  |  * "Data", as well as "End" and "Size" after. An example is provided below. | ||||||
|  |  * | ||||||
|  |  * @param NAME The name given for the textual data | ||||||
|  |  * | ||||||
|  |  * @code | ||||||
|  |  * INCTXT_EXTERN(Foo); | ||||||
|  |  * | ||||||
|  |  * // Now you have the following symbols:
 | ||||||
|  |  * // extern const char <prefix>Foo<data>[];
 | ||||||
|  |  * // extern const char *const <prefix>Foo<end>;
 | ||||||
|  |  * // extern const unsigned int <prefix>Foo<size>;
 | ||||||
|  |  * @endcode | ||||||
|  |  */ | ||||||
|  | #define INCTXT_EXTERN(NAME) \ | ||||||
|  |     INCBIN_EXTERN_2(char, NAME) | ||||||
|  | 
 | ||||||
|  | /**
 | ||||||
|  |  * @brief Include a binary file into the current translation unit. | ||||||
|  |  * | ||||||
|  |  * Includes a binary file into the current translation unit, producing three symbols | ||||||
|  |  * for objects that encode the data and size respectively. | ||||||
|  |  * | ||||||
|  |  * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with | ||||||
|  |  * "Data", as well as "End" and "Size" after. An example is provided below. | ||||||
|  |  * | ||||||
|  |  * @param TYPE Optional array type. Omitting this picks a default of `unsigned char`. | ||||||
|  |  * @param NAME The name to associate with this binary data (as an identifier.) | ||||||
|  |  * @param FILENAME The file to include (as a string literal.) | ||||||
|  |  * | ||||||
|  |  * @code | ||||||
|  |  * INCBIN(Icon, "icon.png"); | ||||||
|  |  * | ||||||
|  |  * // Now you have the following symbols:
 | ||||||
|  |  * // const unsigned char <prefix>Icon<data>[];
 | ||||||
|  |  * // const unsigned char *const <prefix>Icon<end>;
 | ||||||
|  |  * // const unsigned int <prefix>Icon<size>;
 | ||||||
|  |  * @endcode | ||||||
|  |  *  | ||||||
|  |  * You may optionally specify a custom data type as the first argument. | ||||||
|  |  * These macros are specialized by arity. | ||||||
|  |  * @code | ||||||
|  |  * INCBIN(custom_type, Icon, "icon.png"); | ||||||
|  |  * | ||||||
|  |  * // Now you have the following symbols:
 | ||||||
|  |  * // const custom_type <prefix>Icon<data>[];
 | ||||||
|  |  * // const custom_type *const <prefix>Icon<end>;
 | ||||||
|  |  * // const unsigned int <prefix>Icon<size>;
 | ||||||
|  |  * @endcode | ||||||
|  |  * | ||||||
|  |  * @warning This must be used in global scope | ||||||
|  |  * @warning The identifiers may be different if INCBIN_STYLE is not default | ||||||
|  |  * | ||||||
|  |  * To externally reference the data included by this in another translation unit | ||||||
|  |  * please @see INCBIN_EXTERN. | ||||||
|  |  */ | ||||||
|  | #ifdef _MSC_VER | ||||||
|  | #  define INCBIN(NAME, FILENAME) \ | ||||||
|  |       INCBIN_EXTERN(NAME) | ||||||
|  | #else | ||||||
|  | #  define INCBIN(...) \ | ||||||
|  |      INCBIN_CONCATENATE(INCBIN_, INCBIN_VA_ARGC(__VA_ARGS__))(__VA_ARGS__) | ||||||
|  | #  if defined(__GNUC__) | ||||||
|  | #    define INCBIN_1(...) _Pragma("GCC error \"Single argument INCBIN not allowed\"") | ||||||
|  | #  elif defined(__clang__) | ||||||
|  | #    define INCBIN_1(...) _Pragma("clang error \"Single argument INCBIN not allowed\"") | ||||||
|  | #  else | ||||||
|  | #    define INCBIN_1(...) /* Cannot do anything here */ | ||||||
|  | #  endif | ||||||
|  | #  define INCBIN_2(NAME, FILENAME) \ | ||||||
|  |       INCBIN_3(unsigned char, NAME, FILENAME) | ||||||
|  | #  define INCBIN_3(TYPE, NAME, FILENAME) INCBIN_COMMON(TYPE, NAME, FILENAME, /* No terminator for binary data */) | ||||||
|  | #  define INCBIN_COMMON(TYPE, NAME, FILENAME, TERMINATOR) \ | ||||||
|  |     __asm__(INCBIN_SECTION \ | ||||||
|  |             INCBIN_GLOBAL_LABELS(NAME, DATA) \ | ||||||
|  |             INCBIN_ALIGN_HOST \ | ||||||
|  |             INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) ":\n" \ | ||||||
|  |             INCBIN_MACRO " \"" FILENAME "\"\n" \ | ||||||
|  |                 TERMINATOR \ | ||||||
|  |             INCBIN_GLOBAL_LABELS(NAME, END) \ | ||||||
|  |             INCBIN_ALIGN_BYTE \ | ||||||
|  |             INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) ":\n" \ | ||||||
|  |                 INCBIN_BYTE "1\n" \ | ||||||
|  |             INCBIN_GLOBAL_LABELS(NAME, SIZE) \ | ||||||
|  |             INCBIN_ALIGN_HOST \ | ||||||
|  |             INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(SIZE) ":\n" \ | ||||||
|  |                 INCBIN_INT INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) " - " \ | ||||||
|  |                            INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) "\n" \ | ||||||
|  |             INCBIN_ALIGN_HOST \ | ||||||
|  |             ".text\n" \ | ||||||
|  |     ); \ | ||||||
|  |     INCBIN_EXTERN(TYPE, NAME) | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | /**
 | ||||||
|  |  * @brief Include a textual file into the current translation unit. | ||||||
|  |  *  | ||||||
|  |  * This behaves the same as INCBIN except it produces char compatible arrays | ||||||
|  |  * and implicitly adds a null-terminator byte, thus the size of data included | ||||||
|  |  * by this is one byte larger than that of INCBIN. | ||||||
|  |  * | ||||||
|  |  * Includes a textual file into the current translation unit, producing three | ||||||
|  |  * symbols for objects that encode the data and size respectively. | ||||||
|  |  * | ||||||
|  |  * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with | ||||||
|  |  * "Data", as well as "End" and "Size" after. An example is provided below. | ||||||
|  |  * | ||||||
|  |  * @param NAME The name to associate with this binary data (as an identifier.) | ||||||
|  |  * @param FILENAME The file to include (as a string literal.) | ||||||
|  |  * | ||||||
|  |  * @code | ||||||
|  |  * INCTXT(Readme, "readme.txt"); | ||||||
|  |  * | ||||||
|  |  * // Now you have the following symbols:
 | ||||||
|  |  * // const char <prefix>Readme<data>[];
 | ||||||
|  |  * // const char *const <prefix>Readme<end>;
 | ||||||
|  |  * // const unsigned int <prefix>Readme<size>;
 | ||||||
|  |  * @endcode | ||||||
|  |  * | ||||||
|  |  * @warning This must be used in global scope | ||||||
|  |  * @warning The identifiers may be different if INCBIN_STYLE is not default | ||||||
|  |  * | ||||||
|  |  * To externally reference the data included by this in another translation unit | ||||||
|  |  * please @see INCBIN_EXTERN. | ||||||
|  |  */ | ||||||
|  | #if defined(_MSC_VER) | ||||||
|  | #  define INCTXT(NAME, FILENAME) \ | ||||||
|  |      INCBIN_EXTERN(NAME) | ||||||
|  | #else | ||||||
|  | #  define INCTXT(NAME, FILENAME) \ | ||||||
|  |      INCBIN_COMMON(char, NAME, FILENAME, INCBIN_BYTE "0\n") | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | #endif | ||||||
							
								
								
									
128 SmartCrop/intelligentroi.cpp Normal file
|  | @@ -0,0 +1,128 @@ | ||||||
|  | //
 | ||||||
|  | // SmartCrop - A tool for content aware cropping of images
 | ||||||
|  | // Copyright (C) 2024 Carl Philipp Klemm
 | ||||||
|  | //
 | ||||||
|  | // This file is part of SmartCrop.
 | ||||||
|  | //
 | ||||||
|  | // SmartCrop is free software: you can redistribute it and/or modify
 | ||||||
|  | // it under the terms of the GNU General Public License as published by
 | ||||||
|  | // the Free Software Foundation, either version 3 of the License, or
 | ||||||
|  | // (at your option) any later version.
 | ||||||
|  | //
 | ||||||
|  | // SmartCrop is distributed in the hope that it will be useful,
 | ||||||
|  | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||||
|  | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||||
|  | // GNU General Public License for more details.
 | ||||||
|  | //
 | ||||||
|  | // You should have received a copy of the GNU General Public License
 | ||||||
|  | // along with SmartCrop.  If not, see <http://www.gnu.org/licenses/>.
 | ||||||
|  | //
 | ||||||
|  | 
 | ||||||
|  | #include "intelligentroi.h" | ||||||
|  | 
 | ||||||
|  | #include <opencv2/imgproc.hpp> | ||||||
|  | 
 | ||||||
|  | #include "utils.h" | ||||||
|  | #include "log.h" | ||||||
|  | 
 | ||||||
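|  | // Orders candidate points by descending priority; ties are broken by distance to the image center (closer first). | ||||||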
|  | bool InteligentRoi::compPointPrio(const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b, const cv::Point2i& center) | ||||||
|  | { | ||||||
|  | 	if(a.second != b.second) | ||||||
|  | 		return a.second > b.second; | ||||||
|  | 
 | ||||||
|  | 	double distA = pointDist(a.first, center); | ||||||
|  | 	double distB = pointDist(b.first, center); | ||||||
|  | 
 | ||||||
|  | 	return distA < distB; | ||||||
|  | } | ||||||
|  | 
 | ||||||
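|  | // Translates rect by the minimum amount needed so that point lies inside it; the size of rect is unchanged. | ||||||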
|  | void InteligentRoi::slideRectToPoint(cv::Rect& rect, const cv::Point2i& point) | ||||||
|  | { | ||||||
|  | 	if(!pointInRect(point, rect)) | ||||||
|  | 	{ | ||||||
|  | 		if(point.x < rect.x) | ||||||
|  | 			rect.x = point.x; | ||||||
|  | 		else if(point.x > rect.x+rect.width) | ||||||
|  | 			rect.x = point.x-rect.width; | ||||||
|  | 		if(point.y < rect.y) | ||||||
|  | 			rect.y = point.y; | ||||||
|  | 		else if(point.y > rect.y+rect.height) | ||||||
|  | 			rect.y = point.y-rect.height; | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
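|  | // Computes the largest centered square crop and slides it toward the prioritized points; when they cannot | ||||||
|  | // all be covered, the lowest-ranked points are dropped and incompleate is set. The result is clamped to the image. | ||||||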
|  | cv::Rect InteligentRoi::maxRect(bool& incompleate, const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude) | ||||||
|  | { | ||||||
|  | 	incompleate = false; | ||||||
|  | 	int diameter = std::min(imageSize.height, imageSize.width); | ||||||
|  | 	cv::Point2i point(imageSize.width/2, imageSize.height/2); | ||||||
|  | 	cv::Rect candiate(point.x-diameter/2, point.y-diameter/2, diameter, diameter); | ||||||
|  | 
 | ||||||
|  | 	std::sort(mustInclude.begin(), mustInclude.end(), | ||||||
|  | 		[&point](const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b){return compPointPrio(a, b, point);}); | ||||||
|  | 
 | ||||||
|  | 	while(true) | ||||||
|  | 	{ | ||||||
|  | 		cv::Rect includeRect = rectFromPoints(mustInclude); | ||||||
|  | 		if(includeRect.width-2 > diameter || includeRect.height-2 > diameter) | ||||||
|  | 		{ | ||||||
|  | 			incompleate = true; | ||||||
|  | 			slideRectToPoint(candiate, mustInclude.back().first); | ||||||
|  | 			mustInclude.pop_back(); | ||||||
|  | 			Log(Log::DEBUG)<<"cant fill"; | ||||||
|  | 			for(const std::pair<cv::Point2i, int>& mipoint : mustInclude) | ||||||
|  | 				Log(Log::DEBUG)<<mipoint.first<<' '<<pointDist(mipoint.first, point)<<' '<<mipoint.second; | ||||||
|  | 		} | ||||||
|  | 		else | ||||||
|  | 		{ | ||||||
|  | 			break; | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	for(const std::pair<cv::Point2i, int>& includePoint : mustInclude) | ||||||
|  | 		slideRectToPoint(candiate, includePoint.first); | ||||||
|  | 
 | ||||||
|  | 	if(candiate.x < 0) | ||||||
|  | 		candiate.x = 0; | ||||||
|  | 	if(candiate.y < 0) | ||||||
|  | 		candiate.y = 0; | ||||||
|  | 	if(candiate.x+candiate.width > imageSize.width) | ||||||
|  | 		candiate.width = imageSize.width-candiate.x; | ||||||
|  | 	if(candiate.y+candiate.height > imageSize.height) | ||||||
|  | 		candiate.height = imageSize.height-candiate.y; | ||||||
|  | 
 | ||||||
|  | 	return candiate; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | InteligentRoi::InteligentRoi(const Yolo& yolo) | ||||||
|  | { | ||||||
|  | 	personId = yolo.getClassForStr("person"); | ||||||
|  | } | ||||||
|  | 
 | ||||||
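|  | // Collects weighted corner points from the detections (persons get extra weight along the top edge, where | ||||||
|  | // the head usually is) and derives a square crop from them; returns true when not every point could be covered. | ||||||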
|  | bool InteligentRoi::getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize) | ||||||
|  | { | ||||||
|  | 	std::vector<std::pair<cv::Point2i, int>> corners; | ||||||
|  | 	for(size_t i = 0; i < detections.size(); ++i) | ||||||
|  | 	{ | ||||||
|  | 		int priority = detections[i].priority; | ||||||
|  | 		if(detections[i].class_id == personId) | ||||||
|  | 		{ | ||||||
|  | 			corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width/2, 0), priority+2}); | ||||||
|  | 			corners.push_back({detections[i].box.tl(), priority+1}); | ||||||
|  | 			corners.push_back({detections[i].box.br(), priority}); | ||||||
|  | 			corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width, 0), priority+1}); | ||||||
|  | 			corners.push_back({detections[i].box.br()+cv::Point2i(0-detections[i].box.width, 0), priority}); | ||||||
|  | 		} | ||||||
|  | 		else | ||||||
|  | 		{ | ||||||
|  | 			corners.push_back({detections[i].box.tl(), priority}); | ||||||
|  | 			corners.push_back({detections[i].box.br(), priority}); | ||||||
|  | 			corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width, 0), priority}); | ||||||
|  | 			corners.push_back({detections[i].box.br()+cv::Point2i(0-detections[i].box.width, 0), priority}); | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	bool incompleate; | ||||||
|  | 	out = maxRect(incompleate, imageSize, corners); | ||||||
|  | 	return incompleate; | ||||||
|  | } | ||||||
							
								
								
									
37 SmartCrop/intelligentroi.h Normal file
|  | @@ -0,0 +1,37 @@ | ||||||
|  | /* * SmartCrop - A tool for content aware cropping of images
 | ||||||
|  |  * Copyright (C) 2024 Carl Philipp Klemm | ||||||
|  |  * | ||||||
|  |  * This file is part of SmartCrop. | ||||||
|  |  * | ||||||
|  |  * SmartCrop is free software: you can redistribute it and/or modify | ||||||
|  |  * it under the terms of the GNU General Public License as published by | ||||||
|  |  * the Free Software Foundation, either version 3 of the License, or | ||||||
|  |  * (at your option) any later version. | ||||||
|  |  * | ||||||
|  |  * SmartCrop is distributed in the hope that it will be useful, | ||||||
|  |  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |  * GNU General Public License for more details. | ||||||
|  |  * | ||||||
|  |  * You should have received a copy of the GNU General Public License | ||||||
|  |  * along with SmartCrop.  If not, see <http://www.gnu.org/licenses/>.
 | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | #pragma once | ||||||
|  | 
 | ||||||
|  | #include <opencv2/imgproc.hpp> | ||||||
|  | 
 | ||||||
|  | #include "yolo.h" | ||||||
|  | 
 | ||||||
|  | class InteligentRoi | ||||||
|  | { | ||||||
|  | private: | ||||||
|  | 	int personId; | ||||||
|  | 	static bool compPointPrio(const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b, const cv::Point2i& center); | ||||||
|  | 	static void slideRectToPoint(cv::Rect& rect, const cv::Point2i& point); | ||||||
|  | 	static cv::Rect maxRect(bool& incompleate, const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude = {}); | ||||||
|  | 
 | ||||||
|  | public: | ||||||
|  | 	InteligentRoi(const Yolo& yolo); | ||||||
|  | 	bool getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize); | ||||||
|  | }; | ||||||
							
								
								
									
63 SmartCrop/log.cpp Normal file
|  | @@ -0,0 +1,63 @@ | ||||||
|  | /**
 | ||||||
|  | * Lubricant Detecter | ||||||
|  | * Copyright (C) 2021 Carl Klemm | ||||||
|  | * | ||||||
|  | * This program is free software; you can redistribute it and/or | ||||||
|  | * modify it under the terms of the GNU General Public License | ||||||
|  | * version 3 as published by the Free Software Foundation. | ||||||
|  | * | ||||||
|  | * This program is distributed in the hope that it will be useful, | ||||||
|  | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | * GNU General Public License for more details. | ||||||
|  | * | ||||||
|  | * You should have received a copy of the GNU General Public License | ||||||
|  | * along with this program; if not, write to the | ||||||
|  | * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | ||||||
|  | * Boston, MA  02110-1301, USA. | ||||||
|  | */ | ||||||
|  | 
 | ||||||
|  | #include "log.h" | ||||||
|  | 
 | ||||||
|  | Log::Log(Level type, bool endlineI): endline(endlineI) | ||||||
|  | { | ||||||
|  | 	msglevel = type; | ||||||
|  | 	if(headers) | ||||||
|  | 	{ | ||||||
|  | 		operator << ("["+getLabel(type)+"] "); | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Log::~Log() | ||||||
|  | { | ||||||
|  | 	if(opened && endline) | ||||||
|  | 	{ | ||||||
|  | 		std::cout<<'\n'; | ||||||
|  | 	} | ||||||
|  | 	opened = false; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | std::string Log::getLabel(Level level) | ||||||
|  | { | ||||||
|  | 	std::string label; | ||||||
|  | 	switch(level) | ||||||
|  | 	{ | ||||||
|  | 		case DEBUG: | ||||||
|  | 			label = "DEBUG"; | ||||||
|  | 			break; | ||||||
|  | 		case INFO: | ||||||
|  | 			label = "INFO "; | ||||||
|  | 			break; | ||||||
|  | 		case WARN: | ||||||
|  | 			label = "WARN "; | ||||||
|  | 			break; | ||||||
|  | 		case ERROR: | ||||||
|  | 			label = "ERROR"; | ||||||
|  | 			break; | ||||||
|  | 	} | ||||||
|  | 	return label; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | bool Log::headers = false; | ||||||
|  | Log::Level Log::level = WARN; | ||||||
							
								
								
									
64 SmartCrop/log.h Normal file
|  | @@ -0,0 +1,64 @@ | ||||||
|  | /**
 | ||||||
|  | * eisgenerator | ||||||
|  | * Copyright (C) 2021 Carl Klemm | ||||||
|  | * | ||||||
|  | * This program is free software; you can redistribute it and/or | ||||||
|  | * modify it under the terms of the GNU General Public License | ||||||
|  | * version 3 as published by the Free Software Foundation. | ||||||
|  | * | ||||||
|  | * This program is distributed in the hope that it will be useful, | ||||||
|  | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | * GNU General Public License for more details. | ||||||
|  | * | ||||||
|  | * You should have received a copy of the GNU General Public License | ||||||
|  | * along with this program; if not, write to the | ||||||
|  | * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | ||||||
|  | * Boston, MA  02110-1301, USA. | ||||||
|  | */ | ||||||
|  | 
 | ||||||
|  | #pragma once | ||||||
|  | #include <iostream> | ||||||
|  | #include <string> | ||||||
|  | 
 | ||||||
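|  | // Minimal iostream-style logger: messages below the global Log::level are discarded, ERROR goes to stderr, | ||||||
|  | // everything else to stdout, and a newline is appended on destruction. Typical use: Log(Log::INFO)<<"loaded "<<path; | ||||||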
|  | class Log | ||||||
|  | { | ||||||
|  | public: | ||||||
|  | 
 | ||||||
|  | 	enum Level | ||||||
|  | 	{ | ||||||
|  | 		DEBUG, | ||||||
|  | 		INFO, | ||||||
|  | 		WARN, | ||||||
|  | 		ERROR | ||||||
|  | 	}; | ||||||
|  | 
 | ||||||
|  | private: | ||||||
|  | 	bool opened = false; | ||||||
|  | 	Level msglevel = DEBUG; | ||||||
|  | 	bool endline = true; | ||||||
|  | 
 | ||||||
|  | 	std::string getLabel(Level level); | ||||||
|  | 
 | ||||||
|  | public: | ||||||
|  | 
 | ||||||
|  | 	static bool headers; | ||||||
|  | 	static Level level; | ||||||
|  | 
 | ||||||
|  | 	Log() {} | ||||||
|  | 	Log(Level type, bool endlineI = true); | ||||||
|  | 	~Log(); | ||||||
|  | 
 | ||||||
|  | 	template<class T> Log &operator<<(const T &msg) | ||||||
|  | 	{ | ||||||
|  | 		if(msglevel >= level) | ||||||
|  | 		{ | ||||||
|  | 			if(msglevel == ERROR) | ||||||
|  | 				std::cerr<<msg; | ||||||
|  | 			else | ||||||
|  | 				std::cout<<msg; | ||||||
|  | 			opened = true; | ||||||
|  | 		} | ||||||
|  | 		return *this; | ||||||
|  | 	} | ||||||
|  | }; | ||||||
							
								
								
									
460 SmartCrop/main.cpp Normal file
|  | @@ -0,0 +1,460 @@ | ||||||
|  | //
 | ||||||
|  | // SmartCrop - A tool for content aware cropping of images
 | ||||||
|  | // Copyright (C) 2024 Carl Philipp Klemm
 | ||||||
|  | //
 | ||||||
|  | // This file is part of SmartCrop.
 | ||||||
|  | //
 | ||||||
|  | // SmartCrop is free software: you can redistribute it and/or modify
 | ||||||
|  | // it under the terms of the GNU General Public License as published by
 | ||||||
|  | // the Free Software Foundation, either version 3 of the License, or
 | ||||||
|  | // (at your option) any later version.
 | ||||||
|  | //
 | ||||||
|  | // SmartCrop is distributed in the hope that it will be useful,
 | ||||||
|  | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||||
|  | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||||
|  | // GNU General Public License for more details.
 | ||||||
|  | //
 | ||||||
|  | // You should have received a copy of the GNU General Public License
 | ||||||
|  | // along with SmartCrop.  If not, see <http://www.gnu.org/licenses/>.
 | ||||||
|  | //
 | ||||||
|  | 
 | ||||||
|  | #include <filesystem> | ||||||
|  | #include <iostream> | ||||||
|  | #include <opencv2/core.hpp> | ||||||
|  | #include <opencv2/core/types.hpp> | ||||||
|  | #include <opencv2/imgproc.hpp> | ||||||
|  | #include <opencv2/highgui.hpp> | ||||||
|  | #include <algorithm> | ||||||
|  | #include <execution> | ||||||
|  | #include <string> | ||||||
|  | #include <vector> | ||||||
|  | #include <numeric> | ||||||
|  | #include <cassert> | ||||||
|  | #include <thread> | ||||||
|  | #include <mutex> | ||||||
|  | 
 | ||||||
|  | #include "yolo.h" | ||||||
|  | #include "log.h" | ||||||
|  | #include "options.h" | ||||||
|  | #include "utils.h" | ||||||
|  | #include "intelligentroi.h" | ||||||
|  | #include "seamcarving.h" | ||||||
|  | #include "facerecognizer.h" | ||||||
|  | 
 | ||||||
|  | const Yolo::Detection* pointInDetectionHoriz(int x, const std::vector<Yolo::Detection>& detections, const Yolo::Detection* ignore = nullptr) | ||||||
|  | { | ||||||
|  | 	const Yolo::Detection* inDetection = nullptr; | ||||||
|  | 	for(const Yolo::Detection& detection : detections) | ||||||
|  | 	{ | ||||||
|  | 		if(ignore && ignore == &detection) | ||||||
|  | 			continue; | ||||||
|  | 
 | ||||||
|  | 		if(detection.box.x <= x && detection.box.x+detection.box.width >= x) | ||||||
|  | 		{ | ||||||
|  | 			if(!inDetection || detection.box.br().x > inDetection->box.br().x) | ||||||
|  | 				inDetection = &detection; | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	return inDetection; | ||||||
|  | } | ||||||
|  | 
 | ||||||
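|  | // Advances x to the right edge of the detection run that covers it (returns true), or, when x lies in free | ||||||
|  | // space, to the start of the next detection or the image edge (returns false). | ||||||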
|  | bool findRegionEndpointHoriz(int& x, const std::vector<Yolo::Detection>& detections, int imgSizeX) | ||||||
|  | { | ||||||
|  | 	const Yolo::Detection* inDetection = pointInDetectionHoriz(x, detections); | ||||||
|  | 
 | ||||||
|  | 	Log(Log::DEBUG, false)<<__func__<<" point "<<x; | ||||||
|  | 
 | ||||||
|  | 	if(!inDetection) | ||||||
|  | 	{ | ||||||
|  | 		const Yolo::Detection* closest = nullptr; | ||||||
|  | 		for(const Yolo::Detection& detection : detections) | ||||||
|  | 		{ | ||||||
|  | 			if(detection.box.x > x) | ||||||
|  | 			{ | ||||||
|  | 				if(closest == nullptr || detection.box.x-x < closest->box.x-x) // keep the nearest box to the right | ||||||
|  | 					closest = &detection; | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 		if(closest) | ||||||
|  | 			x = closest->box.x; | ||||||
|  | 		else | ||||||
|  | 			x = imgSizeX; | ||||||
|  | 
 | ||||||
|  | 		Log(Log::DEBUG)<<" is not in any box and will be moved to "<<x<<" where the closest box ("<<(closest ? closest->className : "null")<<") is"; | ||||||
|  | 		return false; | ||||||
|  | 	} | ||||||
|  | 	else | ||||||
|  | 	{ | ||||||
|  | 		x = inDetection->box.br().x; | ||||||
|  | 		Log(Log::DEBUG, false)<<" is in a box and will be moved to its end "<<x<<" where "; | ||||||
|  | 		const Yolo::Detection* candidateDetection = pointInDetectionHoriz(x, detections, inDetection); | ||||||
|  | 		if(candidateDetection && candidateDetection->box.br().x > x) | ||||||
|  | 		{ | ||||||
|  | 			Log(Log::DEBUG)<<"it is again in a box"; | ||||||
|  | 			return findRegionEndpointHoriz(x, detections, imgSizeX); | ||||||
|  | 		} | ||||||
|  | 		else | ||||||
|  | 		{ | ||||||
|  | 			Log(Log::DEBUG)<<"it is not in a box"; | ||||||
|  | 			return true; | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
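|  | // Walks the image from left to right and cuts it into full-height slices, marking a slice as frozen (true) | ||||||
|  | // when it is covered by a detection box and as resizable (false) otherwise. | ||||||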
|  | std::vector<std::pair<cv::Mat, bool>> cutImageIntoHorzRegions(cv::Mat& image, const std::vector<Yolo::Detection>& detections) | ||||||
|  | { | ||||||
|  | 	std::vector<std::pair<cv::Mat, bool>> out; | ||||||
|  | 
 | ||||||
|  | 	std::cout<<__func__<<' '<<image.cols<<'x'<<image.rows<<std::endl; | ||||||
|  | 
 | ||||||
|  | 	for(int x = 0; x < image.cols; ++x) | ||||||
|  | 	{ | ||||||
|  | 		int start = x; | ||||||
|  | 		bool frozen = findRegionEndpointHoriz(x, detections, image.cols); | ||||||
|  | 
 | ||||||
|  | 		int width = x-start; | ||||||
|  | 		if(x < image.cols) | ||||||
|  | 			++width; | ||||||
|  | 		cv::Rect rect(start, 0, width, image.rows); | ||||||
|  | 		Log(Log::DEBUG)<<__func__<<" region\t"<<rect; | ||||||
|  | 		cv::Mat slice = image(rect); | ||||||
|  | 		out.push_back({slice, frozen}); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return out; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | cv::Mat assembleFromSlicesHoriz(const std::vector<std::pair<cv::Mat, bool>>& slices) | ||||||
|  | { | ||||||
|  | 	assert(!slices.empty()); | ||||||
|  | 
 | ||||||
|  | 	int cols = 0; | ||||||
|  | 	for(const std::pair<cv::Mat, bool>& slice : slices) | ||||||
|  | 		cols += slice.first.cols; | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 	cv::Mat image(slices[0].first.rows, cols, slices[0].first.type()); // cv::Mat takes (rows, cols, type) | ||||||
|  | 	Log(Log::DEBUG)<<__func__<<' '<<image.size()<<' '<<cols<<' '<<slices[0].first.rows; | ||||||
|  | 
 | ||||||
|  | 	int col = 0; | ||||||
|  | 	for(const std::pair<cv::Mat, bool>& slice : slices) | ||||||
|  | 	{ | ||||||
|  | 		cv::Rect rect(col, 0, slice.first.cols, slice.first.rows); | ||||||
|  | 		Log(Log::DEBUG)<<__func__<<' '<<rect; | ||||||
|  | 		slice.first.copyTo(image(rect)); | ||||||
|  | 		col += slice.first.cols-1; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return image; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void transposeRect(cv::Rect& rect) | ||||||
|  | { | ||||||
|  | 	int x = rect.x; | ||||||
|  | 	rect.x = rect.y; | ||||||
|  | 	rect.y = x; | ||||||
|  | 
 | ||||||
|  | 	int width = rect.width; | ||||||
|  | 	rect.width = rect.height; | ||||||
|  | 	rect.height = width; | ||||||
|  | } | ||||||
|  | 
 | ||||||
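|  | // Adjusts the aspect ratio by seam carving: regions covered by high-priority detections are frozen, the | ||||||
|  | // required number of seams is distributed over the remaining slices in proportion to their width, and each | ||||||
|  | // slice is stretched independently before the image is reassembled; the image is transposed first when it | ||||||
|  | // has to grow vertically. | ||||||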
|  | bool seamCarveResize(cv::Mat& image, std::vector<Yolo::Detection> detections, double targetAspectRatio = 1.0) | ||||||
|  | { | ||||||
|  | 	detections.erase(std::remove_if(detections.begin(), detections.end(), [](const Yolo::Detection& detection){return detection.priority < 3;}), detections.end()); | ||||||
|  | 
 | ||||||
|  | 	double aspectRatio = image.cols/static_cast<double>(image.rows); | ||||||
|  | 
 | ||||||
|  | 	Log(Log::DEBUG)<<"Image size "<<image.size()<<" aspect ratio "<<aspectRatio<<" target aspect ratio "<<targetAspectRatio; | ||||||
|  | 
 | ||||||
|  | 	bool vertical = false; | ||||||
|  | 	if(aspectRatio > targetAspectRatio) | ||||||
|  | 		vertical = true; | ||||||
|  | 
 | ||||||
|  | 	int requiredLines = 0; | ||||||
|  | 	if(!vertical) | ||||||
|  | 		requiredLines = image.rows*targetAspectRatio - image.cols; | ||||||
|  | 	else | ||||||
|  | 		requiredLines = image.cols/targetAspectRatio - image.rows; | ||||||
|  | 
 | ||||||
|  | 	Log(Log::DEBUG)<<__func__<<' '<<requiredLines<<" lines are required in "<<(vertical ? "vertical" : "horizontal")<<" direction"; | ||||||
|  | 
 | ||||||
|  | 	if(vertical) | ||||||
|  | 	{ | ||||||
|  | 		cv::transpose(image, image); | ||||||
|  | 		for(Yolo::Detection& detection : detections) | ||||||
|  | 			transposeRect(detection.box); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	std::vector<std::pair<cv::Mat, bool>> slices = cutImageIntoHorzRegions(image, detections); | ||||||
|  | 	Log(Log::DEBUG)<<"Image has "<<slices.size()<<" slices:"; | ||||||
|  | 	int totalResizableSize = 0; | ||||||
|  | 	for(const std::pair<cv::Mat, bool>& slice : slices) | ||||||
|  | 	{ | ||||||
|  | 		Log(Log::DEBUG)<<"a "<<(slice.second ? "frozen" : "unfrozen")<<" slice of size "<<slice.first.cols; | ||||||
|  | 		if(!slice.second) | ||||||
|  | 			totalResizableSize += slice.first.cols; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if(totalResizableSize < requiredLines+1) | ||||||
|  | 	{ | ||||||
|  | 		Log(Log::WARN)<<"Unable to seam carve as there are only "<<totalResizableSize<<" unfrozen cols"; | ||||||
|  | 		if(vertical) | ||||||
|  | 			cv::transpose(image, image); | ||||||
|  | 		return false; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	std::vector<int> seamsForSlice(slices.size(), 0); | ||||||
|  | 	for(size_t i = 0; i < slices.size(); ++i) | ||||||
|  | 	{ | ||||||
|  | 		if(!slices[i].second) | ||||||
|  | 			seamsForSlice[i] = (static_cast<double>(slices[i].first.cols)/totalResizableSize)*requiredLines; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	int residual = requiredLines - std::accumulate(seamsForSlice.begin(), seamsForSlice.end(), decltype(seamsForSlice)::value_type(0)); | ||||||
|  | 	for(ssize_t i = slices.size()-1; i >= 0; --i) | ||||||
|  | 	{ | ||||||
|  | 		if(!slices[i].second) | ||||||
|  | 		{ | ||||||
|  | 			seamsForSlice[i] += residual; | ||||||
|  | 			break; | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	for(size_t i = 0; i < slices.size(); ++i) | ||||||
|  | 	{ | ||||||
|  | 		if(seamsForSlice[i] != 0) | ||||||
|  | 		{ | ||||||
|  | 			bool ret = SeamCarving::strechImage(slices[i].first, seamsForSlice[i], true); | ||||||
|  | 			if(!ret) | ||||||
|  | 			{ | ||||||
|  | 				if(vertical) | ||||||
|  | 					transpose(image, image); | ||||||
|  | 				return false; | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	image = assembleFromSlicesHoriz(slices); | ||||||
|  | 
 | ||||||
|  | 	if(vertical) | ||||||
|  | 		cv::transpose(image, image); | ||||||
|  | 
 | ||||||
|  | 	return true; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void drawDebugInfo(cv::Mat &image, const cv::Rect& rect, const std::vector<Yolo::Detection>& detections) | ||||||
|  | { | ||||||
|  | 	for(const Yolo::Detection& detection : detections) | ||||||
|  | 	{ | ||||||
|  | 		cv::rectangle(image, detection.box, detection.color, 3); | ||||||
|  | 		std::string label = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4) + ' ' + std::to_string(detection.priority); | ||||||
|  | 		cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_DUPLEX, 1, 1, 0); | ||||||
|  | 		cv::Rect textBox(detection.box.x, detection.box.y - 40, labelSize.width + 10, labelSize.height + 20); | ||||||
|  | 		cv::rectangle(image, textBox, detection.color, cv::FILLED); | ||||||
|  | 		cv::putText(image, label, cv::Point(detection.box.x + 5, detection.box.y - 10), cv::FONT_HERSHEY_DUPLEX, 1, cv::Scalar(0, 0, 0), 1, 0); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	cv::rectangle(image, rect, cv::Scalar(0, 0, 255), 8); | ||||||
|  | } | ||||||
|  | 
 | ||||||
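|  | // Downscales the image so that its long edge is at most twice the target's long edge, preserving the aspect ratio. | ||||||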
|  | static void reduceSize(cv::Mat& image, const cv::Size& targetSize) | ||||||
|  | { | ||||||
|  | 	int longTargetSize = std::max(targetSize.width, targetSize.height)*2; | ||||||
|  | 	if(std::max(image.cols, image.rows) > longTargetSize) | ||||||
|  | 	{ | ||||||
|  | 		if(image.cols > image.rows) | ||||||
|  | 		{ | ||||||
|  | 			double ratio = static_cast<double>(longTargetSize)/image.cols; | ||||||
|  | 			cv::resize(image, image, {longTargetSize, static_cast<int>(image.rows*ratio)}, 0, 0, ratio < 1 ? cv::INTER_AREA : cv::INTER_CUBIC); | ||||||
|  | 		} | ||||||
|  | 		else | ||||||
|  | 		{ | ||||||
|  | 			double ratio = static_cast<double>(longTargetSize)/image.rows; | ||||||
|  | 			cv::resize(image, image, {static_cast<int>(image.cols*ratio), longTargetSize}, 0, 0, ratio < 1 ? cv::INTER_AREA : cv::INTER_CUBIC); | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
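|  | // Per-image pipeline: load, downscale, run YOLO, optionally boost detections matching the reference face, | ||||||
|  | // then crop (seam carving first when the detections do not fit) and save the result at the target size. | ||||||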
|  | void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yolo, FaceRecognizer* recognizer, | ||||||
|  | 	std::mutex& reconizerMutex, const std::filesystem::path& debugOutputPath) | ||||||
|  | { | ||||||
|  | 	InteligentRoi intRoi(yolo); | ||||||
|  | 	cv::Mat image = cv::imread(path); | ||||||
|  | 	if(!image.data) | ||||||
|  | 	{ | ||||||
|  | 		Log(Log::WARN)<<"could not load image "<<path<<" skipping"; | ||||||
|  | 		return; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	reduceSize(image, config.targetSize); | ||||||
|  | 
 | ||||||
|  | 	std::vector<Yolo::Detection> detections = yolo.runInference(image); | ||||||
|  | 
 | ||||||
|  | 	Log(Log::DEBUG)<<"Got "<<detections.size()<<" detections for "<<path; | ||||||
|  | 	// Use an index loop here: the push_back below may reallocate the vector, which would | ||||||
|  | 	// invalidate the reference held by a range-based for loop. | ||||||
|  | 	const size_t detectionCount = detections.size(); | ||||||
|  | 	for(size_t i = 0; i < detectionCount; ++i) | ||||||
|  | 	{ | ||||||
|  | 		bool hasmatch = false; | ||||||
|  | 		if(recognizer && detections[i].className == "person") | ||||||
|  | 		{ | ||||||
|  | 			cv::Mat person = image(detections[i].box); | ||||||
|  | 			reconizerMutex.lock(); | ||||||
|  | 			FaceRecognizer::Detection match = recognizer->isMatch(person); | ||||||
|  | 			reconizerMutex.unlock(); | ||||||
|  | 			if(match.person >= 0) | ||||||
|  | 			{ | ||||||
|  | 				detections[i].priority += 10; | ||||||
|  | 				hasmatch = true; | ||||||
|  | 				detections.push_back({0, "Face", match.confidence, 20, {255, 0, 0}, match.rect}); | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 		Log(Log::DEBUG)<<detections[i].class_id<<": "<<detections[i].className<<" at "<<detections[i].box<<" with prio "<<detections[i].priority<<(hasmatch ? " has match" : ""); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	cv::Rect crop; | ||||||
|  | 	bool incompleate = intRoi.getCropRectangle(crop, detections, image.size()); | ||||||
|  | 
 | ||||||
|  | 	if(config.seamCarving && incompleate) | ||||||
|  | 	{ | ||||||
|  | 		bool ret = seamCarveResize(image, detections, config.targetSize.aspectRatio()); | ||||||
|  | 		if(ret && image.size().aspectRatio() != config.targetSize.aspectRatio()) | ||||||
|  | 		{ | ||||||
|  | 			detections = yolo.runInference(image); | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	cv::Mat croppedImage; | ||||||
|  | 
 | ||||||
|  | 	if(image.size().aspectRatio() != config.targetSize.aspectRatio() && incompleate) | ||||||
|  | 	{ | ||||||
|  | 		intRoi.getCropRectangle(crop, detections, image.size()); | ||||||
|  | 
 | ||||||
|  | 		if(config.debug) | ||||||
|  | 		{ | ||||||
|  | 			cv::Mat debugImage = image.clone(); | ||||||
|  | 			drawDebugInfo(debugImage, crop, detections); | ||||||
|  | 			bool ret = cv::imwrite(debugOutputPath/path.filename(), debugImage); | ||||||
|  | 			if(!ret) | ||||||
|  | 				Log(Log::WARN)<<"could not save debug image to "<<debugOutputPath/path.filename()<<" skipping"; | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		croppedImage = image(crop); | ||||||
|  | 	} | ||||||
|  | 	else if(!incompleate) | ||||||
|  | 	{ | ||||||
|  | 		croppedImage = image(crop); | ||||||
|  | 	} | ||||||
|  | 	else | ||||||
|  | 	{ | ||||||
|  | 		croppedImage = image; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	cv::Mat resizedImage; | ||||||
|  | 	cv::resize(croppedImage, resizedImage, config.targetSize, 0, 0, cv::INTER_CUBIC); | ||||||
|  | 	bool ret = cv::imwrite(config.outputDir/path.filename(), resizedImage); | ||||||
|  | 	if(!ret) | ||||||
|  | 		Log(Log::WARN)<<"could not save image to "<<config.outputDir/path.filename()<<" skipping"; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void threadFn(const std::vector<std::filesystem::path>& images, const Config& config, FaceRecognizer* recognizer, | ||||||
|  | 		std::mutex& reconizerMutex, const std::filesystem::path& debugOutputPath) | ||||||
|  | { | ||||||
|  | 	Yolo yolo(config.modelPath, {640, 480}, config.classesPath, false); | ||||||
|  | 	for(std::filesystem::path path : images) | ||||||
|  | 		pipeline(path, config, yolo, recognizer, reconizerMutex, debugOutputPath); | ||||||
|  | } | ||||||
|  | 
 | ||||||
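|  | // Splits vec into at most `parts' contiguous chunks of near-equal size, distributing the remainder one | ||||||
|  | // element at a time to the leading chunks. | ||||||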
|  | template<typename T> | ||||||
|  | std::vector<std::vector<T>> splitVector(const std::vector<T>& vec, size_t parts) | ||||||
|  | { | ||||||
|  | 	std::vector<std::vector<T>> out; | ||||||
|  | 
 | ||||||
|  | 	size_t length = vec.size()/parts; | ||||||
|  | 	size_t remain = vec.size() % parts; | ||||||
|  | 
 | ||||||
|  | 	size_t begin = 0; | ||||||
|  | 	size_t end = 0; | ||||||
|  | 
 | ||||||
|  | 	for (size_t i = 0; i < std::min(parts, vec.size()); ++i) | ||||||
|  | 	{ | ||||||
|  | 		end += (remain > 0) ? (length + !!(remain--)) : length; | ||||||
|  | 		out.push_back(std::vector<T>(vec.begin() + begin, vec.begin() + end)); | ||||||
|  | 		begin = end; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return out; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | int main(int argc, char* argv[]) | ||||||
|  | { | ||||||
|  | 	Log::level = Log::INFO; | ||||||
|  | 
 | ||||||
|  | 	Config config; | ||||||
|  | 	argp_parse(&argp, argc, argv, 0, 0, &config); | ||||||
|  | 
 | ||||||
|  | 	if(config.outputDir.empty()) | ||||||
|  | 	{ | ||||||
|  | 		Log(Log::ERROR)<<"an output path \"-o\" is required"; | ||||||
|  | 		return 1; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if(config.imagePaths.empty()) | ||||||
|  | 	{ | ||||||
|  | 		Log(Log::ERROR)<<"at least one input image or directory is required"; | ||||||
|  | 		return 1; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	std::vector<std::filesystem::path> imagePaths; | ||||||
|  | 
 | ||||||
|  | 	for(const std::filesystem::path& path : config.imagePaths) | ||||||
|  | 		getImageFiles(path, imagePaths); | ||||||
|  | 
 | ||||||
|  | 	Log(Log::DEBUG)<<"Images:"; | ||||||
|  | 	for(const std::filesystem::path& path : imagePaths) | ||||||
|  | 		Log(Log::DEBUG)<<path; | ||||||
|  | 
 | ||||||
|  | 	if(imagePaths.empty()) | ||||||
|  | 	{ | ||||||
|  | 		Log(Log::ERROR)<<"no image was found"; | ||||||
|  | 		return 1; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if(!std::filesystem::exists(config.outputDir)) | ||||||
|  | 	{ | ||||||
|  | 		if(!std::filesystem::create_directory(config.outputDir)) | ||||||
|  | 		{ | ||||||
|  | 			Log(Log::ERROR)<<"could not create directory at "<<config.outputDir; | ||||||
|  | 			return 1; | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	std::filesystem::path debugOutputPath(config.outputDir/"debug"); | ||||||
|  | 	if(config.debug) | ||||||
|  | 	{ | ||||||
|  | 		if(!std::filesystem::exists(debugOutputPath)) | ||||||
|  | 			std::filesystem::create_directory(debugOutputPath); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	FaceRecognizer* recognizer = nullptr; | ||||||
|  | 	std::mutex recognizerMutex; | ||||||
|  | 	if(!config.focusPersonImage.empty()) | ||||||
|  | 	{ | ||||||
|  | 		cv::Mat personImage = cv::imread(config.focusPersonImage); | ||||||
|  | 		if(personImage.empty()) | ||||||
|  | 		{ | ||||||
|  | 			Log(Log::ERROR)<<"Could not load image from "<<config.focusPersonImage; | ||||||
|  | 			return 1; | ||||||
|  | 		} | ||||||
|  | 		recognizer = new FaceRecognizer(); | ||||||
|  | 		recognizer->addReferances({personImage}); | ||||||
|  | 		recognizer->setThreshold(config.threshold); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	std::vector<std::thread> threads; | ||||||
|  | 	std::vector<std::vector<std::filesystem::path>> imagePathParts = splitVector(imagePaths, std::thread::hardware_concurrency()); | ||||||
|  | 
 | ||||||
|  | 	for(size_t i = 0; i < imagePathParts.size(); ++i) | ||||||
|  | 		threads.push_back(std::thread(threadFn, imagePathParts[i], std::ref(config),  recognizer, std::ref(recognizerMutex), std::ref(debugOutputPath))); | ||||||
|  | 
 | ||||||
|  | 	for(std::thread& thread : threads) | ||||||
|  | 		thread.join(); | ||||||
|  | 
 | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||
							
								
								
									
117 SmartCrop/options.h Normal file
|  | @@ -0,0 +1,117 @@ | ||||||
|  | /* * SmartCrop - A tool for content aware cropping of images
 | ||||||
|  |  * Copyright (C) 2024 Carl Philipp Klemm | ||||||
|  |  * | ||||||
|  |  * This file is part of SmartCrop. | ||||||
|  |  * | ||||||
|  |  * SmartCrop is free software: you can redistribute it and/or modify | ||||||
|  |  * it under the terms of the GNU General Public License as published by | ||||||
|  |  * the Free Software Foundation, either version 3 of the License, or | ||||||
|  |  * (at your option) any later version. | ||||||
|  |  * | ||||||
|  |  * SmartCrop is distributed in the hope that it will be useful, | ||||||
|  |  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |  * GNU General Public License for more details. | ||||||
|  |  * | ||||||
|  |  * You should have received a copy of the GNU General Public License | ||||||
|  |  * along with SmartCrop.  If not, see <http://www.gnu.org/licenses/>.
 | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | #pragma once | ||||||
|  | #include <string> | ||||||
|  | #include <vector> | ||||||
|  | #include <argp.h> | ||||||
|  | #include <iostream> | ||||||
|  | #include <filesystem> | ||||||
|  | #include <opencv2/core/types.hpp> | ||||||
|  | #include "log.h" | ||||||
|  | 
 | ||||||
|  | const char *argp_program_version = "AIImagePreprocesses"; | ||||||
|  | const char *argp_program_bug_address = "<carl@uvos.xyz>"; | ||||||
|  | static char doc[] = "Application that transforms images into the formats, sizes and aspect ratios required for AI training"; | ||||||
|  | static char args_doc[] = "FILE(S)"; | ||||||
|  | 
 | ||||||
|  | static struct argp_option options[] = | ||||||
|  | { | ||||||
|  |   {"verbose",		'v', 0,				0,	"Show debug messages" }, | ||||||
|  |   {"quiet", 		'q', 0,				0,	"only output data" }, | ||||||
|  |   {"model", 		'm', "[FILENAME]",	0,	"YoloV8 model to use for detection" }, | ||||||
|  |   {"classes", 		'c', "[FILENAME]",	0,	"classes text file to use" }, | ||||||
|  |   {"out",	 		'o', "[DIRECTORY]",	0,	"directory whre images are to be saved" }, | ||||||
|  |   {"debug", 		'd', 0,				0,	"output debug images" }, | ||||||
|  |   {"seam-carving", 	's', 0,				0,	"use seam carving to change image aspect ratio instead of croping"}, | ||||||
|  |   {"size", 			'z', "[PIXELS]",	0,	"target output size, default: 512"}, | ||||||
|  |   {"focus-person",	'f', "[FILENAME]",	0,	"a file name to an image of a person that the crop should focus on"}, | ||||||
|  |   {"person-threshold",	't', "[NUMBER]",	0,	"the threshold at witch to consider a person matched, defaults to 0.363"}, | ||||||
|  |   {0} | ||||||
|  | }; | ||||||
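|  | /* Illustrative invocation (the binary name and file names here are only examples): | ||||||
|  |    smartcrop -m yolov8n.onnx -c classes.txt -o out/ -z 512 -f person.jpg photos/ */ | ||||||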
|  | 
 | ||||||
|  | struct Config | ||||||
|  | { | ||||||
|  | 	std::vector<std::filesystem::path> imagePaths; | ||||||
|  | 	std::filesystem::path modelPath; | ||||||
|  | 	std::filesystem::path classesPath; | ||||||
|  | 	std::filesystem::path outputDir; | ||||||
|  | 	std::filesystem::path focusPersonImage; | ||||||
|  | 	bool seamCarving = false; | ||||||
|  | 	bool debug = false; | ||||||
|  | 	double threshold = 0.363; | ||||||
|  | 	cv::Size targetSize = cv::Size(512, 512); | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | static error_t parse_opt (int key, char *arg, struct argp_state *state) | ||||||
|  | { | ||||||
|  | 	Config *config = reinterpret_cast<Config*>(state->input); | ||||||
|  | 	try | ||||||
|  | 	{ | ||||||
|  | 		switch (key) | ||||||
|  | 		{ | ||||||
|  | 		case 'q': | ||||||
|  | 			Log::level = Log::ERROR; | ||||||
|  | 			break; | ||||||
|  | 		case 'v': | ||||||
|  | 			Log::level = Log::DEBUG; | ||||||
|  | 			break; | ||||||
|  | 		case 'm': | ||||||
|  | 			config->modelPath = arg; | ||||||
|  | 			break; | ||||||
|  | 		case 'c': | ||||||
|  | 			config->classesPath = arg; | ||||||
|  | 			break; | ||||||
|  | 		case 'd': | ||||||
|  | 			config->debug = true; | ||||||
|  | 			break; | ||||||
|  | 		case 'o': | ||||||
|  | 			config->outputDir.assign(arg); | ||||||
|  | 			break; | ||||||
|  | 		case 's': | ||||||
|  | 			config->seamCarving = true; | ||||||
|  | 			break; | ||||||
|  | 		case 'f': | ||||||
|  | 			config->focusPersonImage = arg; | ||||||
|  | 			break; | ||||||
|  | 		case 't': | ||||||
|  | 			config->threshold = std::atof(arg); | ||||||
|  | 			break; | ||||||
|  | 		case 'z': | ||||||
|  | 		{ | ||||||
|  | 			int x = std::stoi(arg); | ||||||
|  | 			config->targetSize = cv::Size(x, x); | ||||||
|  | 			break; | ||||||
|  | 		} | ||||||
|  | 		case ARGP_KEY_ARG: | ||||||
|  | 			config->imagePaths.push_back(arg); | ||||||
|  | 			break; | ||||||
|  | 		default: | ||||||
|  | 			return ARGP_ERR_UNKNOWN; | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	catch(const std::invalid_argument& ex) | ||||||
|  | 	{ | ||||||
|  | 		std::cout<<arg<<" passed for argument -"<<static_cast<char>(key)<<" is not a valid number.\n"; | ||||||
|  | 		return ARGP_KEY_ERROR; | ||||||
|  | 	} | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static struct argp argp = {options, parse_opt, args_doc, doc}; | ||||||
							
								
								
									
35 SmartCrop/readfile.h Normal file
|  | @@ -0,0 +1,35 @@ | ||||||
|  | /* * SmartCrop - A tool for content aware cropping of images
 | ||||||
|  |  * Copyright (C) 2024 Carl Philipp Klemm | ||||||
|  |  * | ||||||
|  |  * This file is part of SmartCrop. | ||||||
|  |  * | ||||||
|  |  * SmartCrop is free software: you can redistribute it and/or modify | ||||||
|  |  * it under the terms of the GNU General Public License as published by | ||||||
|  |  * the Free Software Foundation, either version 3 of the License, or | ||||||
|  |  * (at your option) any later version. | ||||||
|  |  * | ||||||
|  |  * SmartCrop is distributed in the hope that it will be useful, | ||||||
|  |  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |  * GNU General Public License for more details. | ||||||
|  |  * | ||||||
|  |  * You should have received a copy of the GNU General Public License | ||||||
|  |  * along with SmartCrop.  If not, see <http://www.gnu.org/licenses/>.
 | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | #pragma once | ||||||
|  | #include <string> | ||||||
|  | #include <filesystem> | ||||||
|  | #include <fstream> | ||||||
|  | #include <stdexcept> | ||||||
|  | #include <sstream> | ||||||
|  | 
 | ||||||
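|  | // Reads an entire file into a string; throws std::runtime_error if the file cannot be opened. | ||||||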
|  | inline std::string readFile(const std::filesystem::path& path) | ||||||
|  | { | ||||||
|  | 	std::ifstream file(path); | ||||||
|  | 	if(!file.is_open()) | ||||||
|  | 		throw std::runtime_error(std::string("could not open file ") + path.string()); | ||||||
|  | 	std::stringstream ss; | ||||||
|  | 	ss<<file.rdbuf(); | ||||||
|  | 	return ss.str(); | ||||||
|  | } | ||||||
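A quick usage note: readFile() slurps an entire file into a std::string and throws on failure, so callers rely on exceptions rather than return codes. A minimal sketch; the path below is only an example.

#include <iostream>
#include "readfile.h"

int main()
{
	// Hypothetical path; readFile throws std::runtime_error if the file cannot be opened.
	std::string classesText = readFile("Weights/classes.txt");
	std::cout<<classesText.size()<<" bytes read\n";
	return 0;
}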
							
								
								
									
376  SmartCrop/seamcarving.cpp  Normal file
|  | @ -0,0 +1,376 @@ | ||||||
|  | //
 | ||||||
|  | // SmartCrop - A tool for content aware cropping of images
 | ||||||
|  | // Copyright (C) 2024 Carl Philipp Klemm
 | ||||||
|  | //
 | ||||||
|  | // This file is part of SmartCrop.
 | ||||||
|  | //
 | ||||||
|  | // SmartCrop is free software: you can redistribute it and/or modify
 | ||||||
|  | // it under the terms of the GNU General Public License as published by
 | ||||||
|  | // the Free Software Foundation, either version 3 of the License, or
 | ||||||
|  | // (at your option) any later version.
 | ||||||
|  | //
 | ||||||
|  | // SmartCrop is distributed in the hope that it will be useful,
 | ||||||
|  | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||||
|  | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||||
|  | // GNU General Public License for more details.
 | ||||||
|  | //
 | ||||||
|  | // You should have received a copy of the GNU General Public License
 | ||||||
|  | // along with SmartCrop.  If not, see <http://www.gnu.org/licenses/>.
 | ||||||
|  | //
 | ||||||
|  | 
 | ||||||
|  | #include "seamcarving.h" | ||||||
|  | 
 | ||||||
|  | #include <opencv2/imgcodecs.hpp> | ||||||
|  | #include <opencv2/highgui/highgui.hpp> | ||||||
|  | #include <opencv2/imgproc.hpp> | ||||||
|  | #include <iostream> | ||||||
|  | #include <filesystem> | ||||||
|  | #include <cfloat> | ||||||
|  | #include <vector> | ||||||
|  | #include "log.h" | ||||||
|  | 
 | ||||||
|  | bool SeamCarving::strechImage(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect) | ||||||
|  | { | ||||||
|  | 	cv::Mat newFrame = image.clone(); | ||||||
|  | 	assert(!newFrame.empty()); | ||||||
|  | 	std::vector<std::vector<int>> vecSeams; | ||||||
|  | 
 | ||||||
|  | 	for(int i = 0; i < seams; i++) | ||||||
|  | 	{ | ||||||
|  | 		//Gradient Magnitude for intensity of image.
 | ||||||
|  | 		cv::Mat gradientMagnitude = computeGradientMagnitude(newFrame); | ||||||
|  | 		//Use DP to create the real energy map that is used for path calculation.
 | ||||||
|  | 		// Strictly using vertical paths for testing simplicity.
 | ||||||
|  | 		cv::Mat pathIntensityMat = computePathIntensityMat(gradientMagnitude); | ||||||
|  | 
 | ||||||
|  | 		if(pathIntensityMat.rows == 0 && pathIntensityMat.cols == 0) | ||||||
|  | 			return false; | ||||||
|  | 		std::vector<int> seam = getLeastImportantPath(pathIntensityMat); | ||||||
|  | 		vecSeams.push_back(seam); | ||||||
|  | 		if(seamsVect) | ||||||
|  | 			seamsVect->push_back(seam); | ||||||
|  | 
 | ||||||
|  | 		newFrame = removeLeastImportantPath(newFrame, seam); | ||||||
|  | 
 | ||||||
|  | 		if(newFrame.rows == 0 || newFrame.cols == 0) | ||||||
|  | 			return false; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if (grow) | ||||||
|  | 	{ | ||||||
|  | 		cv::Mat growMat = image.clone(); | ||||||
|  | 
 | ||||||
|  | 		for(size_t i = 0; i < vecSeams.size(); i++) | ||||||
|  | 		{ | ||||||
|  | 			growMat = addLeastImportantPath(growMat,vecSeams[i]); | ||||||
|  | 		} | ||||||
|  | 		image = growMat; | ||||||
|  | 	} | ||||||
|  | 	else | ||||||
|  | 	{ | ||||||
|  | 		image = newFrame; | ||||||
|  | 	} | ||||||
|  | 	return true; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | bool SeamCarving::strechImageVert(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect) | ||||||
|  | { | ||||||
|  | 	cv::transpose(image, image); | ||||||
|  | 	bool ret = strechImage(image, seams, grow, seamsVect); | ||||||
|  | 	cv::transpose(image, image); | ||||||
|  | 	return ret; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | bool SeamCarving::strechImageWithSeamsImage(cv::Mat& image, cv::Mat& seamsImage, int seams, bool grow) | ||||||
|  | { | ||||||
|  | 	std::vector<std::vector<int>> seamsVect; | ||||||
|  | 	seamsImage = image.clone(); | ||||||
|  | 
 | ||||||
|  | 	bool ret = SeamCarving::strechImage(image, seams, grow, &seamsVect); | ||||||
|  | 	if(!ret) | ||||||
|  | 		return false; | ||||||
|  | 
 | ||||||
|  | 	for(size_t i = 0; i < seamsVect.size(); ++i) | ||||||
|  | 		seamsImage = drawSeam(seamsImage, seamsVect[i]); | ||||||
|  | 	return true; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | cv::Mat SeamCarving::GetEnergyImg(const cv::Mat &img) | ||||||
|  | { | ||||||
|  | 	// find partial derivative of x-axis and y-axis separately
 | ||||||
|  | 	// sum up the partial derivates
 | ||||||
|  | 	float pd[] = {1, 2, 1, 0, 0, 0, -1, -2, -1}; | ||||||
|  | 	cv::Mat xFilter(3, 3, CV_32FC1, pd); | ||||||
|  | 	cv::Mat yFilter = xFilter.t(); | ||||||
|  | 	cv::Mat grayImg; | ||||||
|  | 	cv::cvtColor(img, grayImg, cv::COLOR_RGBA2GRAY); | ||||||
|  | 	cv::Mat dxImg; | ||||||
|  | 	cv::Mat dyImg; | ||||||
|  | 
 | ||||||
|  | 	cv::filter2D(grayImg, dxImg, 0, xFilter); | ||||||
|  | 	cv::filter2D(grayImg, dyImg, 0, yFilter); | ||||||
|  | 	//cv::Mat zeroMat = cv::Mat::zeros(dxImg.rows, dxImg.cols, dxImg.type());
 | ||||||
|  | 	//cv::Mat absDxImg;
 | ||||||
|  | 	//cv::Mat absDyImg;
 | ||||||
|  | 	//cv::absdiff(dxImg, zeroMat, absDxImg);
 | ||||||
|  | 	//cv::absdiff(dyImg, zeroMat, absDyImg);
 | ||||||
|  | 	cv::Mat absDxImg = cv::abs(dxImg); | ||||||
|  | 	cv::Mat absDyImg = cv::abs(dyImg); | ||||||
|  | 
 | ||||||
|  | 	cv::Mat energyImg; | ||||||
|  | 	cv::add(absDxImg, absDyImg, energyImg); | ||||||
|  | 	return energyImg; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | cv::Mat SeamCarving::computeGradientMagnitude(const cv::Mat &frame) | ||||||
|  | { | ||||||
|  | 	cv::Mat grayScale; | ||||||
|  | 	cv::cvtColor(frame, grayScale, cv::COLOR_RGBA2GRAY); | ||||||
|  | 	cv::Mat drv = cv::Mat(grayScale.size(), CV_16SC1); | ||||||
|  | 	cv::Mat drv32f = cv::Mat(grayScale.size(), CV_32FC1); | ||||||
|  | 	cv::Mat mag = cv::Mat::zeros(grayScale.size(), CV_32FC1); | ||||||
|  | 	Sobel(grayScale, drv, CV_16SC1, 1, 0); | ||||||
|  | 	drv.convertTo(drv32f, CV_32FC1); | ||||||
|  | 	cv::accumulateSquare(drv32f, mag); | ||||||
|  | 	Sobel(grayScale, drv, CV_16SC1, 0, 1); | ||||||
|  | 	drv.convertTo(drv32f, CV_32FC1); | ||||||
|  | 	cv::accumulateSquare(drv32f, mag); | ||||||
|  | 	cv::sqrt(mag, mag); | ||||||
|  | 	return mag; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | float SeamCarving::intensity(float currIndex, int start, int end) | ||||||
|  | { | ||||||
|  | 	if(start < 0 || start >= end) | ||||||
|  | 	{ | ||||||
|  | 		return FLT_MAX; | ||||||
|  | 	} | ||||||
|  | 	else | ||||||
|  | 	{ | ||||||
|  | 		return currIndex; | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | cv::Mat SeamCarving::computePathIntensityMat(const cv::Mat &rawEnergyMap) | ||||||
|  | { | ||||||
|  | 	cv::Mat pathIntensityMap = cv::Mat(rawEnergyMap.size(), CV_32FC1); | ||||||
|  | 
 | ||||||
|  | 	if(rawEnergyMap.total() == 0 || pathIntensityMap.total() == 0) | ||||||
|  | 	{ | ||||||
|  | 		return cv::Mat(); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	//First row of intensity paths is the same as the energy map
 | ||||||
|  | 	rawEnergyMap.row(0).copyTo(pathIntensityMap.row(0)); | ||||||
|  | 	float max = 0; | ||||||
|  | 
 | ||||||
|  | 	//The rest of them use the DP calculation using the minimum of the 3 pixels above them + their own intensity.
 | ||||||
|  | 	for(int row = 1; row < pathIntensityMap.rows; row++) | ||||||
|  | 	{ | ||||||
|  | 		for(int col = 0; col < pathIntensityMap.cols; col++) | ||||||
|  | 		{ | ||||||
|  | 			//The initial intensity of the pixel is its raw intensity
 | ||||||
|  | 			float pixelIntensity = rawEnergyMap.at<float>(row, col); | ||||||
|  | 			//The minimum intensity from the current path of the 3 pixels above it is added to its intensity.
 | ||||||
|  | 			float p1 = intensity(pathIntensityMap.at<float>(row-1, col-1), col - 1, pathIntensityMap.cols); | ||||||
|  | 			float p2 = intensity(pathIntensityMap.at<float>(row-1, col), col, pathIntensityMap.cols); | ||||||
|  | 			float p3 = intensity(pathIntensityMap.at<float>(row-1, col+1), col + 1, pathIntensityMap.cols); | ||||||
|  | 
 | ||||||
|  | 			float minIntensity = std::min(p1, p2); | ||||||
|  | 			minIntensity = std::min(minIntensity, p3); | ||||||
|  | 
 | ||||||
|  | 			pixelIntensity += minIntensity; | ||||||
|  | 
 | ||||||
|  | 			max = std::max(max, pixelIntensity); | ||||||
|  | 			pathIntensityMap.at<float>(row, col) = pixelIntensity; | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	return pathIntensityMap; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | std::vector<int> SeamCarving::getLeastImportantPath(const cv::Mat &importanceMap) | ||||||
|  | { | ||||||
|  | 	if(importanceMap.total() == 0) | ||||||
|  | 	{ | ||||||
|  | 		return std::vector<int>(); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	//Find the beginning of the least important path: the column with the lowest cumulative energy in the bottom row.
 | ||||||
|  | 	float minImportance = importanceMap.at<float>(importanceMap.rows - 1, 0); | ||||||
|  | 	int minCol = 0; | ||||||
|  | 	for (int col = 1; col < importanceMap.cols; col++) | ||||||
|  | 	{ | ||||||
|  | 		float currPixel =importanceMap.at<float>(importanceMap.rows - 1, col); | ||||||
|  | 		if(currPixel < minImportance) | ||||||
|  | 		{ | ||||||
|  | 			minCol = col; | ||||||
|  | 			minImportance = currPixel; | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	std::vector<int> leastEnergySeam(importanceMap.rows); | ||||||
|  | 	leastEnergySeam[importanceMap.rows-1] = minCol; | ||||||
|  | 	for(int row = importanceMap.rows - 2; row >= 0; row--) | ||||||
|  | 	{ | ||||||
|  | 		float p1 = intensity(importanceMap.at<float>(row, minCol-1), minCol - 1, importanceMap.cols); | ||||||
|  | 		float p2 = intensity(importanceMap.at<float>(row, minCol), minCol, importanceMap.cols); | ||||||
|  | 		float p3 = intensity(importanceMap.at<float>(row, minCol+1), minCol + 1, importanceMap.cols); | ||||||
|  | 		//Adjust the min column for path following
 | ||||||
|  | 		if(p1 < p2 && p1 < p3) | ||||||
|  | 		{ | ||||||
|  | 			minCol -= 1; | ||||||
|  | 		} | ||||||
|  | 		else if(p3 < p1 && p3 < p2) | ||||||
|  | 		{ | ||||||
|  | 			minCol += 1; | ||||||
|  | 		} | ||||||
|  | 		leastEnergySeam[row] = minCol; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return leastEnergySeam; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | cv::Mat SeamCarving::removeLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam) | ||||||
|  | { | ||||||
|  | 	cv::Size orgSize = original.size(); | ||||||
|  | 	// new mat needs to shrink by one column
 | ||||||
|  | 	cv::Size size = cv::Size(orgSize.width-1, orgSize.height); | ||||||
|  | 	cv::Mat newMat = cv::Mat(size, original.type()); | ||||||
|  | 
 | ||||||
|  | 	for(size_t row = 0; row < seam.size(); row++) | ||||||
|  | 	{ | ||||||
|  | 		removePixel(original, newMat, row, seam[row]); | ||||||
|  | 	} | ||||||
|  | 	return newMat; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void SeamCarving::removePixel(const cv::Mat &original, cv::Mat &outputMat, int row, int minCol) | ||||||
|  | { | ||||||
|  | 	int width = original.cols; | ||||||
|  | 	int channels = original.channels(); | ||||||
|  | 	int originRowStart = row * channels * width; | ||||||
|  | 	int newRowStart = row * channels * (width - 1); | ||||||
|  | 	int firstNum = minCol * channels; | ||||||
|  | 	unsigned char *rawOrig = original.data; | ||||||
|  | 	unsigned char *rawOutput = outputMat.data; | ||||||
|  | 
 | ||||||
|  | 	//std::cout << "originRowStart: " << originRowStart << std::endl;
 | ||||||
|  | 	//std::cout << "newRowStart: " << newRowStart << std::endl;
 | ||||||
|  | 	//std::cout << "firstNum: " << firstNum << std::endl;
 | ||||||
|  | 	memcpy(rawOutput + newRowStart, rawOrig + originRowStart, firstNum); | ||||||
|  | 
 | ||||||
|  | 	int originRowMid = originRowStart + (minCol + 1) * channels; | ||||||
|  | 	int newRowMid = newRowStart + minCol * channels; | ||||||
|  | 	int secondNum = (width - 1) * channels - firstNum; | ||||||
|  | 
 | ||||||
|  | 	//std::cout << "originRowMid: " << originRowMid << std::endl;
 | ||||||
|  | 	//std::cout << "newRowMid: " << newRowMid << std::endl;
 | ||||||
|  | 	//std::cout << "secondNum: " << secondNum << std::endl;
 | ||||||
|  | 	memcpy(rawOutput + newRowMid, rawOrig + originRowMid, secondNum); | ||||||
|  | 
 | ||||||
|  | 	int leftPixel = minCol - 1; | ||||||
|  | 	int rightPixel = minCol + 1; | ||||||
|  | 
 | ||||||
|  | 	int byte1 = rawOrig[originRowStart + minCol * channels]; | ||||||
|  | 	int byte2 = rawOrig[originRowStart + minCol * channels + 1]; | ||||||
|  | 	int byte3 = rawOrig[originRowStart + minCol * channels + 2]; | ||||||
|  | 
 | ||||||
|  | 	if (rightPixel < width) | ||||||
|  | 	{ | ||||||
|  | 		int byte1R = rawOrig[originRowStart + rightPixel * channels]; | ||||||
|  | 		int byte2R = rawOrig[originRowStart + rightPixel * channels + 1]; | ||||||
|  | 		int byte3R = rawOrig[originRowStart + rightPixel * channels + 2]; | ||||||
|  | 		rawOutput[newRowStart + minCol * channels] = (unsigned char)((byte1 + byte1R) / 2); | ||||||
|  | 		rawOutput[newRowStart + minCol * channels + 1] = (unsigned char)((byte2 + byte2R) / 2); | ||||||
|  | 		rawOutput[newRowStart + minCol * channels + 2] = (unsigned char)((byte3 + byte3R) / 2); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if(leftPixel >= 0) | ||||||
|  | 	{ | ||||||
|  | 		int byte1L = rawOrig[originRowStart + leftPixel*channels]; | ||||||
|  | 		int byte2L = rawOrig[originRowStart + leftPixel*channels+1]; | ||||||
|  | 		int byte3L = rawOrig[originRowStart + leftPixel*channels+2]; | ||||||
|  | 		rawOutput[newRowStart + leftPixel*channels] = (unsigned char) ((byte1 + byte1L)/2); | ||||||
|  | 		rawOutput[newRowStart + leftPixel*channels+1] = (unsigned char) ((byte2 + byte2L)/2); | ||||||
|  | 		rawOutput[newRowStart + leftPixel*channels+2] = (unsigned char) ((byte3 + byte3L)/2); | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | cv::Mat SeamCarving::addLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam) | ||||||
|  | { | ||||||
|  | 	cv::Size orgSize = original.size(); | ||||||
|  | 	// new mat needs to grow by one column
 | ||||||
|  | 	cv::Size size = cv::Size(orgSize.width+1, orgSize.height); | ||||||
|  | 	cv::Mat newMat = cv::Mat(size, original.type()); | ||||||
|  | 
 | ||||||
|  | 	for(size_t row = 0; row < seam.size(); row++) | ||||||
|  | 	{ | ||||||
|  | 		//std::cout << "row: " << row << ", col: " << seam[row] << std::endl;
 | ||||||
|  | 		addPixel(original, newMat, row, seam[row]); | ||||||
|  | 	} | ||||||
|  | 	return newMat; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void SeamCarving::addPixel(const cv::Mat &original, cv::Mat &outputMat, int row, int minCol) | ||||||
|  | { | ||||||
|  | 	int width = original.cols; | ||||||
|  | 	int channels = original.channels(); | ||||||
|  | 	int originRowStart = row * channels * width; | ||||||
|  | 	int newRowStart = row * channels * (width + 1); | ||||||
|  | 	int firstNum = (minCol + 1) * channels; | ||||||
|  | 
 | ||||||
|  | 	unsigned char *rawOrig = original.data; | ||||||
|  | 	unsigned char *rawOutput = outputMat.data; | ||||||
|  | 
 | ||||||
|  | 	memcpy(rawOutput + newRowStart, rawOrig + originRowStart, firstNum); | ||||||
|  | 
 | ||||||
|  | 	memcpy(rawOutput + newRowStart + firstNum, rawOrig + originRowStart + firstNum, channels); | ||||||
|  | 
 | ||||||
|  | 	int originRowMid = originRowStart + ((minCol + 1) * channels); | ||||||
|  | 	int newRowMid = newRowStart + ((minCol + 2) * channels); | ||||||
|  | 	int secondNum = (width * channels) - firstNum; | ||||||
|  | 
 | ||||||
|  | 	memcpy(rawOutput + newRowMid, rawOrig + originRowMid, secondNum); | ||||||
|  | 
 | ||||||
|  | 	int leftPixel = minCol - 1; | ||||||
|  | 	int rightPixel = minCol + 1; | ||||||
|  | 
 | ||||||
|  | 	int byte1 = rawOrig[originRowStart + minCol * channels]; | ||||||
|  | 	int byte2 = rawOrig[originRowStart + minCol * channels + 1]; | ||||||
|  | 	int byte3 = rawOrig[originRowStart + minCol * channels + 2]; | ||||||
|  | 
 | ||||||
|  | 	if (rightPixel < width) | ||||||
|  | 	{ | ||||||
|  | 		int byte1R = rawOrig[originRowStart + rightPixel * channels]; | ||||||
|  | 		int byte2R = rawOrig[originRowStart + rightPixel * channels + 1]; | ||||||
|  | 		int byte3R = rawOrig[originRowStart + rightPixel * channels + 2]; | ||||||
|  | 		rawOutput[newRowStart + minCol * channels] = (unsigned char)((byte1 + byte1R) / 2); | ||||||
|  | 		rawOutput[newRowStart + minCol * channels + 1] = (unsigned char)((byte2 + byte2R) / 2); | ||||||
|  | 		rawOutput[newRowStart + minCol * channels + 2] = (unsigned char)((byte3 + byte3R) / 2); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if(leftPixel >= 0) | ||||||
|  | 	{ | ||||||
|  | 		int byte1L = rawOrig[originRowStart + leftPixel*channels]; | ||||||
|  | 		int byte2L = rawOrig[originRowStart + leftPixel*channels+1]; | ||||||
|  | 		int byte3L = rawOrig[originRowStart + leftPixel*channels+2]; | ||||||
|  | 		rawOutput[newRowStart + leftPixel*channels] = (unsigned char) ((byte1 + byte1L)/2); | ||||||
|  | 		rawOutput[newRowStart + leftPixel*channels+1] = (unsigned char) ((byte2 + byte2L)/2); | ||||||
|  | 		rawOutput[newRowStart + leftPixel*channels+2] = (unsigned char) ((byte3 + byte3L)/2); | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | cv::Mat SeamCarving::drawSeam(const cv::Mat &frame, const std::vector<int> &seam) | ||||||
|  | { | ||||||
|  | 	cv::Mat retMat = frame.clone(); | ||||||
|  | 	for(int row = 0; row < frame.rows; row++) | ||||||
|  | 	{ | ||||||
|  | 		retMat.at<cv::Vec3b>(row, seam[row])[0] = 0; | ||||||
|  | 		retMat.at<cv::Vec3b>(row, seam[row])[1] = 255; | ||||||
|  | 		retMat.at<cv::Vec3b>(row, seam[row])[2] = 0; | ||||||
|  | 	} | ||||||
|  | 	return retMat; | ||||||
|  | } | ||||||
							
								
								
									
43  SmartCrop/seamcarving.h  Normal file
|  | @ -0,0 +1,43 @@ | ||||||
|  | /* * SmartCrop - A tool for content aware cropping of images
 | ||||||
|  |  * Copyright (C) 2024 Carl Philipp Klemm | ||||||
|  |  * | ||||||
|  |  * This file is part of SmartCrop. | ||||||
|  |  * | ||||||
|  |  * SmartCrop is free software: you can redistribute it and/or modify | ||||||
|  |  * it under the terms of the GNU General Public License as published by | ||||||
|  |  * the Free Software Foundation, either version 3 of the License, or | ||||||
|  |  * (at your option) any later version. | ||||||
|  |  * | ||||||
|  |  * SmartCrop is distributed in the hope that it will be useful, | ||||||
|  |  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |  * GNU General Public License for more details. | ||||||
|  |  * | ||||||
|  |  * You should have received a copy of the GNU General Public License | ||||||
|  |  * along with SmartCrop.  If not, see <http://www.gnu.org/licenses/>.
 | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | #pragma once | ||||||
|  | 
 | ||||||
|  | #include <opencv2/core/core.hpp> | ||||||
|  | #include <vector> | ||||||
|  | 
 | ||||||
|  | class SeamCarving | ||||||
|  | { | ||||||
|  | private: | ||||||
|  | 	static cv::Mat GetEnergyImg(const cv::Mat &img); | ||||||
|  | 	static cv::Mat computeGradientMagnitude(const cv::Mat &frame); | ||||||
|  | 	static float intensity(float currIndex, int start, int end); | ||||||
|  | 	static cv::Mat computePathIntensityMat(const cv::Mat &rawEnergyMap); | ||||||
|  | 	static std::vector<int> getLeastImportantPath(const cv::Mat &importanceMap); | ||||||
|  | 	static cv::Mat removeLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam); | ||||||
|  | 	static void removePixel(const cv::Mat &original, cv::Mat &outputMap, int row, int minCol); | ||||||
|  | 	static cv::Mat addLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam); | ||||||
|  | 	static void addPixel(const cv::Mat &original, cv::Mat &outputMat, int row, int minCol); | ||||||
|  | 	static cv::Mat drawSeam(const cv::Mat &frame, const std::vector<int> &seam); | ||||||
|  | 
 | ||||||
|  | public: | ||||||
|  | 	static bool strechImage(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect = nullptr); | ||||||
|  | 	static bool strechImageVert(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect = nullptr); | ||||||
|  | 	static bool strechImageWithSeamsImage(cv::Mat& image, cv::Mat& seamsImage, int seams, bool grow); | ||||||
|  | }; | ||||||
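A short usage sketch of the public interface above; the file names are placeholders and not part of this commit.

#include <opencv2/imgcodecs.hpp>
#include "seamcarving.h"

int main()
{
	// Hypothetical input file
	cv::Mat image = cv::imread("input.jpg");

	// Remove the 100 least important vertical seams, narrowing the image by 100 columns.
	bool ok = SeamCarving::strechImage(image, 100, /*grow*/ false);

	// Alternatively, grow the image by 50 rows along its least important horizontal seams:
	// SeamCarving::strechImageVert(image, 50, /*grow*/ true);

	if(ok)
		cv::imwrite("output.jpg", image);
	return 0;
}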
							
								
								
									
46  SmartCrop/tokenize.cpp  Normal file
|  | @ -0,0 +1,46 @@ | ||||||
|  | //
 | ||||||
|  | // SmartCrop - A tool for content aware cropping of images
 | ||||||
|  | // Copyright (C) 2024 Carl Philipp Klemm
 | ||||||
|  | //
 | ||||||
|  | // This file is part of SmartCrop.
 | ||||||
|  | //
 | ||||||
|  | // SmartCrop is free software: you can redistribute it and/or modify
 | ||||||
|  | // it under the terms of the GNU General Public License as published by
 | ||||||
|  | // the Free Software Foundation, either version 3 of the License, or
 | ||||||
|  | // (at your option) any later version.
 | ||||||
|  | //
 | ||||||
|  | // SmartCrop is distributed in the hope that it will be useful,
 | ||||||
|  | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||||
|  | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||||
|  | // GNU General Public License for more details.
 | ||||||
|  | //
 | ||||||
|  | // You should have received a copy of the GNU General Public License
 | ||||||
|  | // along with SmartCrop.  If not, see <http://www.gnu.org/licenses/>.
 | ||||||
|  | //
 | ||||||
|  | 
 | ||||||
|  | #include "tokenize.h" | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | std::vector<std::string> tokenizeBinaryIgnore(const std::string& str, const char delim, const char ignoreBraket, const char escapeChar) | ||||||
|  | { | ||||||
|  | 	std::vector<std::string> tokens; | ||||||
|  | 	std::string token; | ||||||
|  | 	bool inBaracket = false; | ||||||
|  | 	for(size_t i = 0; i < str.size(); ++i) | ||||||
|  | 	{ | ||||||
|  | 		if(str[i] == delim && !inBaracket && (i == 0 || str[i-1] != escapeChar)) | ||||||
|  | 		{ | ||||||
|  | 			tokens.push_back(token); | ||||||
|  | 			token.clear(); | ||||||
|  | 		} | ||||||
|  | 		else | ||||||
|  | 		{ | ||||||
|  | 			token.push_back(str[i]); | ||||||
|  | 		} | ||||||
|  | 		if(ignoreBraket == str[i]) | ||||||
|  | 			inBaracket = !inBaracket; | ||||||
|  | 	} | ||||||
|  | 	if(!inBaracket) | ||||||
|  | 		tokens.push_back(token); | ||||||
|  | 	return tokens; | ||||||
|  | } | ||||||
							
								
								
									
26  SmartCrop/tokenize.h  Normal file
|  | @ -0,0 +1,26 @@ | ||||||
|  | /* * SmartCrop - A tool for content aware cropping of images
 | ||||||
|  |  * Copyright (C) 2024 Carl Philipp Klemm | ||||||
|  |  * | ||||||
|  |  * This file is part of SmartCrop. | ||||||
|  |  * | ||||||
|  |  * SmartCrop is free software: you can redistribute it and/or modify | ||||||
|  |  * it under the terms of the GNU General Public License as published by | ||||||
|  |  * the Free Software Foundation, either version 3 of the License, or | ||||||
|  |  * (at your option) any later version. | ||||||
|  |  * | ||||||
|  |  * SmartCrop is distributed in the hope that it will be useful, | ||||||
|  |  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |  * GNU General Public License for more details. | ||||||
|  |  * | ||||||
|  |  * You should have received a copy of the GNU General Public License | ||||||
|  |  * along with SmartCrop.  If not, see <http://www.gnu.org/licenses/>.
 | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | #pragma once | ||||||
|  | 
 | ||||||
|  | #include <string> | ||||||
|  | #include <vector> | ||||||
|  | 
 | ||||||
|  | std::vector<std::string> tokenizeBinaryIgnore(const std::string& str, const char delim, const char ignoreBraket = '\0', | ||||||
|  | 											  const char escapeChar = '\0'); | ||||||
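For illustration, how the tokenizer behaves on a quoted, comma separated record in the style of Weights/classes.txt; the string literal below is a made-up example.

#include <cassert>
#include "tokenize.h"

int main()
{
	// Split on ',' while treating text between '"' characters as a single unit.
	std::vector<std::string> tokens = tokenizeBinaryIgnore("\"hot dog\", 1", ',', '"', '\\');
	assert(tokens.size() == 2);
	assert(tokens[0] == "\"hot dog\""); // quotes are preserved by the tokenizer itself
	assert(tokens[1] == " 1");
	return 0;
}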
							
								
								
									
80  SmartCrop/utils.cpp  Normal file
|  | @ -0,0 +1,80 @@ | ||||||
|  | //
 | ||||||
|  | // SmartCrop - A tool for content aware cropping of images
 | ||||||
|  | // Copyright (C) 2024 Carl Philipp Klemm
 | ||||||
|  | //
 | ||||||
|  | // This file is part of SmartCrop.
 | ||||||
|  | //
 | ||||||
|  | // SmartCrop is free software: you can redistribute it and/or modify
 | ||||||
|  | // it under the terms of the GNU General Public License as published by
 | ||||||
|  | // the Free Software Foundation, either version 3 of the License, or
 | ||||||
|  | // (at your option) any later version.
 | ||||||
|  | //
 | ||||||
|  | // SmartCrop is distributed in the hope that it will be useful,
 | ||||||
|  | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||||
|  | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||||
|  | // GNU General Public License for more details.
 | ||||||
|  | //
 | ||||||
|  | // You should have received a copy of the GNU General Public License
 | ||||||
|  | // along with SmartCrop.  If not, see <http://www.gnu.org/licenses/>.
 | ||||||
|  | //
 | ||||||
|  | 
 | ||||||
|  | #include "utils.h" | ||||||
|  | 
 | ||||||
|  | #include <filesystem> | ||||||
|  | #include <vector> | ||||||
|  | #include <opencv2/imgproc.hpp> | ||||||
|  | 
 | ||||||
|  | bool isImagePath(const std::filesystem::path& path) | ||||||
|  | { | ||||||
|  | 	return std::filesystem::is_regular_file(path) && (path.extension() == ".png" || path.extension() == ".jpg" || path.extension() == ".jpeg"); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void getImageFiles(const std::filesystem::path& path, std::vector<std::filesystem::path>& paths) | ||||||
|  | { | ||||||
|  | 	if(isImagePath(path)) | ||||||
|  | 	{ | ||||||
|  | 		paths.push_back(path); | ||||||
|  | 	} | ||||||
|  | 	else if(std::filesystem::is_directory(path)) | ||||||
|  | 	{ | ||||||
|  | 		for(const std::filesystem::directory_entry& dirent : std::filesystem::directory_iterator(path)) | ||||||
|  | 		{ | ||||||
|  | 			if(std::filesystem::is_directory(dirent.path())) | ||||||
|  | 				getImageFiles(dirent.path(), paths); | ||||||
|  | 			else if(isImagePath(dirent.path())) | ||||||
|  | 				paths.push_back(dirent.path()); | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | cv::Rect rectFromPoints(const std::vector<std::pair<cv::Point, int>>& points) | ||||||
|  | { | ||||||
|  | 	int left = std::numeric_limits<int>::max(); | ||||||
|  | 	int right = std::numeric_limits<int>::min(); | ||||||
|  | 	int top = std::numeric_limits<int>::max(); | ||||||
|  | 	int bottom = std::numeric_limits<int>::min(); | ||||||
|  | 
 | ||||||
|  | 	for(const std::pair<cv::Point, int>& point : points) | ||||||
|  | 	{ | ||||||
|  | 		left = point.first.x < left ? point.first.x : left; | ||||||
|  | 		right = point.first.x > right ? point.first.x : right; | ||||||
|  | 
 | ||||||
|  | 		top = point.first.y < top ? point.first.y : top; | ||||||
|  | 		bottom = point.first.y > bottom ? point.first.y : bottom; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return cv::Rect(left, top, right-left, bottom-top); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | double pointDist(const cv::Point2i& pointA, const cv::Point2i& pointB) | ||||||
|  | { | ||||||
|  | 	cv::Vec2i a(pointA.x, pointA.y); | ||||||
|  | 	cv::Vec2i b(pointB.x, pointB.y); | ||||||
|  | 	return cv::norm(a-b); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | bool pointInRect(const cv::Point2i& point, const cv::Rect& rect) | ||||||
|  | { | ||||||
|  | 	return point.x >= rect.x && point.x <= rect.x+rect.width && | ||||||
|  | 		   point.y >= rect.y && point.y <= rect.y+rect.height; | ||||||
|  | } | ||||||
							
								
								
									
34  SmartCrop/utils.h  Normal file
|  | @ -0,0 +1,34 @@ | ||||||
|  | /* * SmartCrop - A tool for content aware cropping of images
 | ||||||
|  |  * Copyright (C) 2024 Carl Philipp Klemm | ||||||
|  |  * | ||||||
|  |  * This file is part of SmartCrop. | ||||||
|  |  * | ||||||
|  |  * SmartCrop is free software: you can redistribute it and/or modify | ||||||
|  |  * it under the terms of the GNU General Public License as published by | ||||||
|  |  * the Free Software Foundation, either version 3 of the License, or | ||||||
|  |  * (at your option) any later version. | ||||||
|  |  * | ||||||
|  |  * SmartCrop is distributed in the hope that it will be useful, | ||||||
|  |  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |  * GNU General Public License for more details. | ||||||
|  |  * | ||||||
|  |  * You should have received a copy of the GNU General Public License | ||||||
|  |  * along with SmartCrop.  If not, see <http://www.gnu.org/licenses/>.
 | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | #pragma once | ||||||
|  | 
 | ||||||
|  | #include <filesystem> | ||||||
|  | #include <vector> | ||||||
|  | #include <opencv2/imgproc.hpp> | ||||||
|  | 
 | ||||||
|  | bool isImagePath(const std::filesystem::path& path); | ||||||
|  | 
 | ||||||
|  | void getImageFiles(const std::filesystem::path& path, std::vector<std::filesystem::path>& paths); | ||||||
|  | 
 | ||||||
|  | cv::Rect rectFromPoints(const std::vector<std::pair<cv::Point, int>>& points); | ||||||
|  | 
 | ||||||
|  | double pointDist(const cv::Point2i& pointA, const cv::Point2i& pointB); | ||||||
|  | 
 | ||||||
|  | bool pointInRect(const cv::Point2i& point, const cv::Rect& rect); | ||||||
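A brief sketch of the helpers declared above; the directory name is hypothetical.

#include <iostream>
#include "utils.h"

int main()
{
	// Recursively collect every .png/.jpg/.jpeg file below a directory.
	std::vector<std::filesystem::path> images;
	getImageFiles("photos", images);
	std::cout<<images.size()<<" images found\n";

	// Axis-aligned bounding rectangle around points; the int in each pair is not used by rectFromPoints.
	cv::Rect box = rectFromPoints({{cv::Point(10, 20), 1}, {cv::Point(50, 5), 1}});
	std::cout<<"box: "<<box.x<<','<<box.y<<' '<<box.width<<'x'<<box.height<<'\n';
	return 0;
}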
							
								
								
									
278  SmartCrop/yolo.cpp  Normal file
|  | @ -0,0 +1,278 @@ | ||||||
|  | //
 | ||||||
|  | // SmartCrop - A tool for content aware cropping of images
 | ||||||
|  | // Copyright (C) 2024 Carl Philipp Klemm
 | ||||||
|  | //
 | ||||||
|  | // This file is part of SmartCrop.
 | ||||||
|  | //
 | ||||||
|  | // SmartCrop is free software: you can redistribute it and/or modify
 | ||||||
|  | // it under the terms of the GNU General Public License as published by
 | ||||||
|  | // the Free Software Foundation, either version 3 of the License, or
 | ||||||
|  | // (at your option) any later version.
 | ||||||
|  | //
 | ||||||
|  | // SmartCrop is distributed in the hope that it will be useful,
 | ||||||
|  | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||||
|  | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||||
|  | // GNU General Public License for more details.
 | ||||||
|  | //
 | ||||||
|  | // You should have received a copy of the GNU General Public License
 | ||||||
|  | // along with SmartCrop.  If not, see <http://www.gnu.org/licenses/>.
 | ||||||
|  | //
 | ||||||
|  | 
 | ||||||
|  | #include <opencv2/dnn/dnn.hpp> | ||||||
|  | #include <algorithm> | ||||||
|  | #include <string> | ||||||
|  | #include <stdexcept> | ||||||
|  | 
 | ||||||
|  | #include "yolo.h" | ||||||
|  | #include "readfile.h" | ||||||
|  | #include "tokenize.h" | ||||||
|  | #include "log.h" | ||||||
|  | 
 | ||||||
|  | #define INCBIN_PREFIX r | ||||||
|  | #include "incbin.h" | ||||||
|  | 
 | ||||||
|  | INCTXT(defaultClasses, WEIGHT_DIR "/classes.txt"); | ||||||
|  | INCBIN(defaultModel, WEIGHT_DIR "/yolov8x.onnx"); | ||||||
|  | 
 | ||||||
|  | Yolo::Yolo(const std::filesystem::path &onnxModelPath, const cv::Size &modelInputShape, | ||||||
|  | 		const std::filesystem::path& classesTxtFilePath, bool runWithOCl) | ||||||
|  | { | ||||||
|  | 	modelPath = onnxModelPath; | ||||||
|  | 	modelShape = modelInputShape; | ||||||
|  | 
 | ||||||
|  | 	if(classesTxtFilePath.empty()) | ||||||
|  | 	{ | ||||||
|  | 		Log(Log::INFO)<<"Using builtin classes"; | ||||||
|  | 		loadClasses(rdefaultClassesData); | ||||||
|  | 	} | ||||||
|  | 	else | ||||||
|  | 	{ | ||||||
|  | 		std::string classesStr = readFile(classesTxtFilePath); | ||||||
|  | 		loadClasses(classesStr); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if(!modelPath.empty()) | ||||||
|  | 	{ | ||||||
|  | 		net = cv::dnn::readNetFromONNX(modelPath); | ||||||
|  | 	} | ||||||
|  | 	else | ||||||
|  | 	{ | ||||||
|  | 		Log(Log::INFO)<<"Using builtin yolo model"; | ||||||
|  | 		net = cv::dnn::readNetFromONNX((const char*)rdefaultModelData, rdefaultModelSize); | ||||||
|  | 	} | ||||||
|  | 	if(runWithOCl) | ||||||
|  | 	{ | ||||||
|  | 		net.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT); | ||||||
|  | 		net.setPreferableTarget(cv::dnn::DNN_TARGET_OPENCL); | ||||||
|  | 	} | ||||||
|  | 	else | ||||||
|  | 	{ | ||||||
|  | 		net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV); | ||||||
|  | 		net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU); | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | std::vector<Yolo::Detection> Yolo::runInference(const cv::Mat &input) | ||||||
|  | { | ||||||
|  | 	cv::Mat modelInput = input; | ||||||
|  | 	if (letterBoxForSquare && modelShape.width == modelShape.height) | ||||||
|  | 		modelInput = formatToSquare(modelInput); | ||||||
|  | 
 | ||||||
|  | 	cv::Mat blob; | ||||||
|  | 	cv::dnn::blobFromImage(modelInput, blob, 1.0/255.0, modelShape, cv::Scalar(), true, false); | ||||||
|  | 	net.setInput(blob); | ||||||
|  | 
 | ||||||
|  | 	std::vector<cv::Mat> outputs; | ||||||
|  | 	net.forward(outputs, net.getUnconnectedOutLayersNames()); | ||||||
|  | 
 | ||||||
|  | 	int rows = outputs[0].size[1]; | ||||||
|  | 	int dimensions = outputs[0].size[2]; | ||||||
|  | 
 | ||||||
|  | 	bool yolov8 = false; | ||||||
|  | 	// yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
 | ||||||
|  | 	// yolov8 has an output of shape (batchSize, 84,  8400) (Num classes + box[x,y,w,h])
 | ||||||
|  | 	if (dimensions > rows) // Check if the shape[2] is more than shape[1] (yolov8)
 | ||||||
|  | 	{ | ||||||
|  | 		yolov8 = true; | ||||||
|  | 		rows = outputs[0].size[2]; | ||||||
|  | 		dimensions = outputs[0].size[1]; | ||||||
|  | 
 | ||||||
|  | 		outputs[0] = outputs[0].reshape(1, dimensions); | ||||||
|  | 		cv::transpose(outputs[0], outputs[0]); | ||||||
|  | 	} | ||||||
|  | 	float *data = (float *)outputs[0].data; | ||||||
|  | 
 | ||||||
|  | 	float x_factor = modelInput.cols / modelShape.width; | ||||||
|  | 	float y_factor = modelInput.rows / modelShape.height; | ||||||
|  | 
 | ||||||
|  | 	std::vector<int> class_ids; | ||||||
|  | 	std::vector<float> confidences; | ||||||
|  | 	std::vector<cv::Rect> boxes; | ||||||
|  | 
 | ||||||
|  | 	for (int i = 0; i < rows; ++i) | ||||||
|  | 	{ | ||||||
|  | 		if (yolov8) | ||||||
|  | 		{ | ||||||
|  | 			float *classes_scores = data+4; | ||||||
|  | 
 | ||||||
|  | 			cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores); | ||||||
|  | 			cv::Point class_id; | ||||||
|  | 			double maxClassScore; | ||||||
|  | 
 | ||||||
|  | 			minMaxLoc(scores, 0, &maxClassScore, 0, &class_id); | ||||||
|  | 
 | ||||||
|  | 			if (maxClassScore > modelScoreThreshold) | ||||||
|  | 			{ | ||||||
|  | 				confidences.push_back(maxClassScore); | ||||||
|  | 				class_ids.push_back(class_id.x); | ||||||
|  | 
 | ||||||
|  | 				float x = data[0]; | ||||||
|  | 				float y = data[1]; | ||||||
|  | 				float w = data[2]; | ||||||
|  | 				float h = data[3]; | ||||||
|  | 
 | ||||||
|  | 				int left = int((x - 0.5 * w) * x_factor); | ||||||
|  | 				int top = int((y - 0.5 * h) * y_factor); | ||||||
|  | 
 | ||||||
|  | 				int width = int(w * x_factor); | ||||||
|  | 				int height = int(h * y_factor); | ||||||
|  | 
 | ||||||
|  | 				boxes.push_back(cv::Rect(left, top, width, height)); | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 		else // yolov5
 | ||||||
|  | 		{ | ||||||
|  | 			float confidence = data[4]; | ||||||
|  | 
 | ||||||
|  | 			if (confidence >= modelConfidenceThreshold) | ||||||
|  | 			{ | ||||||
|  | 				float *classes_scores = data+5; | ||||||
|  | 
 | ||||||
|  | 				cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores); | ||||||
|  | 				cv::Point class_id; | ||||||
|  | 				double max_class_score; | ||||||
|  | 
 | ||||||
|  | 				minMaxLoc(scores, 0, &max_class_score, 0, &class_id); | ||||||
|  | 
 | ||||||
|  | 				if (max_class_score > modelScoreThreshold) | ||||||
|  | 				{ | ||||||
|  | 					confidences.push_back(confidence); | ||||||
|  | 					class_ids.push_back(class_id.x); | ||||||
|  | 
 | ||||||
|  | 					float x = data[0]; | ||||||
|  | 					float y = data[1]; | ||||||
|  | 					float w = data[2]; | ||||||
|  | 					float h = data[3]; | ||||||
|  | 
 | ||||||
|  | 					int left = int((x - 0.5 * w) * x_factor); | ||||||
|  | 					int top = int((y - 0.5 * h) * y_factor); | ||||||
|  | 
 | ||||||
|  | 					int width = int(w * x_factor); | ||||||
|  | 					int height = int(h * y_factor); | ||||||
|  | 
 | ||||||
|  | 					boxes.push_back(cv::Rect(left, top, width, height)); | ||||||
|  | 				} | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		data += dimensions; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	std::vector<int> nms_result; | ||||||
|  | 	cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nms_result); | ||||||
|  | 
 | ||||||
|  | 	std::vector<Yolo::Detection> detections{}; | ||||||
|  | 	for(unsigned long i = 0; i < nms_result.size(); ++i) | ||||||
|  | 	{ | ||||||
|  | 		int idx = nms_result[i]; | ||||||
|  | 
 | ||||||
|  | 		Yolo::Detection result; | ||||||
|  | 		result.class_id = class_ids[idx]; | ||||||
|  | 		result.confidence = confidences[idx]; | ||||||
|  | 
 | ||||||
|  | 		std::random_device rd; | ||||||
|  | 		std::mt19937 gen(rd()); | ||||||
|  | 		std::uniform_int_distribution<int> dis(100, 255); | ||||||
|  | 		result.color = cv::Scalar(dis(gen), | ||||||
|  | 		                          dis(gen), | ||||||
|  | 		                          dis(gen)); | ||||||
|  | 
 | ||||||
|  | 		result.className = classes[result.class_id].first; | ||||||
|  | 		result.priority = classes[result.class_id].second; | ||||||
|  | 		clampBox(boxes[idx], input.size()); | ||||||
|  | 		result.box = boxes[idx]; | ||||||
|  | 		detections.push_back(result); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return detections; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | void Yolo::clampBox(cv::Rect& box, const cv::Size& size) | ||||||
|  | { | ||||||
|  | 	if(box.x < 0) | ||||||
|  | 	{ | ||||||
|  | 		box.width += box.x; | ||||||
|  | 		box.x = 0; | ||||||
|  | 	} | ||||||
|  | 	if(box.y < 0) | ||||||
|  | 	{ | ||||||
|  | 		box.height += box.y; | ||||||
|  | 		box.y = 0; | ||||||
|  | 	} | ||||||
|  | 	if(box.x+box.width > size.width) | ||||||
|  | 		box.width = size.width - box.x; | ||||||
|  | 	if(box.y+box.height > size.height) | ||||||
|  | 		box.height = size.height - box.y; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void Yolo::loadClasses(const std::string& classesStr) | ||||||
|  | { | ||||||
|  | 	std::vector<std::string> candidateClasses = tokenizeBinaryIgnore(classesStr, '\n', '"', '\\'); | ||||||
|  | 	classes.clear(); | ||||||
|  | 	for(std::string& instance : candidateClasses) | ||||||
|  | 	{ | ||||||
|  | 		if(instance.size() < 2) | ||||||
|  | 			continue; | ||||||
|  | 
 | ||||||
|  | 		std::vector<std::string> tokens = tokenizeBinaryIgnore(instance, ',', '"', '\\'); | ||||||
|  | 
 | ||||||
|  | 		if(tokens[0].front() == '"') | ||||||
|  | 			tokens[0].erase(tokens[0].begin()); | ||||||
|  | 		if(tokens[0].back() == '"') | ||||||
|  | 			tokens[0].pop_back(); | ||||||
|  | 		int priority = -1; | ||||||
|  | 		if(tokens.size() > 1) | ||||||
|  | 		{ | ||||||
|  | 			try | ||||||
|  | 			{ | ||||||
|  | 				priority = std::stoi(tokens[1]); | ||||||
|  | 			} | ||||||
|  | 			catch(const std::invalid_argument& err) | ||||||
|  | 			{ | ||||||
|  | 				Log(Log::WARN)<<"unable to get priority for class "<<tokens[0]<<' '<<err.what(); | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 		classes.push_back({tokens[0], priority}); | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | cv::Mat Yolo::formatToSquare(const cv::Mat &source) | ||||||
|  | { | ||||||
|  | 	int col = source.cols; | ||||||
|  | 	int row = source.rows; | ||||||
|  | 	int _max = MAX(col, row); | ||||||
|  | 	cv::Mat result = cv::Mat::zeros(_max, _max, CV_8UC3); | ||||||
|  | 	source.copyTo(result(cv::Rect(0, 0, col, row))); | ||||||
|  | 	return result; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | int Yolo::getClassForStr(const std::string& str) const | ||||||
|  | { | ||||||
|  | 	for(size_t i = 0; i < classes.size(); ++i) | ||||||
|  | 	{ | ||||||
|  | 		if(classes[i].first == str) | ||||||
|  | 			return i; | ||||||
|  | 	} | ||||||
|  | 	return -1; | ||||||
|  | } | ||||||
							
								
								
									
65  SmartCrop/yolo.h  Normal file
|  | @ -0,0 +1,65 @@ | ||||||
|  | /* * SmartCrop - A tool for content aware cropping of images
 | ||||||
|  |  * Copyright (C) 2024 Carl Philipp Klemm | ||||||
|  |  * | ||||||
|  |  * This file is part of SmartCrop. | ||||||
|  |  * | ||||||
|  |  * SmartCrop is free software: you can redistribute it and/or modify | ||||||
|  |  * it under the terms of the GNU General Public License as published by | ||||||
|  |  * the Free Software Foundation, either version 3 of the License, or | ||||||
|  |  * (at your option) any later version. | ||||||
|  |  * | ||||||
|  |  * SmartCrop is distributed in the hope that it will be useful, | ||||||
|  |  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |  * GNU General Public License for more details. | ||||||
|  |  * | ||||||
|  |  * You should have received a copy of the GNU General Public License | ||||||
|  |  * along with SmartCrop.  If not, see <http://www.gnu.org/licenses/>.
 | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | #pragma once | ||||||
|  | 
 | ||||||
|  | #include <fstream> | ||||||
|  | #include <vector> | ||||||
|  | #include <string> | ||||||
|  | #include <random> | ||||||
|  | #include <filesystem> | ||||||
|  | #include <opencv2/imgproc.hpp> | ||||||
|  | #include <opencv2/opencv.hpp> | ||||||
|  | #include <opencv2/dnn.hpp> | ||||||
|  | 
 | ||||||
|  | class Yolo | ||||||
|  | { | ||||||
|  | public: | ||||||
|  | 	struct Detection | ||||||
|  | 	{ | ||||||
|  | 		int class_id = 0; | ||||||
|  | 		std::string className; | ||||||
|  | 		float confidence = 0.0; | ||||||
|  | 		int priority = -1; | ||||||
|  | 		cv::Scalar color; | ||||||
|  | 		cv::Rect box; | ||||||
|  | 	}; | ||||||
|  | 
 | ||||||
|  | private: | ||||||
|  | 	static constexpr float modelConfidenceThreshold = 0.25; | ||||||
|  | 	static constexpr float modelScoreThreshold = 0.45; | ||||||
|  | 	static constexpr float modelNMSThreshold = 0.50; | ||||||
|  | 
 | ||||||
|  | 	std::string modelPath; | ||||||
|  | 	std::vector<std::pair<std::string, int>> classes; | ||||||
|  | 	cv::Size2f modelShape; | ||||||
|  | 	bool letterBoxForSquare = true; | ||||||
|  | 	cv::dnn::Net net; | ||||||
|  | 
 | ||||||
|  | 	void loadClasses(const std::string& classes); | ||||||
|  | 	void loadOnnxNetwork(const std::filesystem::path& path); | ||||||
|  | 	cv::Mat formatToSquare(const cv::Mat &source); | ||||||
|  | 	static void clampBox(cv::Rect& box, const cv::Size& size); | ||||||
|  | 
 | ||||||
|  | public: | ||||||
|  | 	Yolo(const std::filesystem::path &onnxModelPath = "", const cv::Size& modelInputShape = {640, 480}, | ||||||
|  | 		const std::filesystem::path& classesTxtFilePath = "", bool runWithOCl = true); | ||||||
|  | 	std::vector<Detection> runInference(const cv::Mat &input); | ||||||
|  | 	int getClassForStr(const std::string& str) const; | ||||||
|  | }; | ||||||
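A usage sketch of the detector class above, relying on the weights and class list embedded via incbin; the input file name is a placeholder.

#include <iostream>
#include <opencv2/imgcodecs.hpp>
#include "yolo.h"

int main()
{
	// Default construction uses the built-in yolov8x weights and the built-in class list.
	Yolo yolo;

	// Hypothetical input image
	cv::Mat image = cv::imread("input.jpg");

	for(const Yolo::Detection& detection : yolo.runInference(image))
		std::cout<<detection.className<<" priority "<<detection.priority
		         <<" confidence "<<detection.confidence<<'\n';
	return 0;
}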
							
								
								
									
80  Weights/classes.txt  Normal file
|  | @ -0,0 +1,80 @@ | ||||||
|  | person, 10 | ||||||
|  | bicycle, 4 | ||||||
|  | car, 3 | ||||||
|  | motorcycle, 4 | ||||||
|  | airplane, 4 | ||||||
|  | bus, 4 | ||||||
|  | train, 4 | ||||||
|  | truck, 3 | ||||||
|  | boat, 4 | ||||||
|  | traffic light, 1 | ||||||
|  | fire hydrant, 1 | ||||||
|  | stop sign, 1 | ||||||
|  | parking meter, 1 | ||||||
|  | bench, 2 | ||||||
|  | bird, 5 | ||||||
|  | cat, 6 | ||||||
|  | dog, 5 | ||||||
|  | horse, 4 | ||||||
|  | sheep, 5 | ||||||
|  | cow, 4 | ||||||
|  | elephant, 5 | ||||||
|  | bear, 5 | ||||||
|  | zebra, 5 | ||||||
|  | giraffe, 5 | ||||||
|  | backpack, 3 | ||||||
|  | umbrella, 3 | ||||||
|  | handbag, 3 | ||||||
|  | tie, 3 | ||||||
|  | suitcase, 2 | ||||||
|  | frisbee, 3 | ||||||
|  | skis, 3 | ||||||
|  | snowboard, 3 | ||||||
|  | sports ball, 3 | ||||||
|  | kite, 4 | ||||||
|  | baseball bat, 3 | ||||||
|  | baseball glove, 3 | ||||||
|  | skateboard, 3 | ||||||
|  | surfboard, 3 | ||||||
|  | tennis racket, 3 | ||||||
|  | bottle, 2 | ||||||
|  | wine glass, 2 | ||||||
|  | cup, 2 | ||||||
|  | fork, 1 | ||||||
|  | knife, 1 | ||||||
|  | spoon, 1 | ||||||
|  | bowl, 1 | ||||||
|  | banana, 1 | ||||||
|  | apple, 1 | ||||||
|  | sandwich, 1 | ||||||
|  | orange, 1 | ||||||
|  | broccoli, 1 | ||||||
|  | carrot, 1 | ||||||
|  | hot dog, 1 | ||||||
|  | pizza, 1 | ||||||
|  | donut, 2 | ||||||
|  | cake, 2 | ||||||
|  | chair, 1 | ||||||
|  | couch, 1 | ||||||
|  | potted plant, 1 | ||||||
|  | bed, 1 | ||||||
|  | dining table, 1 | ||||||
|  | toilet, 1 | ||||||
|  | tv, 1 | ||||||
|  | laptop, 1 | ||||||
|  | mouse, 1 | ||||||
|  | remote, 1 | ||||||
|  | keyboard, 1 | ||||||
|  | cell phone, 1 | ||||||
|  | microwave, 1 | ||||||
|  | oven, 1 | ||||||
|  | toaster, 1 | ||||||
|  | sink, 1 | ||||||
|  | refrigerator, 1 | ||||||
|  | book, 1 | ||||||
|  | clock, 1 | ||||||
|  | vase, 1 | ||||||
|  | scissors, 1 | ||||||
|  | teddy bear, 1 | ||||||
|  | hair drier, 1 | ||||||
|  | toothbrush, 1 | ||||||
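Each line above is a class name followed by an optional integer that Yolo::loadClasses() parses as that class's priority, which presumably ranks how strongly a detected subject should be kept when cropping. A tiny lookup sketch, assuming the built-in list:

#include "yolo.h"

int main()
{
	Yolo yolo; // built-in weights and the class list above
	int id = yolo.getClassForStr("person"); // 0 for this list; -1 for an unknown name
	return id >= 0 ? 0 : 1;
}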
							
								
								
									
										
BIN  Weights/face_detection_yunet_2023mar.onnx  Normal file (binary file not shown)
							
								
								
									
										
BIN  Weights/face_recognition_sface_2021dec.onnx  Normal file (binary file not shown)