initial commit
This commit is contained in:
commit
cd1e2756bc
7
CMakeLists.txt
Normal file
7
CMakeLists.txt
Normal file
@ -0,0 +1,7 @@
|
||||
# CMAKE_CXX_STANDARD only accepts the value 17 from CMake 3.8 onwards, so the
# declared minimum has to be at least that version.
cmake_minimum_required(VERSION 3.8)
project(ImageAiUtils)

set(CMAKE_CXX_STANDARD 17)
# Absolute path of the Weights directory next to this file.
# NOTE(review): presumably read by the subdirectories added below - confirm.
set(WEIGHT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/Weights)

add_subdirectory(SmartCrop)
|
105
DanbooruTagger/DanbooruTagger.py
Normal file
105
DanbooruTagger/DanbooruTagger.py
Normal file
@ -0,0 +1,105 @@
|
||||
import warnings
|
||||
from deepdanbooru_onnx import DeepDanbooru
|
||||
from PIL import Image
|
||||
import argparse
|
||||
import cv2
|
||||
import os
|
||||
from multiprocessing import Process, Queue
|
||||
import json
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
# File extensions (lower case, with leading dot) that are treated as images.
image_ext_ocv = [".bmp", ".jpeg", ".jpg", ".png"]


def find_image_files(path: str) -> list[str]:
    """Recursively collect the image files below *path*.

    A file counts as an image when its extension, compared case-insensitively,
    is listed in ``image_ext_ocv``.

    :param path: directory to walk
    :return: the matching file paths, each joined with the directory it was found in
    """
    return [
        os.path.join(root, filename)
        for root, _dirs, files in os.walk(path)
        for filename in files
        if os.path.splitext(filename)[1].lower() in image_ext_ocv
    ]
|
||||
|
||||
|
||||
def image_loader(paths: list[str]):
    """Lazily load images with OpenCV and yield them as RGB PIL images.

    Files that OpenCV cannot read are reported on stdout and skipped.

    :param paths: paths of the image files to load
    :return: a generator of ``(PIL image, path)`` tuples
    """
    for path in paths:
        imagebgr = cv2.imread(path)
        # The read result must be checked before the colour conversion:
        # cvtColor raises when it is handed the None of a failed read.
        if imagebgr is None:
            print(f"Warning: could not load {path}")
            continue
        image = cv2.cvtColor(imagebgr, cv2.COLOR_BGR2RGB)
        yield Image.fromarray(image), path
|
||||
|
||||
|
||||
def pipeline(queue: Queue, image_paths: list[str], device: int):
    """Worker entry point: tag every image and push one record per image to *queue*.

    Each record is a dict with the keys ``file_name`` (the path as given) and
    ``text`` (every tag prefixed with ``", "``, so the text starts with a separator).

    :param queue: queue that receives the result dicts
    :param image_paths: the image files this worker is responsible for
    :param device: index passed in by the caller; not used by this function
    """
    tagger = DeepDanbooru()

    for image_path in image_paths:
        caption = "".join(", " + tag for tag in tagger(image_path))
        queue.put({"file_name": image_path, "text": caption})
|
||||
|
||||
|
||||
def split_list(input_list, count):
    """Yield *count* consecutive slices that together cover *input_list*.

    The first ``count - 1`` slices each hold ``int(len(input_list) / count)``
    items; the final slice takes everything that is left, including the
    remainder of the division.

    :param input_list: the sequence to split
    :param count: number of slices to produce
    """
    chunk_size = int(len(input_list) / count)
    last = count - 1
    for index in range(last):
        start = index * chunk_size
        yield input_list[start:start + chunk_size]
    yield input_list[last * chunk_size:]
|
||||
|
||||
|
||||
def save_meta(meta_file, meta, reldir, common_description):
    """Write one metadata record as a JSON line.

    *meta* is updated in place: ``file_name`` is made relative to *reldir* and
    ``text`` is combined with the optional common description.

    :param meta_file: writable text file object receiving the JSON line
    :param meta: dict with the keys ``file_name`` and ``text``
    :param reldir: directory that ``file_name`` is made relative to
    :param common_description: text prepended to ``text``, or None
    """
    meta["file_name"] = os.path.relpath(meta["file_name"], reldir)
    if common_description:
        meta["text"] = common_description + meta["text"]
    else:
        # The generated text starts with a ", " separator that only makes
        # sense after a description; without one, drop it so the caption
        # does not begin with a dangling comma.
        meta["text"] = meta["text"].removeprefix(", ")
    meta_file.write(json.dumps(meta) + '\n')
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Imported locally because the name ``queue`` is used for the Queue
    # instance below; only the exception type is needed.
    from queue import Empty

    parser = argparse.ArgumentParser("A script to tag images via DeepDanbooru")
    # NOTE: --batch is parsed but not used anywhere in this script.
    parser.add_argument('--batch', '-b', default=4, type=int, help="Batch size to use for inference")
    parser.add_argument('--common_description', '-c', help="An optional description that will be preended to the ai generated one")
    # Required: every later step needs a directory to walk and to write into.
    parser.add_argument('--image_dir', '-i', required=True, help="A directory containg the images to tag")
    args = parser.parse_args()

    # Number of worker processes; the image list is split into this many chunks.
    nparalell = 2

    image_paths = find_image_files(args.image_dir)
    image_path_chunks = list(split_list(image_paths, nparalell))

    print(f"Will use {nparalell} processies to create tags")

    queue = Queue()
    processies = list()
    for i in range(0, nparalell):
        processies.append(Process(target=pipeline, args=(queue, image_path_chunks[i], i)))
        processies[-1].start()

    progress = tqdm(desc="Generateing tags", total=len(image_paths))
    with open(os.path.join(args.image_dir, "metadata.jsonl"), mode='w') as output_file:
        while True:
            try:
                # Block briefly instead of spinning on queue.empty(), which
                # would keep one CPU core busy for the whole run.
                meta = queue.get(timeout=0.1)
            except Empty:
                if not any(process.is_alive() for process in processies):
                    break
                continue
            save_meta(output_file, meta, args.image_dir, args.common_description)
            progress.update()

        # A worker may have queued results between the timed-out get and the
        # liveness check above, so drain whatever is left.
        while not queue.empty():
            meta = queue.get()
            save_meta(output_file, meta, args.image_dir, args.common_description)
            progress.update()
    progress.close()

    for process in processies:
        process.join()
|
||||
|
3
DanbooruTagger/deepdanbooru_onnx/__init__.py
Normal file
3
DanbooruTagger/deepdanbooru_onnx/__init__.py
Normal file
@ -0,0 +1,3 @@
|
||||
from .deepdanbooru_onnx import DeepDanbooru
|
||||
from .deepdanbooru_onnx import process_image
|
||||
__version__ = '0.0.8'
|
Binary file not shown.
Binary file not shown.
244
DanbooruTagger/deepdanbooru_onnx/deepdanbooru_onnx.py
Normal file
244
DanbooruTagger/deepdanbooru_onnx/deepdanbooru_onnx.py
Normal file
@ -0,0 +1,244 @@
|
||||
import onnxruntime as ort
|
||||
from PIL import Image
|
||||
import numpy as np
|
||||
import os
|
||||
from tqdm import tqdm
|
||||
import requests
|
||||
import hashlib
|
||||
from typing import List, Union
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def process_image(image: Image.Image) -> np.ndarray:
    """
    Turn a PIL image into a model input array.

    The image is converted to RGB, resized to 512x512 and scaled by 1/255 as
    float32; the result has the shape (1, 512, 512, 3).
    :param image: the image to convert
    :return: the numpy array
    """
    rgb = image.convert("RGB")
    resized = rgb.resize((512, 512))
    pixels = np.array(resized).astype(np.float32) / 255
    # Height/width/channel -> channel-first with a batch axis, then back to
    # channel-last; the shape ends up as (1, 512, 512, 3).
    channel_first = pixels.transpose((2, 0, 1))
    batched = channel_first.reshape(1, 3, 512, 512)
    return batched.transpose((0, 2, 3, 1))
|
||||
|
||||
|
||||
def download(url: str, save_path: str, md5: str, length: int) -> bool:
    """
    Download a file from url to save_path and verify its md5.

    The file is always fetched (an existing file at save_path is overwritten).
    Any error raised while downloading or verifying is printed and reported
    as a failed download.
    :param url: the url of the file to download
    :param save_path: the path to save the file
    :param md5: the expected md5 hex digest of the file
    :param length: the expected size in bytes, used as the progress bar total
    :return: True if the file was downloaded and its md5 matches, False otherwise
    """
    try:
        with requests.get(url=url, stream=True) as response:
            # Do not store an HTTP error page as if it were the payload.
            response.raise_for_status()
            with open(save_path, "wb") as f:
                with tqdm.wrapattr(
                    response.raw, "read", total=length, desc="Downloading"
                ) as r_raw:
                    shutil.copyfileobj(r_raw, f)

        # Hash in chunks so a large file is never held in memory as a whole,
        # and so the file handle is closed deterministically.
        digest = hashlib.md5()
        with open(save_path, "rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):
                digest.update(chunk)
        return digest.hexdigest() == md5
    except Exception as e:
        # Deliberately broad: callers only look at the boolean result.
        print(e)
        return False
|
||||
|
||||
|
||||
def _md5_of_file(path: str) -> str:
    """Return the md5 hex digest of the file at *path*, read in chunks."""
    digest = hashlib.md5()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest()


def _ensure_verified_file(url: str, path: str, md5: str, length: int, label: str) -> None:
    """Make sure *path* holds a file matching *md5*, downloading it if needed."""
    if os.path.exists(path):
        if _md5_of_file(path) == md5:
            return
        # A stale or corrupt copy is removed before fetching a fresh one.
        os.remove(path)
    if not download(url, path, md5, length):
        raise ValueError(f"{label} download failed")


def download_model():
    """
    Download the model and tags file from the server.

    Both files are kept in ``.deepdanbooru_onnx`` inside the user's home
    directory. A file that is already present with the expected md5 is
    reused; otherwise it is (re)downloaded.
    :return: the path to the model and tags file
    :raises ValueError: if a download fails or its md5 does not match
    """
    model_url = (
        "https://huggingface.co/chinoll/deepdanbooru/resolve/main/deepdanbooru.onnx"
    )
    tags_url = "https://huggingface.co/chinoll/deepdanbooru/resolve/main/tags.txt"
    model_md5 = "16be4e40ebcc0b1d1915bbf31f00969f"
    tags_md5 = "a3f764de985cdeba89f1d232a4204402"
    model_length = 643993025
    tags_length = 133810

    home = str(Path.home()) + "/.deepdanbooru_onnx/"
    # exist_ok avoids a FileExistsError when several processes get here at
    # the same time (a separate exists() check followed by mkdir() can race).
    os.makedirs(home, exist_ok=True)

    model_path = home + "deepdanbooru.onnx"
    tags_path = home + "tags.txt"

    _ensure_verified_file(model_url, model_path, model_md5, model_length, "Model")
    _ensure_verified_file(tags_url, tags_path, tags_md5, tags_length, "Tags")
    return model_path, tags_path
|
||||
|
||||
|
||||
class DeepDanbooru:
    """Tag images with the DeepDanbooru ONNX model.

    Instances are callable: depending on the argument type the call is
    dispatched to one of the ``from_*`` methods. Results are dicts mapping a
    tag to its score for every tag whose score reaches the threshold.
    """

    def __init__(
        self,
        mode: str = "auto",
        model_path: Union[str, None] = None,
        tags_path: Union[str, None] = None,
        threshold: Union[float, int] = 0.6,
        pin_memory: bool = False,
        batch_size: int = 1,
    ):
        """
        Initialize the DeepDanbooru class.
        :param mode: the execution provider to use: "cpu", "gpu", "tensorrt" or
            "auto" (CUDA when available, otherwise CPU)
        :param model_path: the path to the model file; downloaded when None
        :param tags_path: the path to the tags file; downloaded when None
        :param threshold: minimum score (0..1) a tag needs to be reported
        :param pin_memory: when True, from_list_inference preprocesses all
            images of the list up front instead of batch by batch
        :param batch_size: number of images per inference call in
            from_list_inference
        :raises TypeError: if threshold is not a float or int
        :raises ValueError: if threshold is out of range, or the mode is
            unknown or not available on this machine
        :raises FileNotFoundError: if a given model or tags path does not exist
        """
        available = ort.get_available_providers()
        providers = {
            "cpu": "CPUExecutionProvider",
            "gpu": "CUDAExecutionProvider",
            "tensorrt": "TensorrtExecutionProvider",
            "auto": (
                "CUDAExecutionProvider"
                if "CUDAExecutionProvider" in available
                else "CPUExecutionProvider"
            ),
        }

        if not isinstance(threshold, (float, int)):
            raise TypeError("threshold must be float or int")
        if threshold < 0 or threshold > 1:
            raise ValueError("threshold must be between 0 and 1")
        if mode not in providers:
            raise ValueError(
                "Mode not supported. Please choose from: cpu, gpu, tensorrt, auto"
            )
        if providers[mode] not in available:
            raise ValueError(
                f"Your device is not supported {mode}. Please choose from: cpu"
            )
        if model_path is not None and not os.path.exists(model_path):
            raise FileNotFoundError("Model file not found")
        if tags_path is not None and not os.path.exists(tags_path):
            raise FileNotFoundError("Tags file not found")

        if model_path is None or tags_path is None:
            # Only fill in what the caller left out, so an explicitly given
            # path is never silently replaced by the downloaded default.
            default_model, default_tags = download_model()
            if model_path is None:
                model_path = default_model
            if tags_path is None:
                tags_path = default_tags

        self.session = ort.InferenceSession(model_path, providers=[providers[mode]])
        with open(tags_path, "r") as tags_file:
            self.tags = [line.replace("\n", "") for line in tags_file]

        self.input_name = self.session.get_inputs()[0].name
        self.output_name = [output.name for output in self.session.get_outputs()]
        self.threshold = threshold
        self.pin_memory = pin_memory
        self.batch_size = batch_size
        self.mode = mode
        # Maps an image hash to its tag dict; filled by from_list_inference.
        self.cache = {}

    def __str__(self) -> str:
        return f"DeepDanbooru(mode={self.mode}, threshold={self.threshold}, pin_memory={self.pin_memory}, batch_size={self.batch_size})"

    def __repr__(self) -> str:
        return self.__str__()

    def _select_tags(self, scores) -> dict:
        """Map each tag to its score, keeping only scores >= the threshold."""
        return {
            tag: score
            for tag, score in zip(self.tags, scores)
            if score >= self.threshold
        }

    def _load_file(self, path: str) -> np.ndarray:
        """Open the image file at *path* and return its preprocessed array."""
        with Image.open(path) as img:
            return process_image(img)

    def from_image_inference(self, image: "Image.Image") -> dict:
        """Tag a PIL image and return its tag -> score dict."""
        image = process_image(image)
        return self.predict(image)

    def from_ndarray_inferece(self, image: np.ndarray) -> dict:
        """Tag an already preprocessed array of shape (1, 512, 512, 3).

        :raises ValueError: if the array has any other shape
        """
        if image.shape != (1, 512, 512, 3):
            raise ValueError(f"Image must be {(1, 512, 512, 3)}")
        return self.predict(image)

    # Correctly spelled alias; the original name is kept for existing callers.
    from_ndarray_inference = from_ndarray_inferece

    def from_file_inference(self, image: str) -> dict:
        """Tag the image file at the given path and return its tag -> score dict."""
        with Image.open(image) as img:
            return self.from_image_inference(img)

    def from_list_inference(self, image: Union[list, tuple]) -> List[dict]:
        """Tag a list of image files, ``batch_size`` images per inference call.

        This is a generator: it yields one tag -> score dict per input path,
        in input order. Images whose preprocessed content was seen before are
        answered from ``self.cache`` without running the model again.

        :param image: list or tuple of image file paths
        """
        if self.pin_memory:
            # Preprocess the complete list once, before any inference runs.
            image = [self._load_file(path) for path in image]

        for start in range(0, len(image), self.batch_size):
            batch = image[start : start + self.batch_size]

            hashes = []
            pending = []  # preprocessed arrays that still need inference
            pending_positions = []  # their positions inside this batch
            for position, item in enumerate(batch):
                array = item if self.pin_memory else self._load_file(item)
                # Hash the exact bytes that go into the model. Casting the
                # 0..1 float data to uint8 would collapse almost every image
                # to the same key, and tobytes() also copes with the
                # non-contiguous layout produced by the preprocessing.
                digest = hashlib.md5(array.tobytes()).hexdigest()
                hashes.append(digest)
                if digest not in self.cache:
                    pending.append(array)
                    pending_positions.append(position)

            results = self.inference(np.vstack(pending)) if pending else []
            fresh = dict(zip(pending_positions, results))

            for position, digest in enumerate(hashes):
                if position in fresh:
                    image_tag = self._select_tags(fresh[position])
                    self.cache[digest] = image_tag
                    yield image_tag
                else:
                    yield self.cache[digest]

    def inference(self, image):
        """Run the ONNX session on *image* and return its first output."""
        return self.session.run(self.output_name, {self.input_name: image})[0]

    def predict(self, image):
        """Run inference on a single-image batch and return its tag -> score dict."""
        result = self.inference(image)
        return self._select_tags(result[0])

    def __call__(self, image) -> Union[dict, List[dict]]:
        """Dispatch to the matching ``from_*`` method based on the argument type.

        :param image: a file path, a preprocessed ndarray, a list/tuple of
            file paths, or a PIL image
        :raises ValueError: if the argument is of none of these types
        """
        if isinstance(image, str):
            return self.from_file_inference(image)
        elif isinstance(image, np.ndarray):
            return self.from_ndarray_inferece(image)
        elif isinstance(image, (list, tuple)):
            return self.from_list_inference(image)
        elif isinstance(image, Image.Image):
            return self.from_image_inference(image)
        else:
            raise ValueError("Image must be a file path or a numpy array or list/tuple")
|
674
LICENSE
Normal file
674
LICENSE
Normal file
@ -0,0 +1,674 @@
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
Version 3, 29 June 2007
|
||||
|
||||
Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
Preamble
|
||||
|
||||
The GNU General Public License is a free, copyleft license for
|
||||
software and other kinds of works.
|
||||
|
||||
The licenses for most software and other practical works are designed
|
||||
to take away your freedom to share and change the works. By contrast,
|
||||
the GNU General Public License is intended to guarantee your freedom to
|
||||
share and change all versions of a program--to make sure it remains free
|
||||
software for all its users. We, the Free Software Foundation, use the
|
||||
GNU General Public License for most of our software; it applies also to
|
||||
any other work released this way by its authors. You can apply it to
|
||||
your programs, too.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not
|
||||
price. Our General Public Licenses are designed to make sure that you
|
||||
have the freedom to distribute copies of free software (and charge for
|
||||
them if you wish), that you receive source code or can get it if you
|
||||
want it, that you can change the software or use pieces of it in new
|
||||
free programs, and that you know you can do these things.
|
||||
|
||||
To protect your rights, we need to prevent others from denying you
|
||||
these rights or asking you to surrender the rights. Therefore, you have
|
||||
certain responsibilities if you distribute copies of the software, or if
|
||||
you modify it: responsibilities to respect the freedom of others.
|
||||
|
||||
For example, if you distribute copies of such a program, whether
|
||||
gratis or for a fee, you must pass on to the recipients the same
|
||||
freedoms that you received. You must make sure that they, too, receive
|
||||
or can get the source code. And you must show them these terms so they
|
||||
know their rights.
|
||||
|
||||
Developers that use the GNU GPL protect your rights with two steps:
|
||||
(1) assert copyright on the software, and (2) offer you this License
|
||||
giving you legal permission to copy, distribute and/or modify it.
|
||||
|
||||
For the developers' and authors' protection, the GPL clearly explains
|
||||
that there is no warranty for this free software. For both users' and
|
||||
authors' sake, the GPL requires that modified versions be marked as
|
||||
changed, so that their problems will not be attributed erroneously to
|
||||
authors of previous versions.
|
||||
|
||||
Some devices are designed to deny users access to install or run
|
||||
modified versions of the software inside them, although the manufacturer
|
||||
can do so. This is fundamentally incompatible with the aim of
|
||||
protecting users' freedom to change the software. The systematic
|
||||
pattern of such abuse occurs in the area of products for individuals to
|
||||
use, which is precisely where it is most unacceptable. Therefore, we
|
||||
have designed this version of the GPL to prohibit the practice for those
|
||||
products. If such problems arise substantially in other domains, we
|
||||
stand ready to extend this provision to those domains in future versions
|
||||
of the GPL, as needed to protect the freedom of users.
|
||||
|
||||
Finally, every program is threatened constantly by software patents.
|
||||
States should not allow patents to restrict development and use of
|
||||
software on general-purpose computers, but in those that do, we wish to
|
||||
avoid the special danger that patents applied to a free program could
|
||||
make it effectively proprietary. To prevent this, the GPL assures that
|
||||
patents cannot be used to render the program non-free.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow.
|
||||
|
||||
TERMS AND CONDITIONS
|
||||
|
||||
0. Definitions.
|
||||
|
||||
"This License" refers to version 3 of the GNU General Public License.
|
||||
|
||||
"Copyright" also means copyright-like laws that apply to other kinds of
|
||||
works, such as semiconductor masks.
|
||||
|
||||
"The Program" refers to any copyrightable work licensed under this
|
||||
License. Each licensee is addressed as "you". "Licensees" and
|
||||
"recipients" may be individuals or organizations.
|
||||
|
||||
To "modify" a work means to copy from or adapt all or part of the work
|
||||
in a fashion requiring copyright permission, other than the making of an
|
||||
exact copy. The resulting work is called a "modified version" of the
|
||||
earlier work or a work "based on" the earlier work.
|
||||
|
||||
A "covered work" means either the unmodified Program or a work based
|
||||
on the Program.
|
||||
|
||||
To "propagate" a work means to do anything with it that, without
|
||||
permission, would make you directly or secondarily liable for
|
||||
infringement under applicable copyright law, except executing it on a
|
||||
computer or modifying a private copy. Propagation includes copying,
|
||||
distribution (with or without modification), making available to the
|
||||
public, and in some countries other activities as well.
|
||||
|
||||
To "convey" a work means any kind of propagation that enables other
|
||||
parties to make or receive copies. Mere interaction with a user through
|
||||
a computer network, with no transfer of a copy, is not conveying.
|
||||
|
||||
An interactive user interface displays "Appropriate Legal Notices"
|
||||
to the extent that it includes a convenient and prominently visible
|
||||
feature that (1) displays an appropriate copyright notice, and (2)
|
||||
tells the user that there is no warranty for the work (except to the
|
||||
extent that warranties are provided), that licensees may convey the
|
||||
work under this License, and how to view a copy of this License. If
|
||||
the interface presents a list of user commands or options, such as a
|
||||
menu, a prominent item in the list meets this criterion.
|
||||
|
||||
1. Source Code.
|
||||
|
||||
The "source code" for a work means the preferred form of the work
|
||||
for making modifications to it. "Object code" means any non-source
|
||||
form of a work.
|
||||
|
||||
A "Standard Interface" means an interface that either is an official
|
||||
standard defined by a recognized standards body, or, in the case of
|
||||
interfaces specified for a particular programming language, one that
|
||||
is widely used among developers working in that language.
|
||||
|
||||
The "System Libraries" of an executable work include anything, other
|
||||
than the work as a whole, that (a) is included in the normal form of
|
||||
packaging a Major Component, but which is not part of that Major
|
||||
Component, and (b) serves only to enable use of the work with that
|
||||
Major Component, or to implement a Standard Interface for which an
|
||||
implementation is available to the public in source code form. A
|
||||
"Major Component", in this context, means a major essential component
|
||||
(kernel, window system, and so on) of the specific operating system
|
||||
(if any) on which the executable work runs, or a compiler used to
|
||||
produce the work, or an object code interpreter used to run it.
|
||||
|
||||
The "Corresponding Source" for a work in object code form means all
|
||||
the source code needed to generate, install, and (for an executable
|
||||
work) run the object code and to modify the work, including scripts to
|
||||
control those activities. However, it does not include the work's
|
||||
System Libraries, or general-purpose tools or generally available free
|
||||
programs which are used unmodified in performing those activities but
|
||||
which are not part of the work. For example, Corresponding Source
|
||||
includes interface definition files associated with source files for
|
||||
the work, and the source code for shared libraries and dynamically
|
||||
linked subprograms that the work is specifically designed to require,
|
||||
such as by intimate data communication or control flow between those
|
||||
subprograms and other parts of the work.
|
||||
|
||||
The Corresponding Source need not include anything that users
|
||||
can regenerate automatically from other parts of the Corresponding
|
||||
Source.
|
||||
|
||||
The Corresponding Source for a work in source code form is that
|
||||
same work.
|
||||
|
||||
2. Basic Permissions.
|
||||
|
||||
All rights granted under this License are granted for the term of
|
||||
copyright on the Program, and are irrevocable provided the stated
|
||||
conditions are met. This License explicitly affirms your unlimited
|
||||
permission to run the unmodified Program. The output from running a
|
||||
covered work is covered by this License only if the output, given its
|
||||
content, constitutes a covered work. This License acknowledges your
|
||||
rights of fair use or other equivalent, as provided by copyright law.
|
||||
|
||||
You may make, run and propagate covered works that you do not
|
||||
convey, without conditions so long as your license otherwise remains
|
||||
in force. You may convey covered works to others for the sole purpose
|
||||
of having them make modifications exclusively for you, or provide you
|
||||
with facilities for running those works, provided that you comply with
|
||||
the terms of this License in conveying all material for which you do
|
||||
not control copyright. Those thus making or running the covered works
|
||||
for you must do so exclusively on your behalf, under your direction
|
||||
and control, on terms that prohibit them from making any copies of
|
||||
your copyrighted material outside their relationship with you.
|
||||
|
||||
Conveying under any other circumstances is permitted solely under
|
||||
the conditions stated below. Sublicensing is not allowed; section 10
|
||||
makes it unnecessary.
|
||||
|
||||
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
|
||||
|
||||
No covered work shall be deemed part of an effective technological
|
||||
measure under any applicable law fulfilling obligations under article
|
||||
11 of the WIPO copyright treaty adopted on 20 December 1996, or
|
||||
similar laws prohibiting or restricting circumvention of such
|
||||
measures.
|
||||
|
||||
When you convey a covered work, you waive any legal power to forbid
|
||||
circumvention of technological measures to the extent such circumvention
|
||||
is effected by exercising rights under this License with respect to
|
||||
the covered work, and you disclaim any intention to limit operation or
|
||||
modification of the work as a means of enforcing, against the work's
|
||||
users, your or third parties' legal rights to forbid circumvention of
|
||||
technological measures.
|
||||
|
||||
4. Conveying Verbatim Copies.
|
||||
|
||||
You may convey verbatim copies of the Program's source code as you
|
||||
receive it, in any medium, provided that you conspicuously and
|
||||
appropriately publish on each copy an appropriate copyright notice;
|
||||
keep intact all notices stating that this License and any
|
||||
non-permissive terms added in accord with section 7 apply to the code;
|
||||
keep intact all notices of the absence of any warranty; and give all
|
||||
recipients a copy of this License along with the Program.
|
||||
|
||||
You may charge any price or no price for each copy that you convey,
|
||||
and you may offer support or warranty protection for a fee.
|
||||
|
||||
5. Conveying Modified Source Versions.
|
||||
|
||||
You may convey a work based on the Program, or the modifications to
|
||||
produce it from the Program, in the form of source code under the
|
||||
terms of section 4, provided that you also meet all of these conditions:
|
||||
|
||||
a) The work must carry prominent notices stating that you modified
|
||||
it, and giving a relevant date.
|
||||
|
||||
b) The work must carry prominent notices stating that it is
|
||||
released under this License and any conditions added under section
|
||||
7. This requirement modifies the requirement in section 4 to
|
||||
"keep intact all notices".
|
||||
|
||||
c) You must license the entire work, as a whole, under this
|
||||
License to anyone who comes into possession of a copy. This
|
||||
License will therefore apply, along with any applicable section 7
|
||||
additional terms, to the whole of the work, and all its parts,
|
||||
regardless of how they are packaged. This License gives no
|
||||
permission to license the work in any other way, but it does not
|
||||
invalidate such permission if you have separately received it.
|
||||
|
||||
d) If the work has interactive user interfaces, each must display
|
||||
Appropriate Legal Notices; however, if the Program has interactive
|
||||
interfaces that do not display Appropriate Legal Notices, your
|
||||
work need not make them do so.
|
||||
|
||||
A compilation of a covered work with other separate and independent
|
||||
works, which are not by their nature extensions of the covered work,
|
||||
and which are not combined with it such as to form a larger program,
|
||||
in or on a volume of a storage or distribution medium, is called an
|
||||
"aggregate" if the compilation and its resulting copyright are not
|
||||
used to limit the access or legal rights of the compilation's users
|
||||
beyond what the individual works permit. Inclusion of a covered work
|
||||
in an aggregate does not cause this License to apply to the other
|
||||
parts of the aggregate.
|
||||
|
||||
6. Conveying Non-Source Forms.
|
||||
|
||||
You may convey a covered work in object code form under the terms
|
||||
of sections 4 and 5, provided that you also convey the
|
||||
machine-readable Corresponding Source under the terms of this License,
|
||||
in one of these ways:
|
||||
|
||||
a) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by the
|
||||
Corresponding Source fixed on a durable physical medium
|
||||
customarily used for software interchange.
|
||||
|
||||
b) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by a
|
||||
written offer, valid for at least three years and valid for as
|
||||
long as you offer spare parts or customer support for that product
|
||||
model, to give anyone who possesses the object code either (1) a
|
||||
copy of the Corresponding Source for all the software in the
|
||||
product that is covered by this License, on a durable physical
|
||||
medium customarily used for software interchange, for a price no
|
||||
more than your reasonable cost of physically performing this
|
||||
conveying of source, or (2) access to copy the
|
||||
Corresponding Source from a network server at no charge.
|
||||
|
||||
c) Convey individual copies of the object code with a copy of the
|
||||
written offer to provide the Corresponding Source. This
|
||||
alternative is allowed only occasionally and noncommercially, and
|
||||
only if you received the object code with such an offer, in accord
|
||||
with subsection 6b.
|
||||
|
||||
d) Convey the object code by offering access from a designated
|
||||
place (gratis or for a charge), and offer equivalent access to the
|
||||
Corresponding Source in the same way through the same place at no
|
||||
further charge. You need not require recipients to copy the
|
||||
Corresponding Source along with the object code. If the place to
|
||||
copy the object code is a network server, the Corresponding Source
|
||||
may be on a different server (operated by you or a third party)
|
||||
that supports equivalent copying facilities, provided you maintain
|
||||
clear directions next to the object code saying where to find the
|
||||
Corresponding Source. Regardless of what server hosts the
|
||||
Corresponding Source, you remain obligated to ensure that it is
|
||||
available for as long as needed to satisfy these requirements.
|
||||
|
||||
e) Convey the object code using peer-to-peer transmission, provided
|
||||
you inform other peers where the object code and Corresponding
|
||||
Source of the work are being offered to the general public at no
|
||||
charge under subsection 6d.
|
||||
|
||||
A separable portion of the object code, whose source code is excluded
|
||||
from the Corresponding Source as a System Library, need not be
|
||||
included in conveying the object code work.
|
||||
|
||||
A "User Product" is either (1) a "consumer product", which means any
|
||||
tangible personal property which is normally used for personal, family,
|
||||
or household purposes, or (2) anything designed or sold for incorporation
|
||||
into a dwelling. In determining whether a product is a consumer product,
|
||||
doubtful cases shall be resolved in favor of coverage. For a particular
|
||||
product received by a particular user, "normally used" refers to a
|
||||
typical or common use of that class of product, regardless of the status
|
||||
of the particular user or of the way in which the particular user
|
||||
actually uses, or expects or is expected to use, the product. A product
|
||||
is a consumer product regardless of whether the product has substantial
|
||||
commercial, industrial or non-consumer uses, unless such uses represent
|
||||
the only significant mode of use of the product.
|
||||
|
||||
"Installation Information" for a User Product means any methods,
|
||||
procedures, authorization keys, or other information required to install
|
||||
and execute modified versions of a covered work in that User Product from
|
||||
a modified version of its Corresponding Source. The information must
|
||||
suffice to ensure that the continued functioning of the modified object
|
||||
code is in no case prevented or interfered with solely because
|
||||
modification has been made.
|
||||
|
||||
If you convey an object code work under this section in, or with, or
|
||||
specifically for use in, a User Product, and the conveying occurs as
|
||||
part of a transaction in which the right of possession and use of the
|
||||
User Product is transferred to the recipient in perpetuity or for a
|
||||
fixed term (regardless of how the transaction is characterized), the
|
||||
Corresponding Source conveyed under this section must be accompanied
|
||||
by the Installation Information. But this requirement does not apply
|
||||
if neither you nor any third party retains the ability to install
|
||||
modified object code on the User Product (for example, the work has
|
||||
been installed in ROM).
|
||||
|
||||
The requirement to provide Installation Information does not include a
|
||||
requirement to continue to provide support service, warranty, or updates
|
||||
for a work that has been modified or installed by the recipient, or for
|
||||
the User Product in which it has been modified or installed. Access to a
|
||||
network may be denied when the modification itself materially and
|
||||
adversely affects the operation of the network or violates the rules and
|
||||
protocols for communication across the network.
|
||||
|
||||
Corresponding Source conveyed, and Installation Information provided,
|
||||
in accord with this section must be in a format that is publicly
|
||||
documented (and with an implementation available to the public in
|
||||
source code form), and must require no special password or key for
|
||||
unpacking, reading or copying.
|
||||
|
||||
7. Additional Terms.
|
||||
|
||||
"Additional permissions" are terms that supplement the terms of this
|
||||
License by making exceptions from one or more of its conditions.
|
||||
Additional permissions that are applicable to the entire Program shall
|
||||
be treated as though they were included in this License, to the extent
|
||||
that they are valid under applicable law. If additional permissions
|
||||
apply only to part of the Program, that part may be used separately
|
||||
under those permissions, but the entire Program remains governed by
|
||||
this License without regard to the additional permissions.
|
||||
|
||||
When you convey a copy of a covered work, you may at your option
|
||||
remove any additional permissions from that copy, or from any part of
|
||||
it. (Additional permissions may be written to require their own
|
||||
removal in certain cases when you modify the work.) You may place
|
||||
additional permissions on material, added by you to a covered work,
|
||||
for which you have or can give appropriate copyright permission.
|
||||
|
||||
Notwithstanding any other provision of this License, for material you
|
||||
add to a covered work, you may (if authorized by the copyright holders of
|
||||
that material) supplement the terms of this License with terms:
|
||||
|
||||
a) Disclaiming warranty or limiting liability differently from the
|
||||
terms of sections 15 and 16 of this License; or
|
||||
|
||||
b) Requiring preservation of specified reasonable legal notices or
|
||||
author attributions in that material or in the Appropriate Legal
|
||||
Notices displayed by works containing it; or
|
||||
|
||||
c) Prohibiting misrepresentation of the origin of that material, or
|
||||
requiring that modified versions of such material be marked in
|
||||
reasonable ways as different from the original version; or
|
||||
|
||||
d) Limiting the use for publicity purposes of names of licensors or
|
||||
authors of the material; or
|
||||
|
||||
e) Declining to grant rights under trademark law for use of some
|
||||
trade names, trademarks, or service marks; or
|
||||
|
||||
f) Requiring indemnification of licensors and authors of that
|
||||
material by anyone who conveys the material (or modified versions of
|
||||
it) with contractual assumptions of liability to the recipient, for
|
||||
any liability that these contractual assumptions directly impose on
|
||||
those licensors and authors.
|
||||
|
||||
All other non-permissive additional terms are considered "further
|
||||
restrictions" within the meaning of section 10. If the Program as you
|
||||
received it, or any part of it, contains a notice stating that it is
|
||||
governed by this License along with a term that is a further
|
||||
restriction, you may remove that term. If a license document contains
|
||||
a further restriction but permits relicensing or conveying under this
|
||||
License, you may add to a covered work material governed by the terms
|
||||
of that license document, provided that the further restriction does
|
||||
not survive such relicensing or conveying.
|
||||
|
||||
If you add terms to a covered work in accord with this section, you
|
||||
must place, in the relevant source files, a statement of the
|
||||
additional terms that apply to those files, or a notice indicating
|
||||
where to find the applicable terms.
|
||||
|
||||
Additional terms, permissive or non-permissive, may be stated in the
|
||||
form of a separately written license, or stated as exceptions;
|
||||
the above requirements apply either way.
|
||||
|
||||
8. Termination.
|
||||
|
||||
You may not propagate or modify a covered work except as expressly
|
||||
provided under this License. Any attempt otherwise to propagate or
|
||||
modify it is void, and will automatically terminate your rights under
|
||||
this License (including any patent licenses granted under the third
|
||||
paragraph of section 11).
|
||||
|
||||
However, if you cease all violation of this License, then your
|
||||
license from a particular copyright holder is reinstated (a)
|
||||
provisionally, unless and until the copyright holder explicitly and
|
||||
finally terminates your license, and (b) permanently, if the copyright
|
||||
holder fails to notify you of the violation by some reasonable means
|
||||
prior to 60 days after the cessation.
|
||||
|
||||
Moreover, your license from a particular copyright holder is
|
||||
reinstated permanently if the copyright holder notifies you of the
|
||||
violation by some reasonable means, this is the first time you have
|
||||
received notice of violation of this License (for any work) from that
|
||||
copyright holder, and you cure the violation prior to 30 days after
|
||||
your receipt of the notice.
|
||||
|
||||
Termination of your rights under this section does not terminate the
|
||||
licenses of parties who have received copies or rights from you under
|
||||
this License. If your rights have been terminated and not permanently
|
||||
reinstated, you do not qualify to receive new licenses for the same
|
||||
material under section 10.
|
||||
|
||||
9. Acceptance Not Required for Having Copies.
|
||||
|
||||
You are not required to accept this License in order to receive or
|
||||
run a copy of the Program. Ancillary propagation of a covered work
|
||||
occurring solely as a consequence of using peer-to-peer transmission
|
||||
to receive a copy likewise does not require acceptance. However,
|
||||
nothing other than this License grants you permission to propagate or
|
||||
modify any covered work. These actions infringe copyright if you do
|
||||
not accept this License. Therefore, by modifying or propagating a
|
||||
covered work, you indicate your acceptance of this License to do so.
|
||||
|
||||
10. Automatic Licensing of Downstream Recipients.
|
||||
|
||||
Each time you convey a covered work, the recipient automatically
|
||||
receives a license from the original licensors, to run, modify and
|
||||
propagate that work, subject to this License. You are not responsible
|
||||
for enforcing compliance by third parties with this License.
|
||||
|
||||
An "entity transaction" is a transaction transferring control of an
|
||||
organization, or substantially all assets of one, or subdividing an
|
||||
organization, or merging organizations. If propagation of a covered
|
||||
work results from an entity transaction, each party to that
|
||||
transaction who receives a copy of the work also receives whatever
|
||||
licenses to the work the party's predecessor in interest had or could
|
||||
give under the previous paragraph, plus a right to possession of the
|
||||
Corresponding Source of the work from the predecessor in interest, if
|
||||
the predecessor has it or can get it with reasonable efforts.
|
||||
|
||||
You may not impose any further restrictions on the exercise of the
|
||||
rights granted or affirmed under this License. For example, you may
|
||||
not impose a license fee, royalty, or other charge for exercise of
|
||||
rights granted under this License, and you may not initiate litigation
|
||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
||||
any patent claim is infringed by making, using, selling, offering for
|
||||
sale, or importing the Program or any portion of it.
|
||||
|
||||
11. Patents.
|
||||
|
||||
A "contributor" is a copyright holder who authorizes use under this
|
||||
License of the Program or a work on which the Program is based. The
|
||||
work thus licensed is called the contributor's "contributor version".
|
||||
|
||||
A contributor's "essential patent claims" are all patent claims
|
||||
owned or controlled by the contributor, whether already acquired or
|
||||
hereafter acquired, that would be infringed by some manner, permitted
|
||||
by this License, of making, using, or selling its contributor version,
|
||||
but do not include claims that would be infringed only as a
|
||||
consequence of further modification of the contributor version. For
|
||||
purposes of this definition, "control" includes the right to grant
|
||||
patent sublicenses in a manner consistent with the requirements of
|
||||
this License.
|
||||
|
||||
Each contributor grants you a non-exclusive, worldwide, royalty-free
|
||||
patent license under the contributor's essential patent claims, to
|
||||
make, use, sell, offer for sale, import and otherwise run, modify and
|
||||
propagate the contents of its contributor version.
|
||||
|
||||
In the following three paragraphs, a "patent license" is any express
|
||||
agreement or commitment, however denominated, not to enforce a patent
|
||||
(such as an express permission to practice a patent or covenant not to
|
||||
sue for patent infringement). To "grant" such a patent license to a
|
||||
party means to make such an agreement or commitment not to enforce a
|
||||
patent against the party.
|
||||
|
||||
If you convey a covered work, knowingly relying on a patent license,
|
||||
and the Corresponding Source of the work is not available for anyone
|
||||
to copy, free of charge and under the terms of this License, through a
|
||||
publicly available network server or other readily accessible means,
|
||||
then you must either (1) cause the Corresponding Source to be so
|
||||
available, or (2) arrange to deprive yourself of the benefit of the
|
||||
patent license for this particular work, or (3) arrange, in a manner
|
||||
consistent with the requirements of this License, to extend the patent
|
||||
license to downstream recipients. "Knowingly relying" means you have
|
||||
actual knowledge that, but for the patent license, your conveying the
|
||||
covered work in a country, or your recipient's use of the covered work
|
||||
in a country, would infringe one or more identifiable patents in that
|
||||
country that you have reason to believe are valid.
|
||||
|
||||
If, pursuant to or in connection with a single transaction or
|
||||
arrangement, you convey, or propagate by procuring conveyance of, a
|
||||
covered work, and grant a patent license to some of the parties
|
||||
receiving the covered work authorizing them to use, propagate, modify
|
||||
or convey a specific copy of the covered work, then the patent license
|
||||
you grant is automatically extended to all recipients of the covered
|
||||
work and works based on it.
|
||||
|
||||
A patent license is "discriminatory" if it does not include within
|
||||
the scope of its coverage, prohibits the exercise of, or is
|
||||
conditioned on the non-exercise of one or more of the rights that are
|
||||
specifically granted under this License. You may not convey a covered
|
||||
work if you are a party to an arrangement with a third party that is
|
||||
in the business of distributing software, under which you make payment
|
||||
to the third party based on the extent of your activity of conveying
|
||||
the work, and under which the third party grants, to any of the
|
||||
parties who would receive the covered work from you, a discriminatory
|
||||
patent license (a) in connection with copies of the covered work
|
||||
conveyed by you (or copies made from those copies), or (b) primarily
|
||||
for and in connection with specific products or compilations that
|
||||
contain the covered work, unless you entered into that arrangement,
|
||||
or that patent license was granted, prior to 28 March 2007.
|
||||
|
||||
Nothing in this License shall be construed as excluding or limiting
|
||||
any implied license or other defenses to infringement that may
|
||||
otherwise be available to you under applicable patent law.
|
||||
|
||||
12. No Surrender of Others' Freedom.
|
||||
|
||||
If conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot convey a
|
||||
covered work so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you may
|
||||
not convey it at all. For example, if you agree to terms that obligate you
|
||||
to collect a royalty for further conveying from those to whom you convey
|
||||
the Program, the only way you could satisfy both those terms and this
|
||||
License would be to refrain entirely from conveying the Program.
|
||||
|
||||
13. Use with the GNU Affero General Public License.
|
||||
|
||||
Notwithstanding any other provision of this License, you have
|
||||
permission to link or combine any covered work with a work licensed
|
||||
under version 3 of the GNU Affero General Public License into a single
|
||||
combined work, and to convey the resulting work. The terms of this
|
||||
License will continue to apply to the part which is the covered work,
|
||||
but the special requirements of the GNU Affero General Public License,
|
||||
section 13, concerning interaction through a network will apply to the
|
||||
combination as such.
|
||||
|
||||
14. Revised Versions of this License.
|
||||
|
||||
The Free Software Foundation may publish revised and/or new versions of
|
||||
the GNU General Public License from time to time. Such new versions will
|
||||
be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the
|
||||
Program specifies that a certain numbered version of the GNU General
|
||||
Public License "or any later version" applies to it, you have the
|
||||
option of following the terms and conditions either of that numbered
|
||||
version or of any later version published by the Free Software
|
||||
Foundation. If the Program does not specify a version number of the
|
||||
GNU General Public License, you may choose any version ever published
|
||||
by the Free Software Foundation.
|
||||
|
||||
If the Program specifies that a proxy can decide which future
|
||||
versions of the GNU General Public License can be used, that proxy's
|
||||
public statement of acceptance of a version permanently authorizes you
|
||||
to choose that version for the Program.
|
||||
|
||||
Later license versions may give you additional or different
|
||||
permissions. However, no additional obligations are imposed on any
|
||||
author or copyright holder as a result of your choosing to follow a
|
||||
later version.
|
||||
|
||||
15. Disclaimer of Warranty.
|
||||
|
||||
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
|
||||
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
|
||||
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
|
||||
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
|
||||
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
|
||||
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
|
||||
|
||||
16. Limitation of Liability.
|
||||
|
||||
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
|
||||
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
|
||||
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
|
||||
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
|
||||
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
|
||||
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
|
||||
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGES.
|
||||
|
||||
17. Interpretation of Sections 15 and 16.
|
||||
|
||||
If the disclaimer of warranty and limitation of liability provided
|
||||
above cannot be given local legal effect according to their terms,
|
||||
reviewing courts shall apply local law that most closely approximates
|
||||
an absolute waiver of all civil liability in connection with the
|
||||
Program, unless a warranty or assumption of liability accompanies a
|
||||
copy of the Program in return for a fee.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
How to Apply These Terms to Your New Programs
|
||||
|
||||
If you develop a new program, and you want it to be of the greatest
|
||||
possible use to the public, the best way to achieve this is to make it
|
||||
free software which everyone can redistribute and change under these terms.
|
||||
|
||||
To do so, attach the following notices to the program. It is safest
|
||||
to attach them to the start of each source file to most effectively
|
||||
state the exclusion of warranty; and each file should have at least
|
||||
the "copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
<one line to give the program's name and a brief idea of what it does.>
|
||||
Copyright (C) <year> <name of author>
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If the program does terminal interaction, make it output a short
|
||||
notice like this when it starts in an interactive mode:
|
||||
|
||||
<program> Copyright (C) <year> <name of author>
|
||||
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||
This is free software, and you are welcome to redistribute it
|
||||
under certain conditions; type `show c' for details.
|
||||
|
||||
The hypothetical commands `show w' and `show c' should show the appropriate
|
||||
parts of the General Public License. Of course, your program's commands
|
||||
might be different; for a GUI interface, you would use an "about box".
|
||||
|
||||
You should also get your employer (if you work as a programmer) or school,
|
||||
if any, to sign a "copyright disclaimer" for the program, if necessary.
|
||||
For more information on this, and how to apply and follow the GNU GPL, see
|
||||
<https://www.gnu.org/licenses/>.
|
||||
|
||||
The GNU General Public License does not permit incorporating your program
|
||||
into proprietary programs. If your program is a subroutine library, you
|
||||
may consider it more useful to permit linking proprietary applications with
|
||||
the library. If this is what you want to do, use the GNU Lesser General
|
||||
Public License instead of this License. But first, please read
|
||||
<https://www.gnu.org/licenses/why-not-lgpl.html>.
|
142
LLavaTagger/LLavaTagger.py
Normal file
142
LLavaTagger/LLavaTagger.py
Normal file
@ -0,0 +1,142 @@
|
||||
import warnings
|
||||
warnings.simplefilter(action='ignore')
|
||||
from transformers import AutoProcessor, LlavaForConditionalGeneration, BitsAndBytesConfig, logging
|
||||
import argparse
|
||||
import cv2
|
||||
import torch
|
||||
import os
|
||||
import numpy
|
||||
from typing import Iterator
|
||||
from torch.multiprocessing import Process, Queue
|
||||
import json
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
image_ext_ocv = [".bmp", ".jpeg", ".jpg", ".png"]
|
||||
|
||||
|
||||
def find_image_files(path: str) -> list[str]:
    """Recursively collect the image files located below ``path``.

    A file counts as an image when its extension, compared
    case-insensitively, is listed in ``image_ext_ocv``.

    Args:
        path: Directory that is walked recursively.

    Returns:
        The path of every matching file, in the order ``os.walk`` reports them.
    """
    return [
        os.path.join(directory, filename)
        for directory, _subdirs, filenames in os.walk(path)
        for filename in filenames
        if os.path.splitext(filename)[1].lower() in image_ext_ocv
    ]
|
||||
|
||||
|
||||
def image_loader(paths: list[str]) -> Iterator[tuple[numpy.ndarray, str]]:
    """Lazily load images with OpenCV and yield them in RGB channel order.

    Files that OpenCV cannot read are reported with a warning on stdout and
    skipped.

    Args:
        paths: Paths of the image files to load.

    Yields:
        ``(image, path)`` tuples, where ``image`` is the loaded image converted
        from BGR to RGB and ``path`` is the file it was read from.
    """
    for path in paths:
        imagebgr = cv2.imread(path)
        # The failure check has to happen before the colour conversion:
        # cv2.cvtColor() raises on a missing image, so checking its result
        # afterwards would never reach the warning.
        if imagebgr is None:
            print(f"Warning: could not load {path}")
            continue
        yield cv2.cvtColor(imagebgr, cv2.COLOR_BGR2RGB), path
|
||||
|
||||
|
||||
def pipeline(queue: Queue, image_paths: list[str], prompt: str, device: torch.device, model_name_or_path: str, batch_size: int):
    """Caption a list of images with a LLaVA model and publish the results.

    The model is loaded 4-bit (nf4) quantized with float16 compute onto
    ``device``. Images are processed in batches of at most ``batch_size``;
    for every image one ``{"file_name": ..., "text": ...}`` dict is put on
    ``queue``.

    Args:
        queue: Queue that receives one result dict per processed image.
        image_paths: Paths of the images this worker is responsible for.
        prompt: Complete prompt text (including the ``<image>`` placeholder)
            used for every image.
        device: Device the model is placed on.
        model_name_or_path: Model identifier or path handed to
            ``from_pretrained``.
        batch_size: Maximum number of images per inference batch.
    """
    quantization = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=False,
        bnb_4bit_quant_type='nf4',
    )
    model = LlavaForConditionalGeneration.from_pretrained(
        model_name_or_path,
        torch_dtype=torch.float16,
        low_cpu_mem_usage=None,
        quantization_config=quantization,
        device_map=device,
        attn_implementation="flash_attention_2",
    )
    processor = AutoProcessor.from_pretrained(model_name_or_path)
    loader = image_loader(image_paths)

    # Number of leading characters cut from every decoded answer.
    # NOTE(review): presumably the decoded text repeats the prompt without
    # the "<image>" placeholder (special tokens are skipped) - confirm.
    prefix_length = len(prompt) - len("<image>")

    exhausted = False
    while not exhausted:
        batch_images = []
        batch_files = []
        for _ in range(batch_size):
            frame, source = next(loader, (None, None))
            if frame is None:
                # The loader ran dry; finish the partial batch and stop.
                exhausted = True
                break
            batch_files.append(source)
            batch_images.append(frame)

        if not batch_images:
            break

        batch_prompts = [prompt] * len(batch_images)
        inputs = processor(text=batch_prompts, images=batch_images, return_tensors="pt").to(model.device)
        generate_ids = model.generate(
            **inputs,
            max_new_tokens=100,
            min_new_tokens=3,
            length_penalty=1.0,
            do_sample=False,
            temperature=1.0,
            top_k=50,
            top_p=1.0,
        )
        answers = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)

        for index, answer in enumerate(answers):
            queue.put({"file_name": batch_files[index], "text": answer[prefix_length:].strip()})
|
||||
|
||||
|
||||
def split_list(input_list, count):
    """Split ``input_list`` into ``count`` contiguous chunks of near-equal size.

    The chunk lengths differ by at most one element; the leading chunks
    receive the extra elements when the length is not divisible by ``count``.
    This keeps the work balanced across workers instead of handing the whole
    remainder to the last one.

    Args:
        input_list: Sequence to split; it is only sliced, never modified.
        count: Number of chunks to produce, must be at least 1.

    Yields:
        Exactly ``count`` slices of ``input_list`` (possibly empty) which,
        concatenated in order, reproduce ``input_list``.

    Raises:
        ValueError: If ``count`` is smaller than 1. As this is a generator the
            error surfaces when iteration starts.
    """
    if count < 1:
        raise ValueError(f"count must be at least 1, got {count}")

    base_length, remainder = divmod(len(input_list), count)
    start = 0
    for index in range(count):
        # The first `remainder` chunks take one extra element each.
        end = start + base_length + (1 if index < remainder else 0)
        yield input_list[start:end]
        start = end
|
||||
|
||||
|
||||
def save_meta(meta_file, meta, reldir, common_description):
    """Append one metadata record as a JSON line to ``meta_file``.

    ``meta`` is updated in place: its ``"file_name"`` becomes relative to
    ``reldir`` and, when ``common_description`` is given, that string is put
    in front of ``"text"``.

    Args:
        meta_file: Writable text file object receiving the JSON line.
        meta: Dict with at least the keys ``"file_name"`` and ``"text"``.
        reldir: Directory the stored file name is made relative to.
        common_description: Optional prefix for the text, or ``None``.
    """
    relative_name = os.path.relpath(meta["file_name"], reldir)
    meta["file_name"] = relative_name

    if common_description is not None:
        described = common_description + meta["text"]
        meta["text"] = described

    line = json.dumps(meta)
    meta_file.write(line + '\n')
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Imported here because only the script entry point needs it; this is the
    # exception Queue.get() raises when its timeout expires.
    from queue import Empty

    parser = argparse.ArgumentParser("A script to tag images via llava")
    parser.add_argument('--model', '-m', default="llava-hf/llava-1.5-13b-hf", help="model to use")
    # NOTE(review): --quantize is parsed but never forwarded to pipeline(),
    # which always loads the model 4-bit quantized.
    parser.add_argument('--quantize', '-q', action='store_true', help="load quantized")
    parser.add_argument('--prompt', '-p', default="Please describe this image in 10 to 20 words.", help="Prompt to use on eatch image")
    parser.add_argument('--batch', '-b', default=4, type=int, help="Batch size to use for inference")
    parser.add_argument('--common_description', '-c', help="An optional description that will be preended to the ai generated one")
    parser.add_argument('--image_dir', '-i', required=True, help="A directory containg the images to tag")
    args = parser.parse_args()

    prompt = "USER: <image>\n" + args.prompt + "\nASSISTANT: "
    os.environ["BITSANDBYTES_NOWELCOME"] = "1"

    # One worker process is started per visible GPU; without any GPU there is
    # nothing to run on, so fail with a clear message instead of crashing
    # while splitting the work.
    device_count = torch.cuda.device_count()
    if device_count < 1:
        parser.error("no GPU available, at least one CUDA/ROCm device is required")

    image_paths = find_image_files(args.image_dir)
    image_path_chunks = list(split_list(image_paths, device_count))

    print(f"Will use {device_count} processies to create tags")

    logging.set_verbosity_error()
    warnings.filterwarnings("ignore")
    torch.multiprocessing.set_start_method('spawn')

    queue = Queue()
    processies = list()
    for i in range(0, device_count):
        processies.append(Process(target=pipeline, args=(queue, image_path_chunks[i], prompt, torch.device(i), args.model, args.batch)))
        processies[-1].start()

    progress = tqdm(desc="Generateing tags", total=len(image_paths))
    with open(os.path.join(args.image_dir, "metadata.jsonl"), mode='w') as output_file:
        while True:
            try:
                # Block with a short timeout instead of spinning on
                # queue.empty(), which would keep one CPU core busy.
                meta = queue.get(timeout=0.1)
            except Empty:
                if any(process.is_alive() for process in processies):
                    continue
                break
            save_meta(output_file, meta, args.image_dir, args.common_description)
            progress.update()

        # A result may have arrived between the last timeout and the
        # liveness check, so drain whatever is left.
        while not queue.empty():
            meta = queue.get()
            save_meta(output_file, meta, args.image_dir, args.common_description)
            progress.update()
    progress.close()

    for process in processies:
        process.join()
|
||||
|
21
LLavaTagger/README.md
Normal file
21
LLavaTagger/README.md
Normal file
@ -0,0 +1,21 @@
|
||||
# LLavaTagger
|
||||
|
||||
LLavaTagger is a Python script that tags images based on a given prompt using the [LLaVA](https://llava-vl.github.io/) multimodal LLM. LLavaTagger supports using any number of GPUs in parallel for this task, running one worker process per GPU.
|
||||
|
||||
## How to use
|
||||
|
||||
first create a python venv and install the required packages into it:
|
||||
|
||||
$ python -m venv venv
|
||||
$ source venv/bin/activate
|
||||
$ pip install -r requirements.txt
|
||||
|
||||
Then run LLavaTagger for instance like so:
|
||||
|
||||
$ python LLavaTagger.py --common_description "a image of a cat, " --prompt "describe the cat in 10 to 20 words" --batch 8 --quantize --image_dir ~/cat_images
|
||||
|
||||
By default LLavaTagger will run in parallel on all available GPUs. If this is undesirable, please use the ROCR_VISIBLE_DEVICES= or CUDA_VISIBLE_DEVICES= environment variable to hide unwanted GPUs.
|
||||
|
||||
LLavaTagger will then create a metadata.jsonl in the image directory, suitable to be used by the scripts of [diffusers](https://github.com/huggingface/diffusers) to train stable diffusion (xl). If other formats are desired, ../utils contains scripts to transform the metadata into other formats, for instance for use with [kohya](https://github.com/bmaltais/kohya_ss).
|
||||
|
||||
If editing the created tags is desired, [QImageTagger](https://uvos.xyz/git/uvos/QImageTagger) can be used for this purpose
|
11
LLavaTagger/requirements.txt
Normal file
11
LLavaTagger/requirements.txt
Normal file
@ -0,0 +1,11 @@
|
||||
accelerate==0.29.0
|
||||
bitsandbytes
|
||||
huggingface-hub==0.22.2
|
||||
ninja==1.11.1.1
|
||||
safetensors==0.4.2
|
||||
tokenizers==0.15.2
|
||||
transformers
|
||||
torch
|
||||
opencv-python
|
||||
numpy
|
||||
tqdm
|
174
PersonDatasetAssembler/PersonDatasetAssembler.py
Executable file
174
PersonDatasetAssembler/PersonDatasetAssembler.py
Executable file
@ -0,0 +1,174 @@
|
||||
#!/bin/python3
|
||||
|
||||
# PersonDatasetAssembler - A tool to assemble images of a specific person from a
|
||||
# directory of images or from a video file
|
||||
# Copyright (C) 2024 Carl Philipp Klemm
|
||||
#
|
||||
# This file is part of PersonDatasetAssembler.
|
||||
#
|
||||
# PersonDatasetAssembler is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# PersonDatasetAssembler is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with PersonDatasetAssembler. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from typing import Iterator
|
||||
import cv2
|
||||
import numpy
|
||||
from tqdm import tqdm
|
||||
from wand.exceptions import BlobError
|
||||
from wand.image import Image
|
||||
|
||||
image_ext_ocv = [".bmp", ".jpeg", ".jpg", ".png"]
|
||||
image_ext_wand = [".dng", ".arw"]
|
||||
|
||||
|
||||
class LoadException(Exception):
    """Error type of this tool for failures while loading input data."""
|
||||
|
||||
|
||||
def find_image_files(path: str) -> list[str]:
    """Recursively collect the image files below ``path``.

    A file is selected when its extension, compared case-insensitively, is
    listed in ``image_ext_ocv`` or ``image_ext_wand``.

    Args:
        path: Directory to walk.

    Returns:
        Paths of all matching files, in ``os.walk`` order.
    """
    known_extensions = set(image_ext_ocv) | set(image_ext_wand)
    paths = []
    for root, _dirs, files in os.walk(path):
        for filename in files:
            # Lower-case once for both lists: previously only the OpenCV list
            # was compared case-insensitively, so "IMG.DNG" was skipped while
            # "IMG.PNG" was found.
            extension = os.path.splitext(filename)[1].lower()
            if extension in known_extensions:
                paths.append(os.path.join(root, filename))
    return paths
|
||||
|
||||
|
||||
def image_loader(paths: list[str]) -> Iterator[numpy.ndarray]:
    """Lazily load the images named in ``paths`` as OpenCV arrays.

    Files whose extension is in ``image_ext_ocv`` are read with
    ``cv2.imread``. Files whose extension is in ``image_ext_wand`` are opened
    with Wand, re-encoded in memory and decoded with ``cv2.imdecode``.
    Files that cannot be loaded are reported on stdout and skipped; files
    with any other extension are ignored.

    Args:
        paths: Paths of the image files to load.

    Yields:
        One array per successfully loaded image, in input order.
    """
    for path in paths:
        extension = os.path.splitext(path)[1].lower()
        if extension in image_ext_ocv:
            image = cv2.imread(path)
            if image is None:
                print(f"Warning: could not load {path}")
            else:
                yield image
        elif extension in image_ext_wand:
            try:
                with Image(filename=path) as raw_image:
                    # Hand the pixels to OpenCV through an in-memory BMP so
                    # the result has the same layout cv2.imread produces.
                    blob = raw_image.make_blob(format="bmp")
            except BlobError as e:
                print(f"Warning: could not load {path}, {e}")
                continue
            # The original code stopped after opening the file and never
            # yielded it, so raw images were silently dropped.
            image = cv2.imdecode(numpy.frombuffer(blob, dtype=numpy.uint8), cv2.IMREAD_COLOR)
            if image is None:
                print(f"Warning: could not load {path}")
            else:
                yield image
|
||||
|
||||
|
||||
def extract_video_images(video: cv2.VideoCapture, interval: int = 0):
    """Yield frames of ``video``, starting at frame 0 and stepping by ``interval``.

    Before every read the capture is positioned with ``CAP_PROP_POS_FRAMES``;
    iteration ends at the first read that fails.

    Args:
        video: Opened capture to read from.
        interval: Distance in frames between two yielded frames. Values below
            1 are treated as 1.

    Yields:
        The frame returned by ``video.read()`` at each sampled position.
    """
    # A step of 0 (the default) would seek to frame 0 forever and never
    # terminate, so always advance by at least one frame.
    step = max(1, interval)
    frame_counter = 0
    while True:
        video.set(cv2.CAP_PROP_POS_FRAMES, frame_counter)
        ret, frame = video.read()
        if not ret:
            return
        yield frame
        frame_counter += step
|
||||
|
||||
|
||||
def contains_face_match(detector: cv2.FaceDetectorYN, recognizer: cv2.FaceRecognizerSF, image: numpy.ndarray,
                        referance_features: list[numpy.ndarray], thresh: float) -> tuple[float, bool]:
    """Check whether ``image`` contains a face matching the reference features.

    For every detected face the match score against each reference feature is
    averaged; the first face whose average exceeds ``thresh`` wins.

    Args:
        detector: Face detector; its input size is set to the image size.
        recognizer: Recognizer used to align, embed and compare faces.
        image: Image to search.
        referance_features: Feature vectors of the reference face(s).
        thresh: Average score a face must exceed to count as a match.

    Returns:
        ``(score, True)`` for the first matching face, otherwise ``(0, False)``.
    """
    if not referance_features:
        # Nothing to compare against; avoids a ZeroDivisionError below.
        return 0, False

    detector.setInputSize([image.shape[1], image.shape[0]])
    faces = detector.detect(image)[1]
    if faces is None:
        return 0, False

    for face in faces:
        cropped_image = recognizer.alignCrop(image, face)
        features = recognizer.feature(cropped_image)
        # Third argument 0 is the distance type (FR_COSINE in OpenCV's enum).
        score_accum = sum(recognizer.match(referance, features, 0) for referance in referance_features)
        score = score_accum / len(referance_features)
        if score > thresh:
            return score, True
    return 0, False
|
||||
|
||||
|
||||
def process_referance(detector: cv2.FaceDetectorYN, recognizer: cv2.FaceRecognizerSF, referance_path: str) -> list[numpy.ndarray]:
    """Compute face features for the reference image(s) at ``referance_path``.

    A file is read with ``cv2.imread``; a directory is searched with
    ``find_image_files`` and loaded with ``image_loader``. For each image the
    first detected face is aligned and converted into a feature vector.

    Args:
        detector: Face detector; its input size is set per image.
        recognizer: Recognizer used to align faces and extract features.
        referance_path: Path to a reference image or a directory of them.

    Returns:
        One feature vector per reference image; empty when nothing loaded.

    Raises:
        SystemExit: With status 1 when a reference image contains no face.
    """
    images = []
    if os.path.isfile(referance_path):
        image = cv2.imread(referance_path)
        if image is None:
            print(f"Could not load image from {referance_path}")
        else:
            images.append(image)
    elif os.path.isdir(referance_path):
        images = list(image_loader(find_image_files(referance_path)))

    out = []
    for image in images:
        detector.setInputSize([image.shape[1], image.shape[0]])
        faces = detector.detect(image)[1]
        if faces is None:
            print("unable to find face in referance image")
            # Same effect as exit(1) without depending on the site module.
            raise SystemExit(1)
        aligned = recognizer.alignCrop(image, faces[0])
        out.append(recognizer.feature(aligned))

    return out
|
||||
|
||||
if __name__ == "__main__":
    # Command line entry point: collect every input image (or sampled video
    # frame) that does / does not show the reference person into --out.
    parser = argparse.ArgumentParser("Script to assemble a dataset of images of a specific person")
    parser.add_argument('--out', '-o', default="out", help="place to put dataset")
    parser.add_argument('--input', '-i', required=True, help="directory or video file to get images from")
    parser.add_argument('--skip', '-s', default=0, type=int, help="skip n frames between samples when grabbing from a video file")
    parser.add_argument('--referance', '-r', required=True, help="referance image or directory of images of the person to be found")
    parser.add_argument('--match_model', '-m', required=True, help="Path to the onnx recognition model to be used")
    parser.add_argument('--detect_model', '-d', required=True, help="Path to the onnx detection model to be used")
    parser.add_argument('--threshold', '-t', default=0.362, type=float, help="match threshold to use")
    parser.add_argument('--invert', '-n', action='store_true', help="output files that DONT match")
    args = parser.parse_args()

    # skip + 1 is used as frame step and as divisor below; a negative value
    # would divide by zero or walk the video backwards.
    if args.skip < 0:
        parser.error("--skip must not be negative")

    recognizer = cv2.FaceRecognizerSF.create(model=args.match_model, config="", backend_id=cv2.dnn.DNN_BACKEND_DEFAULT, target_id=cv2.dnn.DNN_TARGET_CPU)
    detector = cv2.FaceDetectorYN.create(model=args.detect_model, config="", input_size=[320, 320],
                                         score_threshold=0.6, nms_threshold=0.3, top_k=5000,
                                         backend_id=cv2.dnn.DNN_BACKEND_DEFAULT, target_id=cv2.dnn.DNN_TARGET_CPU)

    referance_features = process_referance(detector, recognizer, args.referance)
    if len(referance_features) < 1:
        print(f"Could not load any referance image(s) from {args.referance}")
        raise SystemExit(1)

    video = None
    if os.path.isfile(args.input):
        video = cv2.VideoCapture(args.input)
        if not video.isOpened():
            print(f"Unable to open {args.input} as a video file")
            raise SystemExit(1)
        image_generator = extract_video_images(video, args.skip + 1)
        total_images = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) / (args.skip + 1)
    elif os.path.isdir(args.input):
        image_filenams = find_image_files(args.input)
        image_generator = image_loader(image_filenams)
        total_images = len(image_filenams)
    else:
        print(f"{args.input} is not a video file nor is it a directory")
        raise SystemExit(1)

    os.makedirs(args.out, exist_ok=True)

    counter = 0
    try:
        with tqdm(total=int(total_images), desc="0.00") as progress:
            for image in image_generator:
                # Detection runs on a copy limited to 512 px height; the
                # full-resolution image is what gets written out.
                if image.shape[0] > 512:
                    aspect = image.shape[0] / image.shape[1]
                    width = max(1, int(512 / aspect))
                    # interpolation must be passed by keyword: positionally the
                    # 3rd-5th arguments of cv2.resize are dst, fx and fy, so
                    # INTER_AREA was previously never applied.
                    resized = cv2.resize(image, (width, 512), interpolation=cv2.INTER_AREA)
                else:
                    resized = image
                score, match = contains_face_match(detector, recognizer, resized, referance_features, args.threshold)
                # Keep matches normally, non-matches when --invert is given.
                if bool(match) != args.invert:
                    filename = f"{counter:04}.png"
                    cv2.imwrite(os.path.join(args.out, filename), image)
                    counter += 1
                progress.set_description(f"{score:1.2f}")
                progress.update()
    finally:
        if video is not None:
            video.release()
|
||||
|
20
PersonDatasetAssembler/README.md
Normal file
20
PersonDatasetAssembler/README.md
Normal file
@ -0,0 +1,20 @@
|
||||
### PersonDatasetAssembler
|
||||
|
||||
PersonDatasetAssembler is a Python script that finds images of a specific person, specified by a reference image, in a directory of images or in a video file. PersonDatasetAssembler also supports raw images.
|
||||
|
||||
## How to use
|
||||
|
||||
first create a python venv and install the required packages into it:
|
||||
|
||||
$ python -m venv venv
|
||||
$ source venv/bin/activate
|
||||
$ pip install -r requirements.txt
|
||||
|
||||
Then run PersonDatasetAssembler for instance like so:
|
||||
|
||||
$ python PersonDatasetAssembler.py --referance someperson.jpg --match_model ../Weights/face_recognition_sface_2021dec.onnx --detect_model ../Weights/face_detection_yunet_2023mar.onnx --input ~/Photos --out imagesOfSomePerson
|
||||
|
||||
Or to extract images from a video:
|
||||
|
||||
$ python PersonDatasetAssembler.py --referance someperson.jpg --match_model ../Weights/face_recognition_sface_2021dec.onnx --detect_model ../Weights/face_detection_yunet_2023mar.onnx -i ~/SomeVideo.mkv --out imagesOfSomePerson
|
||||
|
4
PersonDatasetAssembler/requirements.txt
Normal file
4
PersonDatasetAssembler/requirements.txt
Normal file
@ -0,0 +1,4 @@
|
||||
numpy==1.26.4
|
||||
opencv-python==4.10.0.82
|
||||
tqdm==4.66.4
|
||||
Wand==0.6.13
|
35
README.md
Normal file
35
README.md
Normal file
@ -0,0 +1,35 @@
|
||||
# SDImagePreprocess
|
||||
|
||||
This repo contains a collection of high-performance tools intended to ease the creation of datasets for training image-generation AI models such as Stable Diffusion.
|
||||
|
||||
## Included tools
|
||||
|
||||
This repo contains the following tools:
|
||||
|
||||
### SmartCrop
|
||||
|
||||
SmartCrop is an application that uses content-aware cropping, [seam carving](https://en.wikipedia.org/wiki/Seam_carving) and resizing to bring a directory of images into the desired size and aspect ratio for training. SmartCrop is configurable to prioritize specific items or specific persons in the images provided.
|
||||
|
||||
#### Content detected in image:
|
||||
|
||||

|
||||
|
||||
#### Cropped image based on content:
|
||||

|
||||
|
||||
### PersonDatasetAssembler
|
||||
|
||||
PersonDatasetAssembler is a Python script that finds images of a specific person, specified by a reference image, in a directory of images or in a video file. PersonDatasetAssembler also supports raw images.
|
||||
|
||||
### LLavaTagger
|
||||
|
||||
LLavaTagger is a Python script that tags images based on a given prompt using the [LLaVA](https://llava-vl.github.io/) multimodal LLM. LLavaTagger supports using any number of GPUs in DDP parallel for this task.
|
||||
|
||||
### DanbooruTagger
|
||||
|
||||
DanbooruTagger is a Python script of dubious utility that tags images using the [DeepDanbooru](https://github.com/KichangKim/DeepDanbooru) convolutional network.
|
||||
|
||||
|
||||
## License
|
||||
|
||||
All files in this repo are licensed under the GPL v3, see LICENSE
|
16
SmartCrop/CMakeLists.txt
Normal file
16
SmartCrop/CMakeLists.txt
Normal file
@ -0,0 +1,16 @@
|
||||
cmake_minimum_required(VERSION 3.6)

find_package(OpenCV REQUIRED)

# The sources use <filesystem>; fail at configure time instead of silently
# falling back to an older standard on compilers without C++17.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

set(SRC_FILES main.cpp yolo.cpp tokenize.cpp log.cpp seamcarving.cpp utils.cpp intelligentroi.cpp facerecognizer.cpp)

add_executable(smartcrop ${SRC_FILES})
target_link_libraries(smartcrop ${OpenCV_LIBS} -ltbb)
target_include_directories(smartcrop PRIVATE ${OpenCV_INCLUDE_DIRS})
target_compile_options(smartcrop PRIVATE -s -g -Wall)
# Informational only, so report it as STATUS rather than as a CMake warning.
message(STATUS "Model weight directory: ${WEIGHT_DIR}")
# WEIGHT_DIR is consumed by facerecognizer.cpp to embed the default models.
target_compile_definitions(smartcrop PUBLIC WEIGHT_DIR="${WEIGHT_DIR}")

install(TARGETS smartcrop RUNTIME DESTINATION bin)
|
50
SmartCrop/README.md
Normal file
50
SmartCrop/README.md
Normal file
@ -0,0 +1,50 @@
|
||||
# SmartCrop
|
||||
|
||||
SmartCrop is an application that uses content-aware cropping, [seam carving](https://en.wikipedia.org/wiki/Seam_carving) and resizing to bring a directory of images into the desired size and aspect ratio for training. SmartCrop is configurable to prioritize specific items or specific persons in the images provided.
|
||||
|
||||
## Requirements
|
||||
|
||||
* [cmake](https://cmake.org/) 3.6 or later
|
||||
* [opencv](https://opencv.org/) 4.8 or later
|
||||
* A c++17 capable compiler and standard lib like gcc or llvm/clang
|
||||
* git is required to get the source
|
||||
|
||||
## Building
|
||||
|
||||
The steps to build this application are:
|
||||
|
||||
$ git clone https://uvos.xyz/git/uvos/SDImagePreprocess.git
|
||||
$ cd SDImagePreprocess
|
||||
$ mkdir build && cd build
|
||||
$ cmake ..
|
||||
$ make
|
||||
|
||||
The binary can then be found in build/SmartCrop and can optionally be installed with:
|
||||
|
||||
$ sudo make install
|
||||
|
||||
## Basic usage
|
||||
|
||||
To process all images in the directory ~/images and output the images into the directory processedImages:
|
||||
|
||||
$ smartcrop --out processedImages ~/images/*
|
||||
|
||||
To also focus on the person in the image ~/person.jpg
|
||||
|
||||
$ smartcrop --out processedImages --focus-person ~/person.jpg ~/images/*
|
||||
|
||||
To also enable seam carving
|
||||
|
||||
$ smartcrop --out processedImages --focus-person ~/person.jpg --seam-carving ~/images/*
|
||||
|
||||
see smartcrop --help for more
|
||||
|
||||
## Example
|
||||
|
||||
#### Content detected in image:
|
||||

|
||||
|
||||
#### Cropped image based on content:
|
||||

|
||||
|
||||
|
163
SmartCrop/facerecognizer.cpp
Normal file
163
SmartCrop/facerecognizer.cpp
Normal file
@ -0,0 +1,163 @@
|
||||
//
|
||||
// SmartCrop - A tool for content aware croping of images
|
||||
// Copyright (C) 2024 Carl Philipp Klemm
|
||||
//
|
||||
// This file is part of SmartCrop.
|
||||
//
|
||||
// SmartCrop is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// SmartCrop is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
|
||||
//
|
||||
|
||||
#include "facerecognizer.h"
|
||||
#include <filesystem>
|
||||
|
||||
#define INCBIN_PREFIX r
|
||||
#include "incbin.h"
|
||||
|
||||
// Embed the default models into the binary. With INCBIN_PREFIX set to r the
// data is referenced below as r<Name>Data and r<Name>Size.
INCBIN(defaultRecognizer, WEIGHT_DIR "/face_recognition_sface_2021dec.onnx");
INCBIN(defaultDetector, WEIGHT_DIR "/face_detection_yunet_2023mar.onnx");
|
||||
|
||||
#include <opencv2/dnn/dnn.hpp>
|
||||
#include <opencv2/core.hpp>
|
||||
#include <opencv2/highgui.hpp>
|
||||
#include <fstream>
|
||||
|
||||
#include "log.h"
|
||||
|
||||
// Copy of the embedded default face detection model, passed to
// cv::FaceDetectorYN::create when no detector path is given.
static const std::vector<unsigned char> onnx((unsigned char*)rdefaultDetectorData, ((unsigned char*)rdefaultDetectorData)+rdefaultDetectorSize);
|
||||
|
||||
// Creates the face detector and recognizer and registers the given reference
// images.
//
// recognizerPath: onnx recognition model; when empty the embedded default
//                 model is written to a temporary file and loaded from there.
// detectorPath:   onnx detection model; when empty the embedded default model
//                 is loaded from memory.
// referances:     images passed on to addReferances().
//
// Throws LoadException when a network cannot be loaded or the temporary file
// cannot be written.
FaceRecognizer::FaceRecognizer(std::filesystem::path recognizerPath, const std::filesystem::path& detectorPath, const std::vector<cv::Mat>& referances)
{
	if(detectorPath.empty())
	{
		Log(Log::INFO)<<"Using builtin face detection model";

		detector = cv::FaceDetectorYN::create("onnx", onnx, std::vector<unsigned char>(), {320, 320}, 0.6, 0.3, 5000, cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
		if(!detector)
			throw LoadException("Unable to load detector network from built in file");
	}
	else
	{
		detector = cv::FaceDetectorYN::create(detectorPath.string(), "", {320, 320}, 0.6, 0.3, 5000, cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
		if(!detector)
			throw LoadException("Unable to load detector network from "+detectorPath.string());
	}

	bool defaultNetwork = recognizerPath.empty();

	if(defaultNetwork)
	{
		Log(Log::INFO)<<"Using builtin face recognition model";
		recognizerPath = cv::tempfile("onnx");
		// Binary mode: the model must be written byte for byte, text mode may
		// translate line endings on some platforms.
		std::ofstream file(recognizerPath, std::ios::out | std::ios::binary);
		if(!file.is_open())
			throw LoadException("Unable open temporary file at "+recognizerPath.string());
		Log(Log::DEBUG)<<"Using "<<recognizerPath<<" as temporary file for onnx recongnition network";
		file.write(reinterpret_cast<const char*>(rdefaultRecognizerData), rdefaultRecognizerSize);
		file.close();
		if(file.fail())
		{
			// Do not hand a truncated model to the recognizer.
			std::error_code ec;
			std::filesystem::remove(recognizerPath, ec);
			throw LoadException("Unable to write temporary file at "+recognizerPath.string());
		}
	}

	try
	{
		recognizer = cv::FaceRecognizerSF::create(recognizerPath.string(), "", cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
	}
	catch(...)
	{
		// Do not leave the temporary model behind when loading throws.
		if(defaultNetwork)
		{
			std::error_code ec;
			std::filesystem::remove(recognizerPath, ec);
		}
		throw;
	}

	if(defaultNetwork)
		std::filesystem::remove(recognizerPath);

	if(!recognizer)
		throw LoadException("Unable to load recognizer network from "+recognizerPath.string());

	addReferances(referances);
}
|
||||
|
||||
// Runs the face detector on input and returns its result matrix.
// The detector input size is set to the size of input before each detection.
// Callers in this file treat each row as one face and expect 15 columns.
cv::Mat FaceRecognizer::detectFaces(const cv::Mat& input)
{
	detector->setInputSize(input.size());
	cv::Mat faces;
	detector->detect(input, faces);
	return faces;
}
|
||||
|
||||
// Extracts face features from each reference image and appends them to
// referanceFeatures. Images without a detectable face are skipped with a
// warning; when an image shows several faces only the first one is used.
//
// Returns true when at least one reference feature was added.
bool FaceRecognizer::addReferances(const std::vector<cv::Mat>& referances)
{
	bool ret = false;
	for(const cv::Mat& image : referances)
	{
		cv::Mat faces = detectFaces(image);
		// Check for "no face" first: an empty result has no columns, so
		// asserting the column count beforehand aborted on such images
		// instead of warning and continuing.
		if(faces.empty())
		{
			Log(Log::WARN)<<"A referance image provided dose not contian any face";
			continue;
		}
		assert(faces.cols == 15);
		if(faces.rows > 1)
			Log(Log::WARN)<<"A referance image provided contains more than one face, only the first detected face will be considered";
		cv::Mat cropedImage;
		recognizer->alignCrop(image, faces.row(0), cropedImage);
		cv::Mat features;
		recognizer->feature(cropedImage, features);
		// clone() stores an independent copy of the feature data.
		referanceFeatures.push_back(features.clone());
		ret = true;
	}

	return ret;
}
|
||||
|
||||
// Sets the score a comparison must exceed in isMatch() to count as a match.
void FaceRecognizer::setThreshold(double threasholdIn)
{
	threshold = threasholdIn;
}
|
||||
|
||||
// Returns the current match threshold.
double FaceRecognizer::getThreshold()
{
	return threshold;
}
|
||||
|
||||
// Removes all stored reference features.
void FaceRecognizer::clearReferances()
{
	referanceFeatures.clear();
}
|
||||
|
||||
// Searches input for the face that best matches any stored reference.
//
// input: image to search.
// alone: when true, an image showing more than one face is rejected.
//
// Returns a Detection whose person field is
//   >= 0  index of the best matching reference (confidence and rect are set),
//   -1    no face scored above the threshold,
//   -2    alone was requested but more than one face was detected.
FaceRecognizer::Detection FaceRecognizer::isMatch(const cv::Mat& input, bool alone)
{
	cv::Mat faces = detectFaces(input);

	Detection bestMatch;
	bestMatch.confidence = 0;
	bestMatch.person = -1;

	if(alone && faces.rows > 1)
	{
		bestMatch.person = -2;
		return bestMatch;
	}

	for(int i = 0; i < faces.rows; ++i)
	{
		cv::Mat face;
		recognizer->alignCrop(input, faces.row(i), face);
		cv::Mat features;
		recognizer->feature(face, features);
		features = features.clone();
		for(size_t referanceIndex = 0; referanceIndex < referanceFeatures.size(); ++referanceIndex)
		{
			double score = recognizer->match(referanceFeatures[referanceIndex], features, cv::FaceRecognizerSF::FR_COSINE);
			if(score > threshold && score > bestMatch.confidence)
			{
				bestMatch.confidence = score;
				bestMatch.person = static_cast<int>(referanceIndex);
				// The detector stores its results as floats; reading them
				// with at<int> reinterpreted the bits and produced a garbage
				// rectangle. Columns 0-3 are x, y, width and height.
				bestMatch.rect = cv::Rect(static_cast<int>(faces.at<float>(i, 0)),
				                          static_cast<int>(faces.at<float>(i, 1)),
				                          static_cast<int>(faces.at<float>(i, 2)),
				                          static_cast<int>(faces.at<float>(i, 3)));
			}
		}
	}

	return bestMatch;
}
|
67
SmartCrop/facerecognizer.h
Normal file
67
SmartCrop/facerecognizer.h
Normal file
@ -0,0 +1,67 @@
|
||||
/* * SmartCrop - A tool for content aware croping of images
|
||||
* Copyright (C) 2024 Carl Philipp Klemm
|
||||
*
|
||||
* This file is part of SmartCrop.
|
||||
*
|
||||
* SmartCrop is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* SmartCrop is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <exception>
|
||||
#include <opencv2/core/mat.hpp>
|
||||
#include <opencv2/objdetect/face.hpp>
|
||||
#include <opencv2/core.hpp>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <filesystem>
|
||||
|
||||
// Detects faces in images and compares them against a set of reference faces.
class FaceRecognizer
{
public:

	// Result of isMatch().
	struct Detection
	{
		// Index of the matched reference; isMatch() uses -1 for "no match"
		// and -2 for "more than one face while alone was requested".
		int person;
		// Score of the best match, 0 when nothing matched.
		float confidence;
		// Bounding box of the matched face, set only for a match.
		cv::Rect rect;
	};

	// Thrown by the constructor when a network cannot be loaded.
	class LoadException : public std::exception
	{
	private:
		std::string message;
	public:
		LoadException(const std::string& msg): std::exception(), message(msg) {}
		virtual const char* what() const throw() override
		{
			return message.c_str();
		}
	};

private:
	// One feature matrix per successfully added reference image.
	std::vector<cv::Mat> referanceFeatures;
	std::shared_ptr<cv::FaceRecognizerSF> recognizer;
	std::shared_ptr<cv::FaceDetectorYN> detector;

	// Score a comparison must exceed in isMatch() to count as a match.
	double threshold = 0.363;

public:
	// Empty paths select the models embedded in the binary.
	FaceRecognizer(std::filesystem::path recognizerPath = "", const std::filesystem::path& detectorPath = "", const std::vector<cv::Mat>& referances = std::vector<cv::Mat>());
	// Returns the detector result for input, one row per detected face.
	cv::Mat detectFaces(const cv::Mat& input);
	// Finds the best match among the references; see Detection for the result.
	Detection isMatch(const cv::Mat& input, bool alone = false);
	// Adds reference faces; returns true when at least one was added.
	bool addReferances(const std::vector<cv::Mat>& referances);
	void setThreshold(double threashold);
	double getThreshold();
	void clearReferances();
};
|
BIN
SmartCrop/images/IMGP3692.jpg
Normal file
BIN
SmartCrop/images/IMGP3692.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 251 KiB |
BIN
SmartCrop/images/IMGP3692C.jpg
Normal file
BIN
SmartCrop/images/IMGP3692C.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 97 KiB |
495
SmartCrop/incbin.h
Normal file
495
SmartCrop/incbin.h
Normal file
@ -0,0 +1,495 @@
|
||||
/* * SmartCrop - A tool for content aware croping of images
|
||||
* Copyright (C) 2024 Carl Philipp Klemm
|
||||
*
|
||||
* This file is part of SmartCrop.
|
||||
*
|
||||
* SmartCrop is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* SmartCrop is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file incbin.h
|
||||
* @author Dale Weiler
|
||||
* @brief Utility for including binary files
|
||||
*
|
||||
* Facilities for including binary files into the current translation unit and
|
||||
* making use from them externally in other translation units.
|
||||
*/
|
||||
#ifndef INCBIN_HDR
|
||||
#define INCBIN_HDR
|
||||
#include <limits.h>
|
||||
#if defined(__AVX512BW__) || \
|
||||
defined(__AVX512CD__) || \
|
||||
defined(__AVX512DQ__) || \
|
||||
defined(__AVX512ER__) || \
|
||||
defined(__AVX512PF__) || \
|
||||
defined(__AVX512VL__) || \
|
||||
defined(__AVX512F__)
|
||||
# define INCBIN_ALIGNMENT_INDEX 6
|
||||
#elif defined(__AVX__) || \
|
||||
defined(__AVX2__)
|
||||
# define INCBIN_ALIGNMENT_INDEX 5
|
||||
#elif defined(__SSE__) || \
|
||||
defined(__SSE2__) || \
|
||||
defined(__SSE3__) || \
|
||||
defined(__SSSE3__) || \
|
||||
defined(__SSE4_1__) || \
|
||||
defined(__SSE4_2__) || \
|
||||
defined(__neon__) || \
|
||||
defined(__ARM_NEON) || \
|
||||
defined(__ALTIVEC__)
|
||||
# define INCBIN_ALIGNMENT_INDEX 4
|
||||
#elif ULONG_MAX != 0xffffffffu
|
||||
# define INCBIN_ALIGNMENT_INDEX 3
|
||||
# else
|
||||
# define INCBIN_ALIGNMENT_INDEX 2
|
||||
#endif
|
||||
|
||||
/* Lookup table of (1 << n) where `n' is `INCBIN_ALIGNMENT_INDEX' */
|
||||
#define INCBIN_ALIGN_SHIFT_0 1
|
||||
#define INCBIN_ALIGN_SHIFT_1 2
|
||||
#define INCBIN_ALIGN_SHIFT_2 4
|
||||
#define INCBIN_ALIGN_SHIFT_3 8
|
||||
#define INCBIN_ALIGN_SHIFT_4 16
|
||||
#define INCBIN_ALIGN_SHIFT_5 32
|
||||
#define INCBIN_ALIGN_SHIFT_6 64
|
||||
|
||||
/* Actual alignment value */
|
||||
#define INCBIN_ALIGNMENT \
|
||||
INCBIN_CONCATENATE( \
|
||||
INCBIN_CONCATENATE(INCBIN_ALIGN_SHIFT, _), \
|
||||
INCBIN_ALIGNMENT_INDEX)
|
||||
|
||||
/* Stringize */
|
||||
#define INCBIN_STR(X) \
|
||||
#X
|
||||
#define INCBIN_STRINGIZE(X) \
|
||||
INCBIN_STR(X)
|
||||
/* Concatenate */
|
||||
#define INCBIN_CAT(X, Y) \
|
||||
X ## Y
|
||||
#define INCBIN_CONCATENATE(X, Y) \
|
||||
INCBIN_CAT(X, Y)
|
||||
/* Deferred macro expansion */
|
||||
#define INCBIN_EVAL(X) \
|
||||
X
|
||||
#define INCBIN_INVOKE(N, ...) \
|
||||
INCBIN_EVAL(N(__VA_ARGS__))
|
||||
/* Variable argument count for overloading by arity */
|
||||
#define INCBIN_VA_ARG_COUNTER(_1, _2, _3, N, ...) N
|
||||
#define INCBIN_VA_ARGC(...) INCBIN_VA_ARG_COUNTER(__VA_ARGS__, 3, 2, 1, 0)
|
||||
|
||||
/* Green Hills uses a different directive for including binary data */
|
||||
#if defined(__ghs__)
|
||||
# if (__ghs_asm == 2)
|
||||
# define INCBIN_MACRO ".file"
|
||||
/* Or consider the ".myrawdata" entry in the ld file */
|
||||
# else
|
||||
# define INCBIN_MACRO "\tINCBIN"
|
||||
# endif
|
||||
#else
|
||||
# define INCBIN_MACRO ".incbin"
|
||||
#endif
|
||||
|
||||
#ifndef _MSC_VER
|
||||
# define INCBIN_ALIGN \
|
||||
__attribute__((aligned(INCBIN_ALIGNMENT)))
|
||||
#else
|
||||
# define INCBIN_ALIGN __declspec(align(INCBIN_ALIGNMENT))
|
||||
#endif
|
||||
|
||||
#if defined(__arm__) || /* GNU C and RealView */ \
|
||||
defined(__arm) || /* Diab */ \
|
||||
defined(_ARM) /* ImageCraft */
|
||||
# define INCBIN_ARM
|
||||
#endif
|
||||
|
||||
#ifdef __GNUC__
|
||||
/* Utilize .balign where supported */
|
||||
# define INCBIN_ALIGN_HOST ".balign " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n"
|
||||
# define INCBIN_ALIGN_BYTE ".balign 1\n"
|
||||
#elif defined(INCBIN_ARM)
|
||||
/*
|
||||
* On arm assemblers, the alignment value is calculated as (1 << n) where `n' is
|
||||
* the shift count. This is the value passed to `.align'
|
||||
*/
|
||||
# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT_INDEX) "\n"
|
||||
# define INCBIN_ALIGN_BYTE ".align 0\n"
|
||||
#else
|
||||
/* We assume other inline assembler's treat `.align' as `.balign' */
|
||||
# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n"
|
||||
# define INCBIN_ALIGN_BYTE ".align 1\n"
|
||||
#endif
|
||||
|
||||
/* INCBIN_CONST is used by incbin.c generated files */
|
||||
#if defined(__cplusplus)
|
||||
# define INCBIN_EXTERNAL extern "C"
|
||||
# define INCBIN_CONST extern const
|
||||
#else
|
||||
# define INCBIN_EXTERNAL extern
|
||||
# define INCBIN_CONST const
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Optionally override the linker section into which size and data is
|
||||
* emitted.
|
||||
*
|
||||
* @warning If you use this facility, you might have to deal with
|
||||
* platform-specific linker output section naming on your own.
|
||||
*/
|
||||
#if !defined(INCBIN_OUTPUT_SECTION)
|
||||
# if defined(__APPLE__)
|
||||
# define INCBIN_OUTPUT_SECTION ".const_data"
|
||||
# else
|
||||
# define INCBIN_OUTPUT_SECTION ".rodata"
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Optionally override the linker section into which data is emitted.
|
||||
*
|
||||
* @warning If you use this facility, you might have to deal with
|
||||
* platform-specific linker output section naming on your own.
|
||||
*/
|
||||
#if !defined(INCBIN_OUTPUT_DATA_SECTION)
|
||||
# define INCBIN_OUTPUT_DATA_SECTION INCBIN_OUTPUT_SECTION
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Optionally override the linker section into which size is emitted.
|
||||
*
|
||||
* @warning If you use this facility, you might have to deal with
|
||||
* platform-specific linker output section naming on your own.
|
||||
*
|
||||
* @note This is useful for Harvard architectures where program memory cannot
|
||||
* be directly read from the program without special instructions. With this you
|
||||
* can chose to put the size variable in RAM rather than ROM.
|
||||
*/
|
||||
#if !defined(INCBIN_OUTPUT_SIZE_SECTION)
|
||||
# define INCBIN_OUTPUT_SIZE_SECTION INCBIN_OUTPUT_SECTION
|
||||
#endif
|
||||
|
||||
#if defined(__APPLE__)
|
||||
# include "TargetConditionals.h"
|
||||
# if defined(TARGET_OS_IPHONE) && !defined(INCBIN_SILENCE_BITCODE_WARNING)
|
||||
# warning "incbin is incompatible with bitcode. Using the library will break upload to App Store if you have bitcode enabled. Add `#define INCBIN_SILENCE_BITCODE_WARNING` before including this header to silence this warning."
|
||||
# endif
|
||||
/* The directives are different for Apple branded compilers */
|
||||
# define INCBIN_SECTION INCBIN_OUTPUT_SECTION "\n"
|
||||
# define INCBIN_GLOBAL(NAME) ".globl " INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n"
|
||||
# define INCBIN_INT ".long "
|
||||
# define INCBIN_MANGLE "_"
|
||||
# define INCBIN_BYTE ".byte "
|
||||
# define INCBIN_TYPE(...)
|
||||
#else
|
||||
# define INCBIN_SECTION ".section " INCBIN_OUTPUT_SECTION "\n"
|
||||
# define INCBIN_GLOBAL(NAME) ".global " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n"
|
||||
# if defined(__ghs__)
|
||||
# define INCBIN_INT ".word "
|
||||
# else
|
||||
# define INCBIN_INT ".int "
|
||||
# endif
|
||||
# if defined(__USER_LABEL_PREFIX__)
|
||||
# define INCBIN_MANGLE INCBIN_STRINGIZE(__USER_LABEL_PREFIX__)
|
||||
# else
|
||||
# define INCBIN_MANGLE ""
|
||||
# endif
|
||||
# if defined(INCBIN_ARM)
|
||||
/* On arm assemblers, `@' is used as a line comment token */
|
||||
# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", %object\n"
|
||||
# elif defined(__MINGW32__) || defined(__MINGW64__)
|
||||
/* Mingw doesn't support this directive either */
|
||||
# define INCBIN_TYPE(NAME)
|
||||
# else
|
||||
/* It's safe to use `@' on other architectures */
|
||||
# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", @object\n"
|
||||
# endif
|
||||
# define INCBIN_BYTE ".byte "
|
||||
#endif
|
||||
|
||||
/* List of style types used for symbol names */
|
||||
#define INCBIN_STYLE_CAMEL 0
|
||||
#define INCBIN_STYLE_SNAKE 1
|
||||
|
||||
/**
|
||||
* @brief Specify the prefix to use for symbol names.
|
||||
*
|
||||
* @note By default this is "g".
|
||||
*
|
||||
* @code
|
||||
* #define INCBIN_PREFIX incbin
|
||||
* #include "incbin.h"
|
||||
* INCBIN(Foo, "foo.txt");
|
||||
*
|
||||
* // Now you have the following symbols instead:
|
||||
* // const unsigned char incbinFoo<data>[];
|
||||
* // const unsigned char *const incbinFoo<end>;
|
||||
* // const unsigned int incbinFoo<size>;
|
||||
* @endcode
|
||||
*/
|
||||
#if !defined(INCBIN_PREFIX)
|
||||
# define INCBIN_PREFIX g
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Specify the style used for symbol names.
|
||||
*
|
||||
* Possible options are
|
||||
* - INCBIN_STYLE_CAMEL "CamelCase"
|
||||
* - INCBIN_STYLE_SNAKE "snake_case"
|
||||
*
|
||||
* @note By default this is INCBIN_STYLE_CAMEL
|
||||
*
|
||||
* @code
|
||||
* #define INCBIN_STYLE INCBIN_STYLE_SNAKE
|
||||
* #include "incbin.h"
|
||||
* INCBIN(foo, "foo.txt");
|
||||
*
|
||||
* // Now you have the following symbols:
|
||||
* // const unsigned char <prefix>foo_data[];
|
||||
* // const unsigned char *const <prefix>foo_end;
|
||||
* // const unsigned int <prefix>foo_size;
|
||||
* @endcode
|
||||
*/
|
||||
#if !defined(INCBIN_STYLE)
|
||||
# define INCBIN_STYLE INCBIN_STYLE_CAMEL
|
||||
#endif
|
||||
|
||||
/* Style lookup tables */
|
||||
#define INCBIN_STYLE_0_DATA Data
|
||||
#define INCBIN_STYLE_0_END End
|
||||
#define INCBIN_STYLE_0_SIZE Size
|
||||
#define INCBIN_STYLE_1_DATA _data
|
||||
#define INCBIN_STYLE_1_END _end
|
||||
#define INCBIN_STYLE_1_SIZE _size
|
||||
|
||||
/* Style lookup: returning identifier */
|
||||
#define INCBIN_STYLE_IDENT(TYPE) \
|
||||
INCBIN_CONCATENATE( \
|
||||
INCBIN_STYLE_, \
|
||||
INCBIN_CONCATENATE( \
|
||||
INCBIN_EVAL(INCBIN_STYLE), \
|
||||
INCBIN_CONCATENATE(_, TYPE)))
|
||||
|
||||
/* Style lookup: returning string literal */
|
||||
#define INCBIN_STYLE_STRING(TYPE) \
|
||||
INCBIN_STRINGIZE( \
|
||||
INCBIN_STYLE_IDENT(TYPE)) \
|
||||
|
||||
/* Generate the global labels by indirectly invoking the macro with our style
|
||||
* type and concatenating the name against them. */
|
||||
#define INCBIN_GLOBAL_LABELS(NAME, TYPE) \
|
||||
INCBIN_INVOKE( \
|
||||
INCBIN_GLOBAL, \
|
||||
INCBIN_CONCATENATE( \
|
||||
NAME, \
|
||||
INCBIN_INVOKE( \
|
||||
INCBIN_STYLE_IDENT, \
|
||||
TYPE))) \
|
||||
INCBIN_INVOKE( \
|
||||
INCBIN_TYPE, \
|
||||
INCBIN_CONCATENATE( \
|
||||
NAME, \
|
||||
INCBIN_INVOKE( \
|
||||
INCBIN_STYLE_IDENT, \
|
||||
TYPE)))
|
||||
|
||||
/**
|
||||
* @brief Externally reference binary data included in another translation unit.
|
||||
*
|
||||
* Produces three external symbols that reference the binary data included in
|
||||
* another translation unit.
|
||||
*
|
||||
* The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
|
||||
* "Data", as well as "End" and "Size" after. An example is provided below.
|
||||
*
|
||||
* @param TYPE Optional array type. Omitting this picks a default of `unsigned char`.
|
||||
* @param NAME The name given for the binary data
|
||||
*
|
||||
* @code
|
||||
* INCBIN_EXTERN(Foo);
|
||||
*
|
||||
* // Now you have the following symbols:
|
||||
* // extern const unsigned char <prefix>Foo<data>[];
|
||||
* // extern const unsigned char *const <prefix>Foo<end>;
|
||||
* // extern const unsigned int <prefix>Foo<size>;
|
||||
* @endcode
|
||||
*
|
||||
* You may specify a custom optional data type as well as the first argument.
|
||||
* @code
|
||||
* INCBIN_EXTERN(custom_type, Foo);
|
||||
*
|
||||
* // Now you have the following symbols:
|
||||
* // extern const custom_type <prefix>Foo<data>[];
|
||||
* // extern const custom_type *const <prefix>Foo<end>;
|
||||
* // extern const unsigned int <prefix>Foo<size>;
|
||||
* @endcode
|
||||
*/
|
||||
#define INCBIN_EXTERN(...) \
|
||||
INCBIN_CONCATENATE(INCBIN_EXTERN_, INCBIN_VA_ARGC(__VA_ARGS__))(__VA_ARGS__)
|
||||
#define INCBIN_EXTERN_1(NAME, ...) \
|
||||
INCBIN_EXTERN_2(unsigned char, NAME)
|
||||
#define INCBIN_EXTERN_2(TYPE, NAME) \
|
||||
INCBIN_EXTERNAL const INCBIN_ALIGN TYPE \
|
||||
INCBIN_CONCATENATE( \
|
||||
INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
|
||||
INCBIN_STYLE_IDENT(DATA))[]; \
|
||||
INCBIN_EXTERNAL const INCBIN_ALIGN TYPE *const \
|
||||
INCBIN_CONCATENATE( \
|
||||
INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
|
||||
INCBIN_STYLE_IDENT(END)); \
|
||||
INCBIN_EXTERNAL const unsigned int \
|
||||
INCBIN_CONCATENATE( \
|
||||
INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
|
||||
INCBIN_STYLE_IDENT(SIZE))
|
||||
|
||||
/**
|
||||
* @brief Externally reference textual data included in another translation unit.
|
||||
*
|
||||
* Produces three external symbols that reference the textual data included in
|
||||
* another translation unit.
|
||||
*
|
||||
* The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
|
||||
* "Data", as well as "End" and "Size" after. An example is provided below.
|
||||
*
|
||||
* @param NAME The name given for the textual data
|
||||
*
|
||||
* @code
|
||||
* INCTXT_EXTERN(Foo);
|
||||
*
|
||||
* // Now you have the following symbols:
|
||||
* // extern const char <prefix>Foo<data>[];
|
||||
* // extern const char *const <prefix>Foo<end>;
|
||||
* // extern const unsigned int <prefix>Foo<size>;
|
||||
* @endcode
|
||||
*/
|
||||
#define INCTXT_EXTERN(NAME) \
|
||||
INCBIN_EXTERN_2(char, NAME)
|
||||
|
||||
/**
|
||||
* @brief Include a binary file into the current translation unit.
|
||||
*
|
||||
* Includes a binary file into the current translation unit, producing three symbols
|
||||
* for objects that encode the data and size respectively.
|
||||
*
|
||||
* The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
|
||||
* "Data", as well as "End" and "Size" after. An example is provided below.
|
||||
*
|
||||
* @param TYPE Optional array type. Omitting this picks a default of `unsigned char`.
|
||||
* @param NAME The name to associate with this binary data (as an identifier.)
|
||||
* @param FILENAME The file to include (as a string literal.)
|
||||
*
|
||||
* @code
|
||||
* INCBIN(Icon, "icon.png");
|
||||
*
|
||||
* // Now you have the following symbols:
|
||||
* // const unsigned char <prefix>Icon<data>[];
|
||||
* // const unsigned char *const <prefix>Icon<end>;
|
||||
* // const unsigned int <prefix>Icon<size>;
|
||||
* @endcode
|
||||
*
|
||||
* You may specify a custom optional data type as well as the first argument.
|
||||
* These macros are specialized by arity.
|
||||
* @code
|
||||
* INCBIN(custom_type, Icon, "icon.png");
|
||||
*
|
||||
* // Now you have the following symbols:
|
||||
* // const custom_type <prefix>Icon<data>[];
|
||||
* // const custom_type *const <prefix>Icon<end>;
|
||||
* // const unsigned int <prefix>Icon<size>;
|
||||
* @endcode
|
||||
*
|
||||
* @warning This must be used in global scope
|
||||
* @warning The identifiers may be different if INCBIN_STYLE is not default
|
||||
*
|
||||
* To externally reference the data included by this in another translation unit
|
||||
* please @see INCBIN_EXTERN.
|
||||
*/
|
||||
#ifdef _MSC_VER
|
||||
# define INCBIN(NAME, FILENAME) \
|
||||
INCBIN_EXTERN(NAME)
|
||||
#else
|
||||
# define INCBIN(...) \
|
||||
INCBIN_CONCATENATE(INCBIN_, INCBIN_VA_ARGC(__VA_ARGS__))(__VA_ARGS__)
|
||||
# if defined(__GNUC__)
|
||||
# define INCBIN_1(...) _Pragma("GCC error \"Single argument INCBIN not allowed\"")
|
||||
# elif defined(__clang__)
|
||||
# define INCBIN_1(...) _Pragma("clang error \"Single argument INCBIN not allowed\"")
|
||||
# else
|
||||
# define INCBIN_1(...) /* Cannot do anything here */
|
||||
# endif
|
||||
# define INCBIN_2(NAME, FILENAME) \
|
||||
INCBIN_3(unsigned char, NAME, FILENAME)
|
||||
# define INCBIN_3(TYPE, NAME, FILENAME) INCBIN_COMMON(TYPE, NAME, FILENAME, /* No terminator for binary data */)
|
||||
# define INCBIN_COMMON(TYPE, NAME, FILENAME, TERMINATOR) \
|
||||
__asm__(INCBIN_SECTION \
|
||||
INCBIN_GLOBAL_LABELS(NAME, DATA) \
|
||||
INCBIN_ALIGN_HOST \
|
||||
INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) ":\n" \
|
||||
INCBIN_MACRO " \"" FILENAME "\"\n" \
|
||||
TERMINATOR \
|
||||
INCBIN_GLOBAL_LABELS(NAME, END) \
|
||||
INCBIN_ALIGN_BYTE \
|
||||
INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) ":\n" \
|
||||
INCBIN_BYTE "1\n" \
|
||||
INCBIN_GLOBAL_LABELS(NAME, SIZE) \
|
||||
INCBIN_ALIGN_HOST \
|
||||
INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(SIZE) ":\n" \
|
||||
INCBIN_INT INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) " - " \
|
||||
INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) "\n" \
|
||||
INCBIN_ALIGN_HOST \
|
||||
".text\n" \
|
||||
); \
|
||||
INCBIN_EXTERN(TYPE, NAME)
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Include a textual file into the current translation unit.
|
||||
*
|
||||
* This behaves the same as INCBIN except it produces char compatible arrays
|
||||
* and implicitly adds a null-terminator byte, thus the size of data included
|
||||
* by this is one byte larger than that of INCBIN.
|
||||
*
|
||||
* Includes a textual file into the current translation unit, producing three
|
||||
* symbols for objects that encode the data and size respectively.
|
||||
*
|
||||
* The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
|
||||
* "Data", as well as "End" and "Size" after. An example is provided below.
|
||||
*
|
||||
* @param NAME The name to associate with this binary data (as an identifier.)
|
||||
* @param FILENAME The file to include (as a string literal.)
|
||||
*
|
||||
* @code
|
||||
* INCTXT(Readme, "readme.txt");
|
||||
*
|
||||
* // Now you have the following symbols:
|
||||
* // const char <prefix>Readme<data>[];
|
||||
* // const char *const <prefix>Readme<end>;
|
||||
* // const unsigned int <prefix>Readme<size>;
|
||||
* @endcode
|
||||
*
|
||||
* @warning This must be used in global scope
|
||||
* @warning The identifiers may be different if INCBIN_STYLE is not default
|
||||
*
|
||||
* To externally reference the data included by this in another translation unit
|
||||
* please @see INCTXT_EXTERN.
|
||||
*/
|
||||
#if defined(_MSC_VER)
|
||||
# define INCTXT(NAME, FILENAME) \
|
||||
INCBIN_EXTERN(NAME)
|
||||
#else
|
||||
# define INCTXT(NAME, FILENAME) \
|
||||
INCBIN_COMMON(char, NAME, FILENAME, INCBIN_BYTE "0\n")
|
||||
#endif
|
||||
|
||||
#endif
|
128
SmartCrop/intelligentroi.cpp
Normal file
128
SmartCrop/intelligentroi.cpp
Normal file
@ -0,0 +1,128 @@
|
||||
//
|
||||
// SmartCrop - A tool for content-aware cropping of images
|
||||
// Copyright (C) 2024 Carl Philipp Klemm
|
||||
//
|
||||
// This file is part of SmartCrop.
|
||||
//
|
||||
// SmartCrop is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// SmartCrop is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
|
||||
//
|
||||
|
||||
#include "intelligentroi.h"
|
||||
|
||||
#include <opencv2/imgproc.hpp>
|
||||
|
||||
#include "utils.h"
|
||||
#include "log.h"
|
||||
|
||||
// Ordering predicate for (point, priority) pairs: higher priority sorts first;
// among equal priorities the point closer to `center` sorts first.
bool InteligentRoi::compPointPrio(const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b, const cv::Point2i& center)
{
    if(a.second == b.second)
    {
        // Tie on priority: fall back to the distance from the center.
        const double distA = pointDist(a.first, center);
        const double distB = pointDist(b.first, center);
        return distA < distB;
    }

    return a.second > b.second;
}
|
||||
|
||||
// Translates `rect` (its size is never changed) by the minimal amount along
// each axis so that `point` ends up on its border. Does nothing when
// pointInRect() already reports the point as inside.
void InteligentRoi::slideRectToPoint(cv::Rect& rect, const cv::Point2i& point)
{
    if(pointInRect(point, rect))
        return;

    // Horizontal axis: move the left or the right edge onto the point.
    const int rightEdge = rect.x+rect.width;
    if(point.x < rect.x)
        rect.x = point.x;
    else if(point.x > rightEdge)
        rect.x = point.x-rect.width;

    // Vertical axis: move the top or the bottom edge onto the point.
    const int bottomEdge = rect.y+rect.height;
    if(point.y < rect.y)
        rect.y = point.y;
    else if(point.y > bottomEdge)
        rect.y = point.y-rect.height;
}
|
||||
|
||||
// Places a square whose side equals the shorter image dimension so that it
// covers as many of the `mustInclude` points as possible.
//
// The points are sorted with compPointPrio (highest priority first, then
// closest to the image center). While the bounding rectangle of the remaining
// points does not fit into the square, the last point in that order is
// dropped and `incompleate` is set to true. The square starts centered, is
// slid towards every dropped and every kept point, and is finally clamped to
// the image.
cv::Rect InteligentRoi::maxRect(bool& incompleate, const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude)
{
    typedef std::pair<cv::Point2i, int> PrioPoint;

    incompleate = false;
    const int diameter = std::min(imageSize.height, imageSize.width);
    cv::Point2i point(imageSize.width/2, imageSize.height/2);
    cv::Rect candiate(point.x-diameter/2, point.y-diameter/2, diameter, diameter);

    std::sort(mustInclude.begin(), mustInclude.end(),
        [&point](const PrioPoint& a, const PrioPoint& b){return compPointPrio(a, b, point);});

    // Drop points from the back of the sorted list until the rest fits.
    for(cv::Rect includeRect = rectFromPoints(mustInclude);
        includeRect.width-2 > diameter || includeRect.height-2 > diameter;
        includeRect = rectFromPoints(mustInclude))
    {
        incompleate = true;
        slideRectToPoint(candiate, mustInclude.back().first);
        mustInclude.pop_back();
        Log(Log::DEBUG)<<"cant fill";
        for(const PrioPoint& mipoint : mustInclude)
            Log(Log::DEBUG)<<mipoint.first<<' '<<pointDist(mipoint.first, point)<<' '<<mipoint.second;
    }

    for(const PrioPoint& includePoint : mustInclude)
        slideRectToPoint(candiate, includePoint.first);

    // Clamp to the image: shift the origin back inside, then shrink the far
    // edges if they still overshoot.
    candiate.x = std::max(candiate.x, 0);
    candiate.y = std::max(candiate.y, 0);
    candiate.width = std::min(candiate.width, imageSize.width-candiate.x);
    candiate.height = std::min(candiate.height, imageSize.height-candiate.y);

    return candiate;
}
|
||||
|
||||
// Caches the class id that `yolo` reports for the label "person"; it is used
// by getCropRectangle() to recognise person detections.
InteligentRoi::InteligentRoi(const Yolo& yolo):
    personId(yolo.getClassForStr("person"))
{
}
|
||||
|
||||
// Computes the crop rectangle for an image of `imageSize` from `detections`
// and stores it in `out`.
//
// Every detection contributes the four corners of its box as prioritized
// points. For person detections the two top corners get priority+1 and an
// additional top-center point gets priority+2.
//
// Returns the `incompleate` flag reported by maxRect(), i.e. true when not all
// points could be covered by the crop.
bool InteligentRoi::getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize)
{
    std::vector<std::pair<cv::Point2i, int>> corners;

    for(const Yolo::Detection& detection : detections)
    {
        const auto& box = detection.box;
        const int priority = detection.priority;
        const bool isPerson = detection.class_id == personId;
        // Only persons get their upper edge boosted.
        const int topPriority = isPerson ? priority+1 : priority;

        if(isPerson)
            corners.push_back({box.tl()+cv::Point2i(box.width/2, 0), priority+2});

        corners.push_back({box.tl(), topPriority});
        corners.push_back({box.br(), priority});
        corners.push_back({box.tl()+cv::Point2i(box.width, 0), topPriority});
        corners.push_back({box.br()+cv::Point2i(0-box.width, 0), priority});
    }

    bool incompleate;
    out = maxRect(incompleate, imageSize, corners);
    return incompleate;
}
|
37
SmartCrop/intelligentroi.h
Normal file
37
SmartCrop/intelligentroi.h
Normal file
@ -0,0 +1,37 @@
|
||||
/* * SmartCrop - A tool for content-aware cropping of images
|
||||
* Copyright (C) 2024 Carl Philipp Klemm
|
||||
*
|
||||
* This file is part of SmartCrop.
|
||||
*
|
||||
* SmartCrop is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* SmartCrop is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <opencv2/imgproc.hpp>
|
||||
|
||||
#include "yolo.h"
|
||||
|
||||
class InteligentRoi
|
||||
{
|
||||
private:
|
||||
int personId;
|
||||
static bool compPointPrio(const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b, const cv::Point2i& center);
|
||||
static void slideRectToPoint(cv::Rect& rect, const cv::Point2i& point);
|
||||
static cv::Rect maxRect(bool& incompleate, const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude = {});
|
||||
|
||||
public:
|
||||
InteligentRoi(const Yolo& yolo);
|
||||
bool getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize);
|
||||
};
|
63
SmartCrop/log.cpp
Normal file
63
SmartCrop/log.cpp
Normal file
@ -0,0 +1,63 @@
|
||||
/**
|
||||
* Lubricant Detecter
|
||||
* Copyright (C) 2021 Carl Klemm
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* version 3 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
||||
* Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "log.h"
|
||||
|
||||
// Starts a log message of severity `type`. When the static `headers` flag is
// set, the message is prefixed with "[<label>] ". `endlineI` controls whether
// the destructor terminates the message with a newline.
Log::Log(Level type, bool endlineI): msglevel(type), endline(endlineI)
{
    if(!headers)
        return;

    *this<<("["+getLabel(type)+"] ");
}
|
||||
|
||||
// Terminates the message with a newline if anything was printed and the
// message was created with `endline` enabled.
//
// The newline is written to the same stream operator<< used for the message
// (std::cerr for ERROR, std::cout otherwise), so that error output is not
// split across two streams.
Log::~Log()
{
    if(opened && endline)
    {
        if(msglevel == ERROR)
            std::cerr<<'\n';
        else
            std::cout<<'\n';
    }
    opened = false;
}
|
||||
|
||||
|
||||
std::string Log::getLabel(Level level)
|
||||
{
|
||||
std::string label;
|
||||
switch(level)
|
||||
{
|
||||
case DEBUG:
|
||||
label = "DEBUG";
|
||||
break;
|
||||
case INFO:
|
||||
label = "INFO ";
|
||||
break;
|
||||
case WARN:
|
||||
label = "WARN ";
|
||||
break;
|
||||
case ERROR:
|
||||
label = "ERROR";
|
||||
break;
|
||||
}
|
||||
return label;
|
||||
}
|
||||
|
||||
bool Log::headers = false;
|
||||
Log::Level Log::level = WARN;
|
64
SmartCrop/log.h
Normal file
64
SmartCrop/log.h
Normal file
@ -0,0 +1,64 @@
|
||||
/**
|
||||
* eisgenerator
|
||||
* Copyright (C) 2021 Carl Klemm
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* version 3 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
||||
* Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
// Stream-style logger: a temporary Log object collects one message through
// operator<< and finishes it in its destructor.
class Log
{
public:

    // Severity levels in ascending order.
    enum Level
    {
        DEBUG,
        INFO,
        WARN,
        ERROR
    };

private:
    bool opened = false;     // set once something was actually printed
    Level msglevel = DEBUG;  // severity of this message
    bool endline = true;     // append '\n' on destruction

    std::string getLabel(Level level);

public:

    static bool headers;  // prefix messages with "[LEVEL] "
    static Level level;   // messages below this severity are dropped

    Log() {}
    Log(Level type, bool endlineI = true);
    ~Log();

    // Prints `msg` if this message's severity passes the global threshold.
    // ERROR messages go to std::cerr, everything else to std::cout.
    template<class T> Log &operator<<(const T &msg)
    {
        if(msglevel < level)
            return *this;

        std::ostream& stream = (msglevel == ERROR) ? std::cerr : std::cout;
        stream<<msg;
        opened = true;
        return *this;
    }
};
|
460
SmartCrop/main.cpp
Normal file
460
SmartCrop/main.cpp
Normal file
@ -0,0 +1,460 @@
|
||||
//
|
||||
// SmartCrop - A tool for content-aware cropping of images
|
||||
// Copyright (C) 2024 Carl Philipp Klemm
|
||||
//
|
||||
// This file is part of SmartCrop.
|
||||
//
|
||||
// SmartCrop is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// SmartCrop is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
|
||||
//
|
||||
|
||||
#include <filesystem>
|
||||
#include <iostream>
|
||||
#include <opencv2/core.hpp>
|
||||
#include <opencv2/core/types.hpp>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <opencv2/highgui.hpp>
|
||||
#include <algorithm>
|
||||
#include <execution>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <numeric>
|
||||
|
||||
#include "yolo.h"
|
||||
#include "log.h"
|
||||
#include "options.h"
|
||||
#include "utils.h"
|
||||
#include "intelligentroi.h"
|
||||
#include "seamcarving.h"
|
||||
#include "facerecognizer.h"
|
||||
|
||||
// Returns the detection whose horizontal extent [box.x, box.x+box.width]
// contains column `x`; if several do, the one reaching furthest to the right
// wins. The detection pointed to by `ignore` is never returned. Returns
// nullptr when no detection contains `x`.
const Yolo::Detection* pointInDetectionHoriz(int x, const std::vector<Yolo::Detection>& detections, const Yolo::Detection* ignore = nullptr)
{
    const Yolo::Detection* best = nullptr;

    for(const Yolo::Detection& candidate : detections)
    {
        if(&candidate == ignore)
            continue;

        const bool containsX = candidate.box.x <= x && candidate.box.x+candidate.box.width >= x;
        if(!containsX)
            continue;

        if(best == nullptr || candidate.box.br().x > best->box.br().x)
            best = &candidate;
    }

    return best;
}
|
||||
|
||||
// Advances `x` to the end of the horizontal region that starts at `x`.
//
// - If `x` lies outside every detection, `x` is moved to the left edge of the
//   nearest detection that starts to the right of it (or to `imgSizeX` if
//   there is none) and false is returned.
// - If `x` lies inside a detection, `x` is moved to that detection's right
//   edge, repeating for as long as another detection overlaps the new
//   position and extends further right, and true is returned.
bool findRegionEndpointHoriz(int& x, const std::vector<Yolo::Detection>& detections, int imgSizeX)
{
    const Yolo::Detection* inDetection = pointInDetectionHoriz(x, detections);

    Log(Log::DEBUG, false)<<__func__<<" point "<<x;

    if(!inDetection)
    {
        // Pick the box with the smallest left edge that is still right of x.
        // (The comparison must be "<": with ">" the farthest box was chosen
        // and all boxes in between ended up in an unfrozen region.)
        const Yolo::Detection* closest = nullptr;
        for(const Yolo::Detection& detection : detections)
        {
            if(detection.box.x > x && (closest == nullptr || detection.box.x < closest->box.x))
                closest = &detection;
        }

        x = closest ? closest->box.x : imgSizeX;

        Log(Log::DEBUG)<<" is not in any box and will be moved to "<<x<<" where the closest box ("<<(closest ? closest->className : "null")<<") is";
        return false;
    }

    x = inDetection->box.br().x;
    Log(Log::DEBUG, false)<<" is in a box and will be moved to its end "<<x<<" where ";

    // Another box may overlap the end of this one; if it reaches further
    // right the frozen region continues.
    const Yolo::Detection* candidateDetection = pointInDetectionHoriz(x, detections, inDetection);
    if(candidateDetection && candidateDetection->box.br().x > x)
    {
        Log(Log::DEBUG)<<"it is again in a box";
        return findRegionEndpointHoriz(x, detections, imgSizeX);
    }

    Log(Log::DEBUG)<<"it is not in a box";
    return true;
}
|
||||
|
||||
// Cuts `image` into consecutive full-height vertical strips.
//
// Each returned pair holds a strip (a view into `image`, not a copy) and the
// flag returned by findRegionEndpointHoriz() for it: true when the strip is
// covered by detections ("frozen"), false otherwise.
std::vector<std::pair<cv::Mat, bool>> cutImageIntoHorzRegions(cv::Mat& image, const std::vector<Yolo::Detection>& detections)
{
    std::vector<std::pair<cv::Mat, bool>> out;

    // Diagnostic output goes through Log like everywhere else in this file
    // instead of being printed unconditionally.
    Log(Log::DEBUG)<<__func__<<' '<<image.cols<<'x'<<image.rows;

    for(int x = 0; x < image.cols; ++x)
    {
        const int start = x;
        const bool frozen = findRegionEndpointHoriz(x, detections, image.cols);

        // A detection box may extend past the right image border; clamp so
        // the strip rectangle always stays inside the image.
        x = std::min(x, image.cols);

        // The end column is part of the strip unless it is the image border.
        int width = x-start;
        if(x < image.cols)
            ++width;

        const cv::Rect rect(start, 0, width, image.rows);
        Log(Log::DEBUG)<<__func__<<" region\t"<<rect;
        out.push_back({image(rect), frozen});
    }

    return out;
}
|
||||
|
||||
// Concatenates the strips in `slices` left to right into one new image.
//
// The result has the row count and type of the first strip and as many
// columns as all strips together. `slices` must not be empty (asserted).
cv::Mat assembleFromSlicesHoriz(const std::vector<std::pair<cv::Mat, bool>>& slices)
{
    assert(!slices.empty());

    int cols = 0;
    for(const std::pair<cv::Mat, bool>& slice : slices)
        cols += slice.first.cols;

    // cv::Mat takes (rows, cols, type) - in that order.
    cv::Mat image(slices[0].first.rows, cols, slices[0].first.type());
    Log(Log::DEBUG)<<__func__<<' '<<image.size()<<' '<<cols<<' '<<slices[0].first.rows;

    int col = 0;
    for(const std::pair<cv::Mat, bool>& slice : slices)
    {
        const cv::Rect rect(col, 0, slice.first.cols, slice.first.rows);
        Log(Log::DEBUG)<<__func__<<' '<<rect;
        slice.first.copyTo(image(rect));
        // Strips are adjacent, not overlapping: advance by the full width so
        // that every output column is written exactly once.
        col += slice.first.cols;
    }

    return image;
}
|
||||
|
||||
// Mirrors `rect` along the main diagonal: x and y are exchanged, and so are
// width and height.
void transposeRect(cv::Rect& rect)
{
    rect = cv::Rect(rect.y, rect.x, rect.height, rect.width);
}
|
||||
|
||||
// Stretches `image` in place towards `targetAspectRatio` (width/height) by
// inserting seams only into strips that are not covered by detections.
//
// Detections with a priority below 3 are ignored. If the image is too wide
// the work is done on the transposed image (and transposed back afterwards),
// so that seams are always added along the column axis.
//
// Returns false, leaving `image` in its original orientation, when there are
// not enough unfrozen columns or when SeamCarving::strechImage() fails.
bool seamCarveResize(cv::Mat& image, std::vector<Yolo::Detection> detections, double targetAspectRatio = 1.0)
{
    detections.erase(std::remove_if(detections.begin(), detections.end(), [](const Yolo::Detection& detection){return detection.priority < 3;}), detections.end());

    const double aspectRatio = image.cols/static_cast<double>(image.rows);

    Log(Log::DEBUG)<<"Image size "<<image.size()<<" aspect ratio "<<aspectRatio<<" target aspect ratio "<<targetAspectRatio;

    // Too wide -> rows have to be added ("vertical"), otherwise columns.
    const bool vertical = aspectRatio > targetAspectRatio;

    int requiredLines = 0;
    if(!vertical)
        requiredLines = image.rows*targetAspectRatio - image.cols;
    else
        requiredLines = image.cols/targetAspectRatio - image.rows;

    Log(Log::DEBUG)<<__func__<<' '<<requiredLines<<" lines are required in "<<(vertical ? "vertical" : "horizontal")<<" direction";

    if(vertical)
    {
        cv::transpose(image, image);
        for(Yolo::Detection& detection : detections)
            transposeRect(detection.box);
    }

    std::vector<std::pair<cv::Mat, bool>> slices = cutImageIntoHorzRegions(image, detections);
    Log(Log::DEBUG)<<"Image has "<<slices.size()<<" slices:";
    int totalResizableSize = 0;
    for(const std::pair<cv::Mat, bool>& slice : slices)
    {
        Log(Log::DEBUG)<<"a "<<(slice.second ? "frozen" : "unfrozen")<<" slice of size "<<slice.first.cols;
        if(!slice.second)
            totalResizableSize += slice.first.cols;
    }

    if(totalResizableSize < requiredLines+1)
    {
        Log(Log::WARN)<<"Unable to seam carve as there are only "<<totalResizableSize<<" unfrozen cols";
        if(vertical)
            cv::transpose(image, image);
        return false;
    }

    // Distribute the seams over the unfrozen slices proportionally to their
    // width (rounded down).
    std::vector<int> seamsForSlice(slices.size(), 0);
    for(size_t i = 0; i < slices.size(); ++i)
    {
        if(!slices[i].second)
            seamsForSlice[i] = (static_cast<double>(slices[i].first.cols)/totalResizableSize)*requiredLines;
    }

    // Whatever the rounding left over goes to the last unfrozen slice.
    // (Counting down with size_t avoids the non-standard ssize_t.)
    const int residual = requiredLines - std::accumulate(seamsForSlice.begin(), seamsForSlice.end(), 0);
    for(size_t i = slices.size(); i-- > 0;)
    {
        if(!slices[i].second)
        {
            seamsForSlice[i] += residual;
            break;
        }
    }

    for(size_t i = 0; i < slices.size(); ++i)
    {
        if(seamsForSlice[i] == 0)
            continue;

        const bool ret = SeamCarving::strechImage(slices[i].first, seamsForSlice[i], true);
        if(!ret)
        {
            if(vertical)
                cv::transpose(image, image);
            return false;
        }
    }

    image = assembleFromSlicesHoriz(slices);

    if(vertical)
        cv::transpose(image, image);

    return true;
}
|
||||
|
||||
// Draws every detection (box outline plus a filled label showing class name,
// confidence and priority) and the crop rectangle `rect` into `image`.
void drawDebugInfo(cv::Mat &image, const cv::Rect& rect, const std::vector<Yolo::Detection>& detections)
{
    for(const Yolo::Detection& det : detections)
    {
        cv::rectangle(image, det.box, det.color, 3);

        // "<class> <confidence, 4 chars> <priority>"
        const std::string confidenceText = std::to_string(det.confidence).substr(0, 4);
        const std::string label = det.className + ' ' + confidenceText + ' ' + std::to_string(det.priority);

        // Filled background box above the detection, text on top of it.
        const cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_DUPLEX, 1, 1, 0);
        const cv::Rect textBox(det.box.x, det.box.y - 40, labelSize.width + 10, labelSize.height + 20);
        cv::rectangle(image, textBox, det.color, cv::FILLED);

        const cv::Point textOrigin(det.box.x + 5, det.box.y - 10);
        cv::putText(image, label, textOrigin, cv::FONT_HERSHEY_DUPLEX, 1, cv::Scalar(0, 0, 0), 1, 0);
    }

    cv::rectangle(image, rect, cv::Scalar(0, 0, 255), 8);
}
|
||||
|
||||
// Shrinks `image` in place, keeping its aspect ratio, when its longer side
// exceeds twice the longer side of `targetSize`; the longer image side then
// becomes exactly that limit. Smaller images are left untouched.
static void reduceSize(cv::Mat& image, const cv::Size& targetSize)
{
    const int longTargetSize = std::max(targetSize.width, targetSize.height)*2;
    if(std::max(image.cols, image.rows) <= longTargetSize)
        return;

    const bool landscape = image.cols > image.rows;
    const double ratio = static_cast<double>(longTargetSize)/(landscape ? image.cols : image.rows);

    // The long side is pinned to the limit, the short side is scaled.
    const cv::Size newSize = landscape
        ? cv::Size(longTargetSize, static_cast<int>(image.rows*ratio))
        : cv::Size(static_cast<int>(image.cols*ratio), longTargetSize);

    cv::resize(image, image, newSize, 0, 0, ratio < 1 ? cv::INTER_AREA : cv::INTER_CUBIC);
}
|
||||
|
||||
void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yolo, FaceRecognizer* recognizer,
|
||||
std::mutex& reconizerMutex, const std::filesystem::path& debugOutputPath)
|
||||
{
|
||||
InteligentRoi intRoi(yolo);
|
||||
cv::Mat image = cv::imread(path);
|
||||
if(!image.data)
|
||||
{
|
||||
Log(Log::WARN)<<"could not load image "<<path<<" skipping";
|
||||
return;
|
||||
}
|
||||
|
||||
reduceSize(image, config.targetSize);
|
||||
|
||||
std::vector<Yolo::Detection> detections = yolo.runInference(image);
|
||||
|
||||
Log(Log::DEBUG)<<"Got "<<detections.size()<<" detections for "<<path;
|
||||
for(Yolo::Detection& detection : detections)
|
||||
{
|
||||
bool hasmatch = false;
|
||||
if(recognizer && detection.className == "person")
|
||||
{
|
||||
cv::Mat person = image(detection.box);
|
||||
reconizerMutex.lock();
|
||||
FaceRecognizer::Detection match = recognizer->isMatch(person);
|
||||
reconizerMutex.unlock();
|
||||
if(match.person >= 0)
|
||||
{
|
||||
detection.priority += 10;
|
||||
hasmatch = true;
|
||||
detections.push_back({0, "Face", match.confidence, 20, {255, 0, 0}, match.rect});
|
||||
}
|
||||
}
|
||||
Log(Log::DEBUG)<<detection.class_id<<": "<<detection.className<<" at "<<detection.box<<" with prio "<<detection.priority<<(hasmatch ? " has match" : "");
|
||||
}
|
||||
|
||||
cv::Rect crop;
|
||||
bool incompleate = intRoi.getCropRectangle(crop, detections, image.size());
|
||||
|
||||
if(config.seamCarving && incompleate)
|
||||
{
|
||||
bool ret = seamCarveResize(image, detections, config.targetSize.aspectRatio());
|
||||
if(ret && image.size().aspectRatio() != config.targetSize.aspectRatio())
|
||||
{
|
||||
detections = yolo.runInference(image);
|
||||
}
|
||||
}
|
||||
|
||||
cv::Mat croppedImage;
|
||||
|
||||
if(image.size().aspectRatio() != config.targetSize.aspectRatio() && incompleate)
|
||||
{
|
||||
intRoi.getCropRectangle(crop, detections, image.size());
|
||||
|
||||
if(config.debug)
|
||||
{
|
||||
cv::Mat debugImage = image.clone();
|
||||
drawDebugInfo(debugImage, crop, detections);
|
||||
bool ret = cv::imwrite(debugOutputPath/path.filename(), debugImage);
|
||||
if(!ret)
|
||||
Log(Log::WARN)<<"could not save debug image to "<<debugOutputPath/path.filename()<<" skipping";
|
||||
}
|
||||
|
||||
croppedImage = image(crop);
|
||||
}
|
||||
else if(!incompleate)
|
||||
{
|
||||
croppedImage = image(crop);
|
||||
}
|
||||
else
|
||||
{
|
||||
croppedImage = image;
|
||||
}
|
||||
|
||||
cv::Mat resizedImage;
|
||||
cv::resize(croppedImage, resizedImage, config.targetSize, 0, 0, cv::INTER_CUBIC);
|
||||
bool ret = cv::imwrite(config.outputDir/path.filename(), resizedImage);
|
||||
if(!ret)
|
||||
Log(Log::WARN)<<"could not save image to "<<config.outputDir/path.filename()<<" skipping";
|
||||
}
|
||||
|
||||
void threadFn(const std::vector<std::filesystem::path>& images, const Config& config, FaceRecognizer* recognizer,
|
||||
std::mutex& reconizerMutex, const std::filesystem::path& debugOutputPath)
|
||||
{
|
||||
Yolo yolo(config.modelPath, {640, 480}, config.classesPath, false);
|
||||
for(std::filesystem::path path : images)
|
||||
pipeline(path, config, yolo, recognizer, reconizerMutex, debugOutputPath);
|
||||
}
|
||||
|
||||
/**
 * Splits a vector into contiguous chunks of near-equal size.
 *
 * The first (vec.size() % parts) chunks receive one extra element. When there are
 * fewer elements than requested parts, one single-element chunk per element is
 * returned, so no chunk is ever empty.
 *
 * @param vec the elements to distribute
 * @param parts the desired number of chunks; 0 is treated as 1
 * @return at most @p parts chunks that concatenate back to @p vec; empty if @p vec is empty
 */
template<typename T>
std::vector<std::vector<T>> splitVector(const std::vector<T>& vec, size_t parts)
{
	std::vector<std::vector<T>> out;

	// callers pass std::thread::hardware_concurrency(), which may report 0;
	// dividing by that would be undefined behaviour
	if(parts == 0)
		parts = 1;

	const size_t chunks = std::min(parts, vec.size());
	const size_t length = vec.size()/parts;
	size_t remain = vec.size() % parts;
	out.reserve(chunks);

	size_t begin = 0;
	for(size_t i = 0; i < chunks; ++i)
	{
		size_t end = begin + length;
		// hand out the remainder one element at a time to the leading chunks
		if(remain > 0)
		{
			++end;
			--remain;
		}
		out.emplace_back(vec.begin() + begin, vec.begin() + end);
		begin = end;
	}

	return out;
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
Log::level = Log::INFO;
|
||||
|
||||
Config config;
|
||||
argp_parse(&argp, argc, argv, 0, 0, &config);
|
||||
|
||||
if(config.outputDir.empty())
|
||||
{
|
||||
Log(Log::ERROR)<<"a output path \"-o\" is required";
|
||||
return 1;
|
||||
}
|
||||
|
||||
if(config.imagePaths.empty())
|
||||
{
|
||||
Log(Log::ERROR)<<"at least one input image or directory is required";
|
||||
return 1;
|
||||
}
|
||||
|
||||
std::vector<std::filesystem::path> imagePaths;
|
||||
|
||||
for(const std::filesystem::path& path : config.imagePaths)
|
||||
getImageFiles(path, imagePaths);
|
||||
|
||||
Log(Log::DEBUG)<<"Images:";
|
||||
for(const::std::filesystem::path& path: imagePaths)
|
||||
Log(Log::DEBUG)<<path;
|
||||
|
||||
if(imagePaths.empty())
|
||||
{
|
||||
Log(Log::ERROR)<<"no image was found\n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
if(!std::filesystem::exists(config.outputDir))
|
||||
{
|
||||
if(!std::filesystem::create_directory(config.outputDir))
|
||||
{
|
||||
Log(Log::ERROR)<<"could not create directory at "<<config.outputDir;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
std::filesystem::path debugOutputPath(config.outputDir/"debug");
|
||||
if(config.debug)
|
||||
{
|
||||
if(!std::filesystem::exists(debugOutputPath))
|
||||
std::filesystem::create_directory(debugOutputPath);
|
||||
}
|
||||
|
||||
FaceRecognizer* recognizer = nullptr;
|
||||
std::mutex recognizerMutex;
|
||||
if(!config.focusPersonImage.empty())
|
||||
{
|
||||
cv::Mat personImage = cv::imread(config.focusPersonImage);
|
||||
if(personImage.empty())
|
||||
{
|
||||
Log(Log::ERROR)<<"Could not load image from "<<config.focusPersonImage;
|
||||
return 1;
|
||||
}
|
||||
recognizer = new FaceRecognizer();
|
||||
recognizer->addReferances({personImage});
|
||||
recognizer->setThreshold(config.threshold);
|
||||
}
|
||||
|
||||
std::vector<std::thread> threads;
|
||||
std::vector<std::vector<std::filesystem::path>> imagePathParts = splitVector(imagePaths, std::thread::hardware_concurrency());
|
||||
|
||||
for(size_t i = 0; i < imagePathParts.size(); ++i)
|
||||
threads.push_back(std::thread(threadFn, imagePathParts[i], std::ref(config), recognizer, std::ref(recognizerMutex), std::ref(debugOutputPath)));
|
||||
|
||||
for(std::thread& thread : threads)
|
||||
thread.join();
|
||||
|
||||
return 0;
|
||||
}
|
117
SmartCrop/options.h
Normal file
117
SmartCrop/options.h
Normal file
@ -0,0 +1,117 @@
|
||||
/* * SmartCrop - A tool for content aware croping of images
|
||||
* Copyright (C) 2024 Carl Philipp Klemm
|
||||
*
|
||||
* This file is part of SmartCrop.
|
||||
*
|
||||
* SmartCrop is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* SmartCrop is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <argp.h>
|
||||
#include <iostream>
|
||||
#include <filesystem>
|
||||
#include <opencv2/core/types.hpp>
|
||||
#include "log.h"
|
||||
|
||||
// Program identification strings that argp looks up by these exact global names
const char *argp_program_version = "AIImagePreprocesses";
const char *argp_program_bug_address = "<carl@uvos.xyz>";

// Description and positional argument summary handed to the argp parser definition
static char doc[] = "Application that trainsforms images into formats, sizes and aspect ratios required for ai training";
static char args_doc[] = "FILE(S)";

// Option table, one row per switch: {long name, short key, argument placeholder, flags, help text}.
// Rows without an argument placeholder are plain flags; the table ends with an all-zero entry.
static struct argp_option options[] =
{
	{"verbose",          'v', nullptr,       0, "Show debug messages" },
	{"quiet",            'q', nullptr,       0, "only output data" },
	{"model",            'm', "[FILENAME]",  0, "YoloV8 model to use for detection" },
	{"classes",          'c', "[FILENAME]",  0, "classes text file to use" },
	{"out",              'o', "[DIRECTORY]", 0, "directory whre images are to be saved" },
	{"debug",            'd', nullptr,       0, "output debug images" },
	{"seam-carving",     's', nullptr,       0, "use seam carving to change image aspect ratio instead of croping"},
	{"size",             'z', "[PIXELS]",    0, "target output size, default: 512"},
	{"focus-person",     'f', "[FILENAME]",  0, "a file name to an image of a person that the crop should focus on"},
	{"person-threshold", 't', "[NUMBER]",    0, "the threshold at witch to consider a person matched, defaults to 0.363"},
	{nullptr}
};
|
||||
|
||||
struct Config
|
||||
{
|
||||
std::vector<std::filesystem::path> imagePaths;
|
||||
std::filesystem::path modelPath;
|
||||
std::filesystem::path classesPath;
|
||||
std::filesystem::path outputDir;
|
||||
std::filesystem::path focusPersonImage;
|
||||
bool seamCarving = false;
|
||||
bool debug = false;
|
||||
double threshold = 0.363;
|
||||
cv::Size targetSize = cv::Size(512, 512);
|
||||
};
|
||||
|
||||
static error_t parse_opt (int key, char *arg, struct argp_state *state)
|
||||
{
|
||||
Config *config = reinterpret_cast<Config*>(state->input);
|
||||
try
|
||||
{
|
||||
switch (key)
|
||||
{
|
||||
case 'q':
|
||||
Log::level = Log::ERROR;
|
||||
break;
|
||||
case 'v':
|
||||
Log::level = Log::DEBUG;
|
||||
break;
|
||||
case 'm':
|
||||
config->modelPath = arg;
|
||||
break;
|
||||
case 'c':
|
||||
config->classesPath = arg;
|
||||
break;
|
||||
case 'd':
|
||||
config->debug = true;
|
||||
break;
|
||||
case 'o':
|
||||
config->outputDir.assign(arg);
|
||||
break;
|
||||
case 's':
|
||||
config->seamCarving = true;
|
||||
break;
|
||||
case 'f':
|
||||
config->focusPersonImage = arg;
|
||||
break;
|
||||
case 't':
|
||||
config->threshold = std::atof(arg);
|
||||
break;
|
||||
case 'z':
|
||||
{
|
||||
int x = std::stoi(arg);
|
||||
config->targetSize = cv::Size(x, x);
|
||||
break;
|
||||
}
|
||||
case ARGP_KEY_ARG:
|
||||
config->imagePaths.push_back(arg);
|
||||
break;
|
||||
default:
|
||||
return ARGP_ERR_UNKNOWN;
|
||||
}
|
||||
}
|
||||
catch(const std::invalid_argument& ex)
|
||||
{
|
||||
std::cout<<arg<<" passed for argument -"<<static_cast<char>(key)<<" is not a valid number.\n";
|
||||
return ARGP_KEY_ERROR;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct argp argp = {options, parse_opt, args_doc, doc};
|
35
SmartCrop/readfile.h
Normal file
35
SmartCrop/readfile.h
Normal file
@ -0,0 +1,35 @@
|
||||
/* * SmartCrop - A tool for content aware croping of images
|
||||
* Copyright (C) 2024 Carl Philipp Klemm
|
||||
*
|
||||
* This file is part of SmartCrop.
|
||||
*
|
||||
* SmartCrop is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* SmartCrop is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <stdexcept>
|
||||
#include <sstream>
|
||||
|
||||
/**
 * Reads the complete contents of a file into a string.
 *
 * @param path the file to read
 * @return everything the file stream delivers
 * @throws std::runtime_error when the file can not be opened
 */
inline std::string readFile(const std::filesystem::path& path)
{
	std::ifstream input;
	input.open(path);
	if(!input.is_open())
	{
		std::string message("could not open file ");
		message += path.string();
		throw std::runtime_error(message);
	}

	// streaming the buffer copies the whole file in one go
	std::stringstream contents;
	contents<<input.rdbuf();
	return contents.str();
}
|
376
SmartCrop/seamcarving.cpp
Normal file
376
SmartCrop/seamcarving.cpp
Normal file
@ -0,0 +1,376 @@
|
||||
//
|
||||
// SmartCrop - A tool for content aware croping of images
|
||||
// Copyright (C) 2024 Carl Philipp Klemm
|
||||
//
|
||||
// This file is part of SmartCrop.
|
||||
//
|
||||
// SmartCrop is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// SmartCrop is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
|
||||
//
|
||||
|
||||
#include "seamcarving.h"
|
||||
|
||||
#include <opencv2/imgcodecs.hpp>
|
||||
#include <opencv2/highgui/highgui.hpp>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <iostream>
|
||||
#include <filesystem>
|
||||
#include <cfloat>
|
||||
#include <vector>
|
||||
#include "log.h"
|
||||
|
||||
/**
 * Changes the width of an image by @p seams columns using seam carving.
 *
 * The seams are always searched by repeatedly removing the cheapest seam from a working
 * copy. When shrinking, that working copy becomes the result; when growing, the recorded
 * seams are inserted into a copy of the untouched input instead.
 *
 * @param image image to modify in place; left untouched when false is returned
 * @param seams number of seams to remove or insert
 * @param grow true to widen the image, false to narrow it
 * @param seamsVect optional output that receives every seam found, may be nullptr
 * @return false when the image ran out of pixels before all seams were processed
 */
bool SeamCarving::strechImage(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect)
{
	cv::Mat workFrame = image.clone();
	assert(!workFrame.empty());
	std::vector<std::vector<int>> foundSeams;

	for(int pass = 0; pass < seams; ++pass)
	{
		// per pixel energy first, then the accumulated cost of the cheapest path reaching each pixel
		const cv::Mat energy = computeGradientMagnitude(workFrame);
		const cv::Mat accumulated = computePathIntensityMat(energy);

		if(accumulated.rows == 0 && accumulated.cols == 0)
			return false;

		foundSeams.push_back(getLeastImportantPath(accumulated));
		const std::vector<int>& seam = foundSeams.back();
		if(seamsVect != nullptr)
			seamsVect->push_back(seam);

		workFrame = removeLeastImportantPath(workFrame, seam);
		if(workFrame.rows == 0 || workFrame.cols == 0)
			return false;
	}

	if(!grow)
	{
		image = workFrame;
		return true;
	}

	// growing: replay the recorded seams as insertions on the original image
	cv::Mat enlarged = image.clone();
	for(const std::vector<int>& seam : foundSeams)
		enlarged = addLeastImportantPath(enlarged, seam);
	image = enlarged;
	return true;
}
|
||||
|
||||
/**
 * Same as strechImage() but changes the height of the image instead of its width.
 *
 * @param image image to modify in place
 * @param seams number of seams to remove or insert
 * @param grow true to enlarge the image, false to shrink it
 * @param seamsVect optional output for the seams; they are recorded on the transposed image
 * @return the result of strechImage() on the transposed image
 */
bool SeamCarving::strechImageVert(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect)
{
	// rows become columns, so the column based implementation can be reused
	cv::transpose(image, image);
	const bool success = strechImage(image, seams, grow, seamsVect);
	// restore the orientation whether or not carving succeeded
	cv::transpose(image, image);
	return success;
}
|
||||
|
||||
/**
 * Runs strechImage() and additionally produces a copy of the input with all seams drawn in.
 *
 * @param image image to modify in place
 * @param seamsImage receives a copy of the unmodified input; on success the found seams are drawn onto it
 * @param seams number of seams to remove or insert
 * @param grow true to widen the image, false to narrow it
 * @return false when strechImage() failed, in which case no seam is drawn
 */
bool SeamCarving::strechImageWithSeamsImage(cv::Mat& image, cv::Mat& seamsImage, int seams, bool grow)
{
	std::vector<std::vector<int>> carved;
	// the visualisation is based on the image as it looked before carving
	seamsImage = image.clone();

	if(!SeamCarving::strechImage(image, seams, grow, &carved))
		return false;

	for(const std::vector<int>& seam : carved)
		seamsImage = drawSeam(seamsImage, seam);
	return true;
}
|
||||
|
||||
/**
 * Computes an energy image as the sum of the absolute responses of a 3x3 derivative
 * kernel and its transpose, applied to the grayscale version of the input.
 *
 * @param img colour image to analyse
 * @return the energy image
 */
cv::Mat SeamCarving::GetEnergyImg(const cv::Mat &img)
{
	// find partial derivative of x-axis and y-axis seperately
	// sum up the partial derivates
	// The array must hold all nine coefficients because the matrix header below wraps
	// it as 3x3. A missing comma ("-2 - 1") previously collapsed the last two values
	// into -3, which corrupted the kernel and let the header read past the array.
	float pd[] = {1, 2, 1, 0, 0, 0, -1, -2, -1};
	cv::Mat xFilter(3, 3, CV_32FC1, pd);
	cv::Mat yFilter = xFilter.t();
	cv::Mat grayImg;
	cv::cvtColor(img, grayImg, cv::COLOR_RGBA2GRAY);
	cv::Mat dxImg;
	cv::Mat dyImg;

	// NOTE(review): a ddepth of 0 presumably selects an 8 bit unsigned result, which
	// would clip negative filter responses before cv::abs() sees them - confirm
	cv::filter2D(grayImg, dxImg, 0, xFilter);
	cv::filter2D(grayImg, dyImg, 0, yFilter);
	cv::Mat absDxImg = cv::abs(dxImg);
	cv::Mat absDyImg = cv::abs(dyImg);

	cv::Mat energyImg;
	cv::add(absDxImg, absDyImg, energyImg);
	return energyImg;
}
|
||||
|
||||
/**
 * Computes the per pixel gradient magnitude sqrt(dx^2 + dy^2) of the grayscale
 * version of a frame from its two first order Sobel derivatives.
 *
 * @param frame colour image to analyse
 * @return single channel 32 bit float matrix with the size of @p frame
 */
cv::Mat SeamCarving::computeGradientMagnitude(const cv::Mat &frame)
{
	cv::Mat gray;
	cv::cvtColor(frame, gray, cv::COLOR_RGBA2GRAY);

	cv::Mat derivative(gray.size(), CV_16SC1);
	cv::Mat derivativeFloat(gray.size(), CV_32FC1);
	cv::Mat magnitude = cv::Mat::zeros(gray.size(), CV_32FC1);

	// first pass takes the derivative along x, second pass along y;
	// the squared responses are summed up in magnitude
	for(int pass = 0; pass < 2; ++pass)
	{
		const int dx = (pass == 0) ? 1 : 0;
		const int dy = 1 - dx;
		cv::Sobel(gray, derivative, CV_16SC1, dx, dy);
		derivative.convertTo(derivativeFloat, CV_32FC1);
		cv::accumulateSquare(derivativeFloat, magnitude);
	}

	cv::sqrt(magnitude, magnitude);
	return magnitude;
}
|
||||
|
||||
/**
 * Returns a value only if its column index lies inside the valid range.
 *
 * @param currIndex the value belonging to column @p start
 * @param start column index to validate
 * @param end number of columns, i.e. one past the last valid index
 * @return @p currIndex when 0 <= start < end, FLT_MAX otherwise
 */
float SeamCarving::intensity(float currIndex, int start, int end)
{
	const bool inRange = start >= 0 && start < end;
	return inRange ? currIndex : FLT_MAX;
}
|
||||
|
||||
/**
 * Builds the accumulated cost map used to find seams: every cell holds its own energy
 * plus the cheapest accumulated cost among the up to three cells directly above it.
 *
 * @param rawEnergyMap per pixel energy, read as 32 bit float
 * @return accumulated cost map of the same size, or an empty matrix for empty input
 */
cv::Mat SeamCarving::computePathIntensityMat(const cv::Mat &rawEnergyMap)
{
	if(rawEnergyMap.total() == 0)
	{
		return cv::Mat();
	}

	cv::Mat pathIntensityMap = cv::Mat(rawEnergyMap.size(), CV_32FC1);

	//First row of intensity paths is the same as the energy map
	rawEnergyMap.row(0).copyTo(pathIntensityMap.row(0));

	const int lastCol = pathIntensityMap.cols - 1;

	//The rest of them use the DP calculation using the minimum of the 3 pixels above them + their own intensity.
	for(int row = 1; row < pathIntensityMap.rows; row++)
	{
		for(int col = 0; col < pathIntensityMap.cols; col++)
		{
			// the cell straight above always exists; the diagonal neighbours are only
			// read when they lie inside the image, so no element outside the row is touched
			float minIntensity = pathIntensityMap.at<float>(row-1, col);
			if(col > 0)
				minIntensity = std::min(minIntensity, pathIntensityMap.at<float>(row-1, col-1));
			if(col < lastCol)
				minIntensity = std::min(minIntensity, pathIntensityMap.at<float>(row-1, col+1));

			pathIntensityMap.at<float>(row, col) = rawEnergyMap.at<float>(row, col) + minIntensity;
		}
	}
	return pathIntensityMap;
}
|
||||
|
||||
/**
 * Traces the cheapest seam through an accumulated cost map from the bottom row upwards.
 *
 * @param importanceMap accumulated cost map, read as 32 bit float
 * @return one column index per row, or an empty vector for an empty map
 */
std::vector<int> SeamCarving::getLeastImportantPath(const cv::Mat &importanceMap)
{
	if(importanceMap.total() == 0)
	{
		return std::vector<int>();
	}

	const int lastRow = importanceMap.rows - 1;
	const int lastCol = importanceMap.cols - 1;

	// The seam ends at the cheapest cell of the bottom row, the leftmost one wins on ties.
	float minImportance = importanceMap.at<float>(lastRow, 0);
	int minCol = 0;
	for(int col = 1; col <= lastCol; col++)
	{
		float currPixel = importanceMap.at<float>(lastRow, col);
		if(currPixel < minImportance)
		{
			minCol = col;
			minImportance = currPixel;
		}
	}

	std::vector<int> leastEnergySeam(importanceMap.rows);
	leastEnergySeam[lastRow] = minCol;
	for(int row = lastRow - 1; row >= 0; row--)
	{
		// neighbours outside the image count as infinitely expensive and are never read
		const float p1 = minCol > 0 ? importanceMap.at<float>(row, minCol-1) : FLT_MAX;
		const float p2 = importanceMap.at<float>(row, minCol);
		const float p3 = minCol < lastCol ? importanceMap.at<float>(row, minCol+1) : FLT_MAX;

		//Adjust the min column for path following, staying in place unless a diagonal is strictly cheaper
		if(p1 < p2 && p1 < p3)
		{
			minCol -= 1;
		}
		else if(p3 < p1 && p3 < p2)
		{
			minCol += 1;
		}
		leastEnergySeam[row] = minCol;
	}

	return leastEnergySeam;
}
|
||||
|
||||
/**
 * Creates a copy of an image with one seam removed.
 *
 * @param original the source image
 * @param seam column to drop for each row, one entry per row
 * @return a new matrix of the same type that is one column narrower
 */
cv::Mat SeamCarving::removeLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam)
{
	// same height, one column less
	const cv::Size shrunkSize(original.size().width - 1, original.size().height);
	cv::Mat shrunk(shrunkSize, original.type());

	size_t row = 0;
	for(const int seamCol : seam)
	{
		removePixel(original, shrunk, row, seamCol);
		++row;
	}
	return shrunk;
}
|
||||
|
||||
/**
 * Copies one row from @p original into @p outputMat while dropping the pixel at
 * @p minCol, then blends the dropped pixel into its former left and right neighbours.
 *
 * NOTE(review): the byte offsets are derived from cols * channels, so this relies on
 * both matrices being stored without row padding and with one byte per channel - confirm
 * for callers that pass anything but freshly allocated 8 bit matrices.
 *
 * @param original source image
 * @param outputMat destination image, one column narrower than @p original
 * @param row row to process
 * @param minCol column of the pixel to remove in this row
 */
void SeamCarving::removePixel(const cv::Mat &original, cv::Mat &outputMat, int row, int minCol)
{
	const int width = original.cols;
	const int channels = original.channels();
	const int originRowStart = row * channels * width;
	const int newRowStart = row * channels * (width - 1);
	const unsigned char *rawOrig = original.data;
	unsigned char *rawOutput = outputMat.data;

	// everything left of the seam keeps its position
	const int firstNum = minCol * channels;
	memcpy(rawOutput + newRowStart, rawOrig + originRowStart, firstNum);

	// everything right of the seam moves one pixel to the left
	const int originRowMid = originRowStart + (minCol + 1) * channels;
	const int newRowMid = newRowStart + minCol * channels;
	const int secondNum = (width - 1) * channels - firstNum;
	memcpy(rawOutput + newRowMid, rawOrig + originRowMid, secondNum);

	// Up to three channels are blended. Clamping to the real channel count keeps images
	// with fewer channels from reading or writing bytes of neighbouring pixels.
	const int blendChannels = std::min(channels, 3);
	const unsigned char *removed = rawOrig + originRowStart + minCol * channels;
	const int leftPixel = minCol - 1;
	const int rightPixel = minCol + 1;

	if(rightPixel < width)
	{
		// the former right neighbour now sits at minCol in the output row
		const unsigned char *right = rawOrig + originRowStart + rightPixel * channels;
		unsigned char *target = rawOutput + newRowStart + minCol * channels;
		for(int channel = 0; channel < blendChannels; ++channel)
			target[channel] = (unsigned char)((removed[channel] + right[channel]) / 2);
	}

	if(leftPixel >= 0)
	{
		const unsigned char *left = rawOrig + originRowStart + leftPixel * channels;
		unsigned char *target = rawOutput + newRowStart + leftPixel * channels;
		for(int channel = 0; channel < blendChannels; ++channel)
			target[channel] = (unsigned char)((removed[channel] + left[channel]) / 2);
	}
}
|
||||
|
||||
/**
 * Creates a copy of an image with one extra pixel inserted per row along a seam.
 *
 * @param original the source image
 * @param seam column after which a pixel is inserted for each row, one entry per row
 * @return a new matrix of the same type that is one column wider
 */
cv::Mat SeamCarving::addLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam)
{
	// same height, one column more
	const cv::Size grownSize(original.size().width + 1, original.size().height);
	cv::Mat grown(grownSize, original.type());

	size_t row = 0;
	for(const int seamCol : seam)
	{
		addPixel(original, grown, row, seamCol);
		++row;
	}
	return grown;
}
|
||||
|
||||
/**
 * Copies one row from @p original into @p outputMat while inserting an additional
 * pixel directly after @p minCol, then blends the seam pixel into its neighbours.
 *
 * NOTE(review): the byte offsets are derived from cols * channels, so this relies on
 * both matrices being stored without row padding and with one byte per channel - confirm
 * for callers that pass anything but freshly allocated 8 bit matrices.
 *
 * @param original source image
 * @param outputMat destination image, one column wider than @p original
 * @param row row to process
 * @param minCol column of the seam in this row
 */
void SeamCarving::addPixel(const cv::Mat &original, cv::Mat &outputMat, int row, int minCol)
{
	const int width = original.cols;
	const int channels = original.channels();
	const int originRowStart = row * channels * width;
	const int newRowStart = row * channels * (width + 1);
	const int firstNum = (minCol + 1) * channels;

	const unsigned char *rawOrig = original.data;
	unsigned char *rawOutput = outputMat.data;

	// columns 0..minCol keep their position
	memcpy(rawOutput + newRowStart, rawOrig + originRowStart, firstNum);

	// The inserted pixel duplicates the right neighbour of the seam. A seam on the last
	// column has no right neighbour, so the seam pixel itself is duplicated there
	// instead of reading past the end of the row.
	const int sourceCol = (minCol + 1 < width) ? minCol + 1 : minCol;
	memcpy(rawOutput + newRowStart + firstNum, rawOrig + originRowStart + sourceCol * channels, channels);

	// the remaining columns move one pixel to the right
	const int originRowMid = originRowStart + ((minCol + 1) * channels);
	const int newRowMid = newRowStart + ((minCol + 2) * channels);
	const int secondNum = (width * channels) - firstNum;
	memcpy(rawOutput + newRowMid, rawOrig + originRowMid, secondNum);

	// Up to three channels are blended. Clamping to the real channel count keeps images
	// with fewer channels from reading or writing bytes of neighbouring pixels.
	const int blendChannels = std::min(channels, 3);
	const unsigned char *seamPixel = rawOrig + originRowStart + minCol * channels;
	const int leftPixel = minCol - 1;
	const int rightPixel = minCol + 1;

	if(rightPixel < width)
	{
		const unsigned char *right = rawOrig + originRowStart + rightPixel * channels;
		unsigned char *target = rawOutput + newRowStart + minCol * channels;
		for(int channel = 0; channel < blendChannels; ++channel)
			target[channel] = (unsigned char)((seamPixel[channel] + right[channel]) / 2);
	}

	if(leftPixel >= 0)
	{
		const unsigned char *left = rawOrig + originRowStart + leftPixel * channels;
		unsigned char *target = rawOutput + newRowStart + leftPixel * channels;
		for(int channel = 0; channel < blendChannels; ++channel)
			target[channel] = (unsigned char)((seamPixel[channel] + left[channel]) / 2);
	}
}
|
||||
|
||||
/**
 * Returns a copy of a frame in which the pixels of a seam are painted (0, 255, 0).
 *
 * @param frame image to draw on, accessed as three 8 bit channels per pixel
 * @param seam column to paint for each row
 * @return the painted copy; rows without a seam entry are left untouched
 */
cv::Mat SeamCarving::drawSeam(const cv::Mat &frame, const std::vector<int> &seam)
{
	cv::Mat retMat = frame.clone();

	// nothing can be painted on a frame without columns
	if(frame.cols <= 0)
		return retMat;

	// one write per row is sufficient, and rows beyond the end of the seam are
	// skipped instead of indexing past the vector
	const int rows = std::min(frame.rows, static_cast<int>(seam.size()));
	for(int row = 0; row < rows; row++)
	{
		retMat.at<cv::Vec3b>(row, seam[row]) = cv::Vec3b(0, 255, 0);
	}
	return retMat;
}
|
43
SmartCrop/seamcarving.h
Normal file
43
SmartCrop/seamcarving.h
Normal file
@ -0,0 +1,43 @@
|
||||
/* * SmartCrop - A tool for content aware croping of images
|
||||
* Copyright (C) 2024 Carl Philipp Klemm
|
||||
*
|
||||
* This file is part of SmartCrop.
|
||||
*
|
||||
* SmartCrop is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* SmartCrop is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <vector>
|
||||
|
||||
class SeamCarving
|
||||
{
|
||||
private:
|
||||
static cv::Mat GetEnergyImg(const cv::Mat &img);
|
||||
static cv::Mat computeGradientMagnitude(const cv::Mat &frame);
|
||||
static float intensity(float currIndex, int start, int end);
|
||||
static cv::Mat computePathIntensityMat(const cv::Mat &rawEnergyMap);
|
||||
static std::vector<int> getLeastImportantPath(const cv::Mat &importanceMap);
|
||||
static cv::Mat removeLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam);
|
||||
static void removePixel(const cv::Mat &original, cv::Mat &outputMap, int row, int minCol);
|
||||
static cv::Mat addLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam);
|
||||
static void addPixel(const cv::Mat &original, cv::Mat &outputMat, int row, int minCol);
|
||||
static cv::Mat drawSeam(const cv::Mat &frame, const std::vector<int> &seam);
|
||||
|
||||
public:
|
||||
static bool strechImage(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect = nullptr);
|
||||
static bool strechImageVert(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect = nullptr);
|
||||
static bool strechImageWithSeamsImage(cv::Mat& image, cv::Mat& seamsImage, int seams, bool grow);
|
||||
};
|
46
SmartCrop/tokenize.cpp
Normal file
46
SmartCrop/tokenize.cpp
Normal file
@ -0,0 +1,46 @@
|
||||
//
|
||||
// SmartCrop - A tool for content aware croping of images
|
||||
// Copyright (C) 2024 Carl Philipp Klemm
|
||||
//
|
||||
// This file is part of SmartCrop.
|
||||
//
|
||||
// SmartCrop is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// SmartCrop is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
|
||||
//
|
||||
|
||||
#include "tokenize.h"
|
||||
|
||||
|
||||
/**
 * Splits a string at a delimiter, ignoring delimiters inside a bracketed section
 * and delimiters directly preceded by the escape character.
 *
 * The bracket character toggles the bracketed state and, like the escape character,
 * stays part of the token. If the string ends inside a bracketed section the
 * unfinished token is discarded.
 *
 * @param str the string to split
 * @param delim the delimiter to split at
 * @param ignoreBraket character that opens and closes a section in which delimiters are ignored
 * @param escapeChar character that disables a directly following delimiter
 * @return the tokens in order of appearance
 */
std::vector<std::string> tokenizeBinaryIgnore(const std::string& str, const char delim, const char ignoreBraket, const char escapeChar)
{
	std::vector<std::string> result;
	std::string current;
	bool bracketOpen = false;

	for(size_t pos = 0; pos < str.size(); ++pos)
	{
		const char symbol = str[pos];
		const bool escaped = pos > 0 && str[pos-1] == escapeChar;
		const bool splitsHere = symbol == delim && !bracketOpen && !escaped;

		if(splitsHere)
		{
			result.push_back(current);
			current.clear();
		}
		else
		{
			current.push_back(symbol);
		}

		// evaluated after the split decision so the toggle only affects later characters
		if(symbol == ignoreBraket)
			bracketOpen = !bracketOpen;
	}

	if(bracketOpen)
		return result;

	result.push_back(current);
	return result;
}
|
26
SmartCrop/tokenize.h
Normal file
26
SmartCrop/tokenize.h
Normal file
@ -0,0 +1,26 @@
|
||||
/* * SmartCrop - A tool for content aware croping of images
|
||||
* Copyright (C) 2024 Carl Philipp Klemm
|
||||
*
|
||||
* This file is part of SmartCrop.
|
||||
*
|
||||
* SmartCrop is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* SmartCrop is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
/**
 * Splits a string into tokens at every occurrence of a delimiter.
 *
 * @param str the string to split
 * @param delim the delimiter to split at
 * @param ignoreBraket character that toggles a section in which delimiters are not split at
 * @param escapeChar character that keeps a directly following delimiter from splitting
 * @return the tokens in order of appearance
 */
std::vector<std::string> tokenizeBinaryIgnore(
	const std::string& str,
	const char delim,
	const char ignoreBraket = '\0',
	const char escapeChar = '\0');
|
80
SmartCrop/utils.cpp
Normal file
80
SmartCrop/utils.cpp
Normal file
@ -0,0 +1,80 @@
|
||||
//
|
||||
// SmartCrop - A tool for content aware croping of images
|
||||
// Copyright (C) 2024 Carl Philipp Klemm
|
||||
//
|
||||
// This file is part of SmartCrop.
|
||||
//
|
||||
// SmartCrop is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// SmartCrop is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
|
||||
//
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
#include <filesystem>
|
||||
#include <vector>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
|
||||
/**
 * Checks whether a path refers to an existing regular file with a supported image
 * extension (.png, .jpg or .jpeg, in any letter case).
 *
 * @param path the path to check
 * @return true when the file exists and carries one of the supported extensions
 */
bool isImagePath(const std::filesystem::path& path)
{
	if(!std::filesystem::is_regular_file(path))
		return false;

	// Fold the extension to lower case so files such as "IMG_0001.JPG" are accepted too.
	// The supported extensions are plain ASCII, so folding A-Z is sufficient.
	std::string extension = path.extension().string();
	for(char& character : extension)
	{
		if(character >= 'A' && character <= 'Z')
			character = static_cast<char>(character - 'A' + 'a');
	}

	return extension == ".png" || extension == ".jpg" || extension == ".jpeg";
}
|
||||
|
||||
void getImageFiles(const std::filesystem::path& path, std::vector<std::filesystem::path>& paths)
|
||||
{
|
||||
if(isImagePath(path))
|
||||
{
|
||||
paths.push_back(path);
|
||||
}
|
||||
else if(std::filesystem::is_directory(path))
|
||||
{
|
||||
for(const std::filesystem::directory_entry& dirent : std::filesystem::directory_iterator(path))
|
||||
{
|
||||
if(std::filesystem::is_directory(dirent.path()))
|
||||
getImageFiles(dirent.path(), paths);
|
||||
else if(isImagePath(dirent.path()))
|
||||
paths.push_back(dirent.path());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Computes the axis aligned bounding rectangle of a set of points.
 *
 * @param points the points to enclose; only the point of each pair is used, the int is ignored
 * @return rectangle spanning from the smallest to the largest x and y coordinate,
 *         or an empty rectangle when @p points is empty
 */
cv::Rect rectFromPoints(const std::vector<std::pair<cv::Point, int>>& points)
{
	// Without points the extremes below would stay at their sentinel values and the
	// width/height subtraction would overflow.
	if(points.empty())
		return cv::Rect();

	int left = std::numeric_limits<int>::max();
	int right = std::numeric_limits<int>::min();
	int top = std::numeric_limits<int>::max();
	int bottom = std::numeric_limits<int>::min();

	for(const std::pair<cv::Point, int>& point : points)
	{
		left = point.first.x < left ? point.first.x : left;
		right = point.first.x > right ? point.first.x : right;

		top = point.first.y < top ? point.first.y : top;
		bottom = point.first.y > bottom ? point.first.y : bottom;
	}

	return cv::Rect(left, top, right-left, bottom-top);
}
|
||||
|
||||
/**
 * Computes the distance between two points as the norm of their difference vector.
 *
 * @param pointA first point
 * @param pointB second point
 * @return cv::norm() of (pointA - pointB)
 */
double pointDist(const cv::Point2i& pointA, const cv::Point2i& pointB)
{
	const cv::Vec2i from(pointA.x, pointA.y);
	const cv::Vec2i to(pointB.x, pointB.y);
	const cv::Vec2i difference = from - to;
	return cv::norm(difference);
}
|
||||
|
||||
/**
 * Checks whether a point lies inside a rectangle, with all four edges counting as inside.
 *
 * @param point the point to test
 * @param rect the rectangle to test against
 * @return true when x is within [rect.x, rect.x + rect.width] and
 *         y is within [rect.y, rect.y + rect.height]
 */
bool pointInRect(const cv::Point2i& point, const cv::Rect& rect)
{
	const bool insideHorizontally = point.x >= rect.x && point.x <= rect.x+rect.width;
	if(!insideHorizontally)
		return false;

	const bool insideVertically = point.y >= rect.y && point.y <= rect.y+rect.height;
	return insideVertically;
}
|
34
SmartCrop/utils.h
Normal file
34
SmartCrop/utils.h
Normal file
@ -0,0 +1,34 @@
|
||||
/* * SmartCrop - A tool for content-aware cropping of images
|
||||
* Copyright (C) 2024 Carl Philipp Klemm
|
||||
*
|
||||
* This file is part of SmartCrop.
|
||||
*
|
||||
* SmartCrop is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* SmartCrop is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <filesystem>
|
||||
#include <vector>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
|
||||
bool isImagePath(const std::filesystem::path& path);
|
||||
|
||||
void getImageFiles(const std::filesystem::path& path, std::vector<std::filesystem::path>& paths);
|
||||
|
||||
cv::Rect rectFromPoints(const std::vector<std::pair<cv::Point, int>>& points);
|
||||
|
||||
double pointDist(const cv::Point2i& pointA, const cv::Point2i& pointB);
|
||||
|
||||
bool pointInRect(const cv::Point2i& point, const cv::Rect& rect);
|
278
SmartCrop/yolo.cpp
Normal file
278
SmartCrop/yolo.cpp
Normal file
@ -0,0 +1,278 @@
|
||||
//
|
||||
// SmartCrop - A tool for content-aware cropping of images
|
||||
// Copyright (C) 2024 Carl Philipp Klemm
|
||||
//
|
||||
// This file is part of SmartCrop.
|
||||
//
|
||||
// SmartCrop is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// SmartCrop is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
|
||||
//
|
||||
|
||||
#include <opencv2/dnn/dnn.hpp>
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <stdexcept>
|
||||
|
||||
#include "yolo.h"
|
||||
#include "readfile.h"
|
||||
#include "tokenize.h"
|
||||
#include "log.h"
|
||||
|
||||
#define INCBIN_PREFIX r
|
||||
#include "incbin.h"
|
||||
|
||||
INCTXT(defaultClasses, WEIGHT_DIR "/classes.txt");
|
||||
INCBIN(defaultModel, WEIGHT_DIR "/yolov8x.onnx");
|
||||
|
||||
/*
 * Builds a detector.
 *
 * onnxModelPath      - ONNX model to load; when empty the model embedded in
 *                      the binary is used instead.
 * modelInputShape    - size the input image is resized to for the network.
 * classesTxtFilePath - class list file; when empty the embedded class list is
 *                      used instead.
 * runWithOCl         - selects the OpenCL target with the default backend when
 *                      true, otherwise the OpenCV backend on the CPU target.
 */
Yolo::Yolo(const std::filesystem::path &onnxModelPath, const cv::Size &modelInputShape,
	const std::filesystem::path& classesTxtFilePath, bool runWithOCl)
{
	modelPath = onnxModelPath;
	modelShape = modelInputShape;

	// Class names: user supplied file if given, embedded list otherwise.
	if(!classesTxtFilePath.empty())
	{
		const std::string classesStr = readFile(classesTxtFilePath);
		loadClasses(classesStr);
	}
	else
	{
		Log(Log::INFO)<<"Using builtin classes";
		loadClasses(rdefaultClassesData);
	}

	// Network weights: embedded model unless a path was given.
	if(modelPath.empty())
	{
		Log(Log::INFO)<<"Using builtin yolo model";
		net = cv::dnn::readNetFromONNX((const char*)rdefaultModelData, rdefaultModelSize);
	}
	else
	{
		net = cv::dnn::readNetFromONNX(modelPath);
	}

	net.setPreferableBackend(runWithOCl ? cv::dnn::DNN_BACKEND_DEFAULT : cv::dnn::DNN_BACKEND_OPENCV);
	net.setPreferableTarget(runWithOCl ? cv::dnn::DNN_TARGET_OPENCL : cv::dnn::DNN_TARGET_CPU);
}
|
||||
|
||||
/*
 * Runs the network on `input` and returns the detections that survive
 * thresholding and non-maximum suppression.
 *
 * The image is optionally padded to a square (only when the configured model
 * shape is square), converted to a blob scaled by 1/255 with R and B swapped,
 * and fed through the network. The first output tensor is then decoded as
 * either a yolov5 or a yolov8 style result, see below. Returned boxes are in
 * the coordinate system of `input` and are clamped to its size.
 *
 * Throws std::runtime_error when the network yields no output tensor of rank
 * three or when there are no class score columns to evaluate. OpenCV itself
 * may additionally throw cv::Exception.
 */
std::vector<Yolo::Detection> Yolo::runInference(const cv::Mat &input)
{
	cv::Mat modelInput = input;
	if (letterBoxForSquare && modelShape.width == modelShape.height)
		modelInput = formatToSquare(modelInput);

	cv::Mat blob;
	cv::dnn::blobFromImage(modelInput, blob, 1.0/255.0, modelShape, cv::Scalar(), true, false);
	net.setInput(blob);

	std::vector<cv::Mat> outputs;
	net.forward(outputs, net.getUnconnectedOutLayersNames());

	// size[1] and size[2] are read below, so the tensor must have three axes.
	if(outputs.empty() || outputs[0].dims < 3)
		throw std::runtime_error("Yolo: network did not produce a three dimensional output tensor");

	int rows = outputs[0].size[1];
	int dimensions = outputs[0].size[2];

	bool yolov8 = false;
	// yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
	// yolov8 has an output of shape (batchSize, 84, 8400) (Num classes + box[x,y,w,h])
	if (dimensions > rows) // Check if the shape[2] is more than shape[1] (yolov8)
	{
		yolov8 = true;
		rows = outputs[0].size[2];
		dimensions = outputs[0].size[1];

		// Bring the yolov8 layout into one-candidate-per-row form like yolov5.
		outputs[0] = outputs[0].reshape(1, dimensions);
		cv::transpose(outputs[0], outputs[0]);
	}
	float *data = (float *)outputs[0].data;

	// Class scores start after the box (and, for yolov5, the confidence) columns.
	// Never look at more columns than a row actually holds, even if the class
	// list is longer than what the model emits.
	const int scoreOffset = yolov8 ? 4 : 5;
	const int scoreCount = std::min(static_cast<int>(classes.size()), dimensions - scoreOffset);
	if(scoreCount <= 0)
		throw std::runtime_error("Yolo: no class scores available, check class list and model");

	const float x_factor = modelInput.cols / modelShape.width;
	const float y_factor = modelInput.rows / modelShape.height;

	// Converts a row's centre/size box into a top-left anchored pixel rectangle.
	const auto decodeBox = [x_factor, y_factor](const float *row)
	{
		const float x = row[0];
		const float y = row[1];
		const float w = row[2];
		const float h = row[3];

		const int left = int((x - 0.5 * w) * x_factor);
		const int top = int((y - 0.5 * h) * y_factor);
		const int width = int(w * x_factor);
		const int height = int(h * y_factor);

		return cv::Rect(left, top, width, height);
	};

	std::vector<int> class_ids;
	std::vector<float> confidences;
	std::vector<cv::Rect> boxes;

	for (int i = 0; i < rows; ++i, data += dimensions)
	{
		// yolov5 only: column 4 is a confidence that gates the whole row.
		if (!yolov8 && !(data[4] >= modelConfidenceThreshold))
			continue;

		cv::Mat scores(1, scoreCount, CV_32FC1, data + scoreOffset);
		cv::Point class_id;
		double maxClassScore;
		cv::minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);

		if (!(maxClassScore > modelScoreThreshold))
			continue;

		// yolov8 reports the best class score, yolov5 the row confidence.
		confidences.push_back(yolov8 ? static_cast<float>(maxClassScore) : data[4]);
		class_ids.push_back(class_id.x);
		boxes.push_back(decodeBox(data));
	}

	std::vector<int> nms_result;
	cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nms_result);

	// One generator for the whole call; creating a random_device per detection
	// is needlessly expensive.
	std::random_device rd;
	std::mt19937 gen(rd());
	std::uniform_int_distribution<int> dis(100, 255);

	std::vector<Yolo::Detection> detections;
	detections.reserve(nms_result.size());
	for(const int idx : nms_result)
	{
		Yolo::Detection result;
		result.class_id = class_ids[idx];
		result.confidence = confidences[idx];

		result.color = cv::Scalar(dis(gen),
								  dis(gen),
								  dis(gen));

		result.className = classes[result.class_id].first;
		result.priority = classes[result.class_id].second;

		// Clamp against the caller's image, not the padded model input.
		clampBox(boxes[idx], input.size());
		result.box = boxes[idx];
		detections.push_back(result);
	}

	return detections;
}
|
||||
|
||||
|
||||
/*
 * Restricts `box` in place to the area [0, size.width] x [0, size.height].
 *
 * For boxes that overlap the area the result equals the overlapping part.
 * A box lying completely outside is collapsed onto the nearest border with a
 * width and/or height of zero; it never ends up with a negative extent, which
 * the earlier edge-by-edge adjustment could produce.
 */
void Yolo::clampBox(cv::Rect& box, const cv::Size& size)
{
	// Clamp the near edges into the area first ...
	const int left = std::min(std::max(box.x, 0), size.width);
	const int top = std::min(std::max(box.y, 0), size.height);

	// ... then the far edges, never letting them cross the near ones.
	const int right = std::max(left, std::min(box.x + box.width, size.width));
	const int bottom = std::max(top, std::min(box.y + box.height, size.height));

	box.x = left;
	box.y = top;
	box.width = right - left;
	box.height = bottom - top;
}
|
||||
|
||||
void Yolo::loadClasses(const std::string& classesStr)
|
||||
{
|
||||
std::vector<std::string> candidateClasses = tokenizeBinaryIgnore(classesStr, '\n', '"', '\\');
|
||||
classes.clear();
|
||||
for(std::string& instance : candidateClasses)
|
||||
{
|
||||
if(instance.size() < 2)
|
||||
continue;
|
||||
|
||||
std::vector<std::string> tokens = tokenizeBinaryIgnore(instance, ',', '"', '\\');
|
||||
|
||||
if(*tokens[0].begin() == '"')
|
||||
instance.erase(tokens[0].begin());
|
||||
if(tokens[0].back() == '"')
|
||||
tokens[0].pop_back();
|
||||
int priority = -1;
|
||||
if(tokens.size() > 1)
|
||||
{
|
||||
try
|
||||
{
|
||||
priority = std::stoi(tokens[1]);
|
||||
}
|
||||
catch(const std::invalid_argument& err)
|
||||
{
|
||||
Log(Log::WARN)<<"unable to get priority for class "<<tokens[0]<<' '<<err.what();
|
||||
}
|
||||
}
|
||||
classes.push_back({tokens[0], priority});
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Returns a square copy of `source`.
 *
 * The result's side length is the larger of the source's width and height.
 * The source is copied into the top-left corner and the remaining area is
 * zero filled. The canvas uses the source's own element type so that the copy
 * into the region of interest really lands in the canvas; with a mismatching
 * fixed type the copy would be lost and the canvas would stay black.
 */
cv::Mat Yolo::formatToSquare(const cv::Mat &source)
{
	const int side = std::max(source.cols, source.rows);
	cv::Mat result = cv::Mat::zeros(side, side, source.type());
	source.copyTo(result(cv::Rect(0, 0, source.cols, source.rows)));
	return result;
}
|
||||
|
||||
int Yolo::getClassForStr(const std::string& str) const
|
||||
{
|
||||
for(size_t i = 0; i < classes.size(); ++i)
|
||||
{
|
||||
if(classes[i].first == str)
|
||||
return i;
|
||||
}
|
||||
return -1;
|
||||
}
|
65
SmartCrop/yolo.h
Normal file
65
SmartCrop/yolo.h
Normal file
@ -0,0 +1,65 @@
|
||||
/* * SmartCrop - A tool for content-aware cropping of images
|
||||
* Copyright (C) 2024 Carl Philipp Klemm
|
||||
*
|
||||
* This file is part of SmartCrop.
|
||||
*
|
||||
* SmartCrop is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* SmartCrop is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <random>
|
||||
#include <filesystem>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <opencv2/opencv.hpp>
|
||||
#include <opencv2/dnn.hpp>
|
||||
|
||||
class Yolo
|
||||
{
|
||||
public:
|
||||
struct Detection
|
||||
{
|
||||
int class_id = 0;
|
||||
std::string className;
|
||||
float confidence = 0.0;
|
||||
int priority = -1;
|
||||
cv::Scalar color;
|
||||
cv::Rect box;
|
||||
};
|
||||
|
||||
private:
|
||||
static constexpr float modelConfidenceThreshold = 0.25;
|
||||
static constexpr float modelScoreThreshold = 0.45;
|
||||
static constexpr float modelNMSThreshold = 0.50;
|
||||
|
||||
std::string modelPath;
|
||||
std::vector<std::pair<std::string, int>> classes;
|
||||
cv::Size2f modelShape;
|
||||
bool letterBoxForSquare = true;
|
||||
cv::dnn::Net net;
|
||||
|
||||
void loadClasses(const std::string& classes);
|
||||
void loadOnnxNetwork(const std::filesystem::path& path);
|
||||
cv::Mat formatToSquare(const cv::Mat &source);
|
||||
static void clampBox(cv::Rect& box, const cv::Size& size);
|
||||
|
||||
public:
|
||||
Yolo(const std::filesystem::path &onnxModelPath = "", const cv::Size& modelInputShape = {640, 480},
|
||||
const std::filesystem::path& classesTxtFilePath = "", bool runWithOCl = true);
|
||||
std::vector<Detection> runInference(const cv::Mat &input);
|
||||
int getClassForStr(const std::string& str) const;
|
||||
};
|
80
Weights/classes.txt
Normal file
80
Weights/classes.txt
Normal file
@ -0,0 +1,80 @@
|
||||
person, 10
|
||||
bicycle, 4
|
||||
car, 3
|
||||
motorcycle, 4
|
||||
airplane, 4
|
||||
bus, 4
|
||||
train, 4
|
||||
truck, 3
|
||||
boat, 4
|
||||
traffic light, 1
|
||||
fire hydrant, 1
|
||||
stop sign, 1
|
||||
parking meter, 1
|
||||
bench, 2
|
||||
bird, 5
|
||||
cat, 6
|
||||
dog, 5
|
||||
horse, 4
|
||||
sheep, 5
|
||||
cow, 4
|
||||
elephant, 5
|
||||
bear, 5
|
||||
zebra, 5
|
||||
giraffe, 5
|
||||
backpack, 3
|
||||
umbrella, 3
|
||||
handbag, 3
|
||||
tie, 3
|
||||
suitcase, 2
|
||||
frisbee, 3
|
||||
skis, 3
|
||||
snowboard, 3
|
||||
sports ball, 3
|
||||
kite, 4
|
||||
baseball bat, 3
|
||||
baseball glove, 3
|
||||
skateboard, 3
|
||||
surfboard, 3
|
||||
tennis racket, 3
|
||||
bottle, 2
|
||||
wine glass, 2
|
||||
cup, 2
|
||||
fork, 1
|
||||
knife, 1
|
||||
spoon, 1
|
||||
bowl, 1
|
||||
banana, 1
|
||||
apple, 1
|
||||
sandwich,1
|
||||
orange, 1
|
||||
broccoli, 1
|
||||
carrot, 1
|
||||
hot dog, 1
|
||||
pizza, 1
|
||||
donut, 2
|
||||
cake, 2
|
||||
chair, 1
|
||||
couch, 1
|
||||
potted plant, 1
|
||||
bed, 1
|
||||
dining table, 1
|
||||
toilet, 1
|
||||
tv, 1
|
||||
laptop, 1
|
||||
mouse, 1
|
||||
remote, 1
|
||||
keyboard, 1
|
||||
cell phone, 1
|
||||
microwave, 1
|
||||
oven, 1
|
||||
toaster, 1
|
||||
sink, 1
|
||||
refrigerator, 1
|
||||
book, 1
|
||||
clock, 1
|
||||
vase, 1
|
||||
scissors, 1
|
||||
teddy bear, 1
|
||||
hair drier, 1
|
||||
toothbrush, 1
|
BIN
Weights/face_detection_yunet_2023mar.onnx
Normal file
BIN
Weights/face_detection_yunet_2023mar.onnx
Normal file
Binary file not shown.
BIN
Weights/face_recognition_sface_2021dec.onnx
Normal file
BIN
Weights/face_recognition_sface_2021dec.onnx
Normal file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user