Compare commits
11 commits
749e22335e
...
c57d6124fa
Commits (SHA1): c57d6124fa, fa52615390, 882f91a8a6, e12ede007a, a6517351ea, 6e05c0fdb5, 44c03310d8, 7b7dbc41ac, 555efd4af6, ccefb86c1a, 2765509bf3
CMakeLists.txt
@@ -1,15 +1,7 @@
 cmake_minimum_required(VERSION 3.6)
-project(AIImagePrepross)
+project(ImageAiUtils)
 
-find_package(OpenCV REQUIRED)
-
 set(CMAKE_CXX_STANDARD 17)
+set(WEIGHT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/Weights)
 
-set(SRC_FILES main.cpp yolo.cpp tokenize.cpp log.cpp seamcarving.cpp utils.cpp intelligentroi.cpp facerecognizer.cpp)
-
-add_executable(${PROJECT_NAME} ${SRC_FILES})
-target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} -ltbb)
-target_include_directories(${PROJECT_NAME} PRIVATE ${OpenCV_INCLUDE_DIRS})
-target_compile_options(${PROJECT_NAME} PRIVATE -s -g -Wall)
-
-install(TARGETS ${PROJECT_NAME} RUNTIME DESTINATION bin)
+add_subdirectory(SmartCrop)
DanbooruTagger/DanbooruTagger.py (new file, 105 lines)
@@ -0,0 +1,105 @@
import warnings
from deepdanbooru_onnx import DeepDanbooru
from PIL import Image
import argparse
import cv2
import os
from multiprocessing import Process, Queue
import json
from tqdm import tqdm


image_ext_ocv = [".bmp", ".jpeg", ".jpg", ".png"]


def find_image_files(path: str) -> list[str]:
    paths = list()
    for root, dirs, files in os.walk(path):
        for filename in files:
            name, extension = os.path.splitext(filename)
            if extension.lower() in image_ext_ocv:
                paths.append(os.path.join(root, filename))
    return paths


def image_loader(paths: list[str]):
    for path in paths:
        name, extension = os.path.splitext(path)
        extension = extension.lower()
        imagebgr = cv2.imread(path)
        # check the raw load result before converting; cv2.cvtColor would throw on None
        if imagebgr is None:
            print(f"Warning: could not load {path}")
        else:
            image = cv2.cvtColor(imagebgr, cv2.COLOR_BGR2RGB)
            image_pil = Image.fromarray(image)
            yield image_pil, path


def pipeline(queue: Queue, image_paths: list[str], device: int):
    danbooru = DeepDanbooru()

    for path in image_paths:
        imageprompt = ""
        tags = danbooru(path)
        for tag in tags:
            imageprompt = imageprompt + ", " + tag

        queue.put({"file_name": path, "text": imageprompt})


def split_list(input_list, count):
    target_length = int(len(input_list) / count)
    for i in range(0, count - 1):
        yield input_list[i * target_length: (i + 1) * target_length]
    yield input_list[(count - 1) * target_length: len(input_list)]


def save_meta(meta_file, meta, reldir, common_description):
    meta["file_name"] = os.path.relpath(meta["file_name"], reldir)
    if common_description is not None:
        meta["text"] = common_description + meta["text"]
    meta_file.write(json.dumps(meta) + '\n')


if __name__ == "__main__":
    parser = argparse.ArgumentParser("A script to tag images via DeepDanbooru")
    parser.add_argument('--batch', '-b', default=4, type=int, help="Batch size to use for inference")
    parser.add_argument('--common_description', '-c', help="An optional description that will be prepended to the AI-generated one")
    parser.add_argument('--image_dir', '-i', help="A directory containing the images to tag")
    args = parser.parse_args()

    nparalell = 2

    image_paths = find_image_files(args.image_dir)
    image_path_chunks = list(split_list(image_paths, nparalell))

    print(f"Will use {nparalell} processes to create tags")

    queue = Queue()
    processies = list()
    for i in range(0, nparalell):
        processies.append(Process(target=pipeline, args=(queue, image_path_chunks[i], i)))
        processies[-1].start()

    progress = tqdm(desc="Generating tags", total=len(image_paths))
    exit = False
    with open(os.path.join(args.image_dir, "metadata.jsonl"), mode='w') as output_file:
        while not exit:
            if not queue.empty():
                meta = queue.get()
                save_meta(output_file, meta, args.image_dir, args.common_description)
                progress.update()
            exit = True
            for process in processies:
                if process.is_alive():
                    exit = False
                    break

        while not queue.empty():
            meta = queue.get()
            save_meta(output_file, meta, args.image_dir, args.common_description)
            progress.update()

    for process in processies:
        process.join()
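Each queue entry that save_meta writes above becomes one JSON object per line in metadata.jsonl, with the image path made relative to --image_dir and the optional common description prepended. A hypothetical output line (file name and tags invented for illustration):

    {"file_name": "subdir/0001.png", "text": "anime artwork, 1girl, solo, smile"}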
DanbooruTagger/deepdanbooru_onnx/__init__.py (new file, 3 lines)
@@ -0,0 +1,3 @@
from .deepdanbooru_onnx import DeepDanbooru
from .deepdanbooru_onnx import process_image
__version__ = '0.0.8'
DanbooruTagger/deepdanbooru_onnx/deepdanbooru_onnx.py (new file, 244 lines)
@@ -0,0 +1,244 @@
import onnxruntime as ort
from PIL import Image
import numpy as np
import os
from tqdm import tqdm
import requests
import hashlib
from typing import List, Union
import shutil
from pathlib import Path


def process_image(image: Image.Image) -> np.ndarray:
    """
    Convert an image to a numpy array.
    :param image: the image to convert
    :return: the numpy array
    """

    image = image.convert("RGB").resize((512, 512))
    image = np.array(image).astype(np.float32) / 255
    image = image.transpose((2, 0, 1)).reshape(1, 3, 512, 512).transpose((0, 2, 3, 1))
    return image


def download(url: str, save_path: str, md5: str, length: int) -> bool:
    """
    Download a file from url to save_path.
    If the file already exists, check its md5.
    If the md5 matches, return True; if the md5 doesn't match, return False.
    :param url: the url of the file to download
    :param save_path: the path to save the file
    :param md5: the md5 of the file
    :param length: the length of the file in bytes
    :return: True if the file is downloaded successfully, False otherwise
    """

    try:
        response = requests.get(url=url, stream=True)
        with open(save_path, "wb") as f:
            with tqdm.wrapattr(
                response.raw, "read", total=length, desc="Downloading"
            ) as r_raw:
                shutil.copyfileobj(r_raw, f)
        return (
            True
            if hashlib.md5(open(save_path, "rb").read()).hexdigest() == md5
            else False
        )
    except Exception as e:
        print(e)
        return False


def download_model():
    """
    Download the model and tags file from the server.
    :return: the path to the model and tags file
    """

    model_url = (
        "https://huggingface.co/chinoll/deepdanbooru/resolve/main/deepdanbooru.onnx"
    )
    tags_url = "https://huggingface.co/chinoll/deepdanbooru/resolve/main/tags.txt"
    model_md5 = "16be4e40ebcc0b1d1915bbf31f00969f"
    tags_md5 = "a3f764de985cdeba89f1d232a4204402"
    model_length = 643993025
    tags_length = 133810

    home = str(Path.home()) + "/.deepdanbooru_onnx/"
    if not os.path.exists(home):
        os.mkdir(home)

    model_name = "deepdanbooru.onnx"
    tags_name = "tags.txt"

    model_path = home + model_name
    tags_path = home + tags_name
    if os.path.exists(model_path):
        if hashlib.md5(open(model_path, "rb").read()).hexdigest() != model_md5:
            os.remove(model_path)
            if not download(model_url, model_path, model_md5, model_length):
                raise ValueError("Model download failed")
    else:
        if not download(model_url, model_path, model_md5, model_length):
            raise ValueError("Model download failed")

    if os.path.exists(tags_path):
        if hashlib.md5(open(tags_path, "rb").read()).hexdigest() != tags_md5:
            os.remove(tags_path)
            if not download(tags_url, tags_path, tags_md5, tags_length):
                raise ValueError("Tags download failed")
    else:
        if not download(tags_url, tags_path, tags_md5, tags_length):
            raise ValueError("Tags download failed")
    return model_path, tags_path


class DeepDanbooru:
    def __init__(
        self,
        mode: str = "auto",
        model_path: Union[str, None] = None,
        tags_path: Union[str, None] = None,
        threshold: Union[float, int] = 0.6,
        pin_memory: bool = False,
        batch_size: int = 1,
    ):
        """
        Initialize the DeepDanbooru class.
        :param mode: the mode of the model, "cpu", "gpu" or "auto"
        :param model_path: the path to the model file
        :param tags_path: the path to the tags file
        :param threshold: the threshold of the model
        :param pin_memory: whether to use pin memory
        :param batch_size: the batch size of the model
        """

        providers = {
            "cpu": "CPUExecutionProvider",
            "gpu": "CUDAExecutionProvider",
            "tensorrt": "TensorrtExecutionProvider",
            "auto": (
                "CUDAExecutionProvider"
                if "CUDAExecutionProvider" in ort.get_available_providers()
                else "CPUExecutionProvider"
            ),
        }

        if not (isinstance(threshold, float) or isinstance(threshold, int)):
            raise TypeError("threshold must be float or int")
        if threshold < 0 or threshold > 1:
            raise ValueError("threshold must be between 0 and 1")
        if mode not in providers:
            raise ValueError(
                "Mode not supported. Please choose from: cpu, gpu, tensorrt"
            )
        if providers[mode] not in ort.get_available_providers():
            raise ValueError(
                f"Your device does not support {mode}. Please choose from: cpu"
            )
        if model_path is not None and not os.path.exists(model_path):
            raise FileNotFoundError("Model file not found")
        if tags_path is not None and not os.path.exists(tags_path):
            raise FileNotFoundError("Tags file not found")

        if model_path is None or tags_path is None:
            model_path, tags_path = download_model()

        self.session = ort.InferenceSession(model_path, providers=[providers[mode]])
        self.tags = [i.replace("\n", "") for i in open(tags_path, "r").readlines()]

        self.input_name = self.session.get_inputs()[0].name
        self.output_name = [output.name for output in self.session.get_outputs()]
        self.threshold = threshold
        self.pin_memory = pin_memory
        self.batch_size = batch_size
        self.mode = mode
        self.cache = {}

    def __str__(self) -> str:
        return f"DeepDanbooru(mode={self.mode}, threshold={self.threshold}, pin_memory={self.pin_memory}, batch_size={self.batch_size})"

    def __repr__(self) -> str:
        return self.__str__()

    def from_image_inference(self, image: Image.Image) -> dict:
        image = process_image(image)
        return self.predict(image)

    def from_ndarray_inferece(self, image: np.ndarray) -> dict:
        if image.shape != (1, 512, 512, 3):
            raise ValueError(f"Image must be {(1, 512, 512, 3)}")
        return self.predict(image)

    def from_file_inference(self, image: str) -> dict:
        return self.from_image_inference(Image.open(image))

    def from_list_inference(self, image: Union[list, tuple]) -> List[dict]:
        if self.pin_memory:
            image = [process_image(Image.open(i)) for i in image]
        for i in [
            image[i : i + self.batch_size]
            for i in range(0, len(image), self.batch_size)
        ]:
            imagelist = i
            bs = len(i)
            _imagelist, idx, hashlist = [], [], []
            for j in range(len(i)):
                img = Image.open(i[j]) if not self.pin_memory else imagelist[j]
                image_hash = hashlib.md5(np.array(img).astype(np.uint8)).hexdigest()
                hashlist.append(image_hash)
                if image_hash in self.cache:
                    continue
                if not self.pin_memory:
                    _imagelist.append(process_image(img))
                else:
                    _imagelist.append(imagelist[j])
                idx.append(j)

            imagelist = _imagelist
            if len(imagelist) != 0:
                _image = np.vstack(imagelist)
                results = self.inference(_image)
                results_idx = 0
            else:
                results = []

            for i in range(bs):
                image_tag = {}
                if i in idx:
                    hash = hashlist[i]
                    for tag, score in zip(self.tags, results[results_idx]):
                        if score >= self.threshold:
                            image_tag[tag] = score
                    results_idx += 1
                    self.cache[hash] = image_tag
                    yield image_tag
                else:
                    yield self.cache[hashlist[i]]

    def inference(self, image):
        return self.session.run(self.output_name, {self.input_name: image})[0]

    def predict(self, image):
        result = self.inference(image)
        image_tag = {}
        for tag, score in zip(self.tags, result[0]):
            if score >= self.threshold:
                image_tag[tag] = score
        return image_tag

    def __call__(self, image) -> Union[dict, List[dict]]:
        if isinstance(image, str):
            return self.from_file_inference(image)
        elif isinstance(image, np.ndarray):
            return self.from_ndarray_inferece(image)
        elif isinstance(image, list) or isinstance(image, tuple):
            return self.from_list_inference(image)
        elif isinstance(image, Image.Image):
            return self.from_image_inference(image)
        else:
            raise ValueError("Image must be a file path or a numpy array or list/tuple")
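For orientation, the class above is self-contained: instantiating it downloads the ONNX model and tag list to ~/.deepdanbooru_onnx/ if they are missing, and calling the instance with a file path runs inference and returns a dict of tag to confidence. A minimal usage sketch (image path hypothetical):

    from deepdanbooru_onnx import DeepDanbooru

    danbooru = DeepDanbooru(threshold=0.6)  # "auto" mode picks CUDA when onnxruntime reports it, else CPU
    tags = danbooru("example.jpg")          # dict mapping tag name to confidence score
    print(sorted(tags, key=tags.get, reverse=True)[:10])  # ten highest scoring tags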
LLavaTagger/README.md (new file, 21 lines)
@@ -0,0 +1,21 @@
# LLavaTagger

LLavaTagger is a Python script that tags images based on a given prompt using the [LLaVA](https://llava-vl.github.io/) multimodal LLM. LLavaTagger supports using any number of GPUs in DDP parallel for this task.

## How to use

First create a Python venv and install the required packages into it:

	$ python -m venv venv
	$ source venv/bin/activate
	$ pip install -r requirements.txt

Then run LLavaTagger, for instance like so:

	$ python LLavaTagger.py --common_description "a image of a cat, " --prompt "describe the cat in 10 to 20 words" --batch 8 --quantize --image_dir ~/cat_images

By default LLavaTagger will run in parallel on all available GPUs; if this is undesirable, use the ROCR_VISIBLE_DEVICES= or CUDA_VISIBLE_DEVICES= environment variables to hide unwanted GPUs.

LLavaTagger will then create a meta.jsonl in the image directory suitable to be used by the scripts of [diffusers](https://github.com/huggingface/diffusers) to train stable diffusion (xl). If other formats are desired, ../utils contains scripts to transform the metadata into other formats, for instance for use with [kohya](https://github.com/bmaltais/kohya_ss).

If editing the created tags is desired, [QImageTagger](https://uvos.xyz/git/uvos/QImageTagger) can be used for this purpose.
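Because each line of the metadata file pairs a file_name with a text column, the output follows the Hugging Face imagefolder convention. A sketch of loading it for training, assuming the metadata file carries the name metadata.jsonl that the imagefolder loader expects (directory path hypothetical):

    from datasets import load_dataset

    # reads the images plus the "text" column from metadata.jsonl in the same directory
    dataset = load_dataset("imagefolder", data_dir="/path/to/cat_images", split="train")
    print(dataset[0]["text"])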
LLavaTagger/requirements.txt (new file, 11 lines)
@@ -0,0 +1,11 @@
accelerate==0.29.0
bitsandbytes
huggingface-hub==0.22.2
ninja==1.11.1.1
safetensors==0.4.2
tokenizers==0.15.2
transformers
torch
opencv-python
numpy
tqdm
PersonDatasetAssembler/PersonDatasetAssembler.py (new executable file, 174 lines)
@@ -0,0 +1,174 @@
#!/bin/python3

# PersonDatasetAssembler - A tool to assemble images of a specific person from a
# directory of images or from a video file
# Copyright (C) 2024 Carl Philipp Klemm
#
# This file is part of PersonDatasetAssembler.
#
# PersonDatasetAssembler is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# PersonDatasetAssembler is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with PersonDatasetAssembler. If not, see <http://www.gnu.org/licenses/>.

import argparse
import os
from typing import Iterator
import cv2
import numpy
from tqdm import tqdm
from wand.exceptions import BlobError
from wand.image import Image

image_ext_ocv = [".bmp", ".jpeg", ".jpg", ".png"]
image_ext_wand = [".dng", ".arw"]


class LoadException(Exception):
    pass


def find_image_files(path: str) -> list[str]:
    paths = list()
    for root, dirs, files in os.walk(path):
        for filename in files:
            name, extension = os.path.splitext(filename)
            if extension.lower() in image_ext_ocv or extension in image_ext_wand:
                paths.append(os.path.join(root, filename))
    return paths


def image_loader(paths: list[str]) -> Iterator[numpy.ndarray]:
    for path in paths:
        name, extension = os.path.splitext(path)
        extension = extension.lower()
        if extension in image_ext_ocv:
            image = cv2.imread(path)
            if image is None:
                print(f"Warning: could not load {path}")
            else:
                yield image
        elif extension in image_ext_wand:
            try:
                image = Image(filename=path)
            except BlobError as e:
                print(f"Warning: could not load {path}, {e}")
                continue
            # hand the raw image to OpenCV as a BGR numpy array
            yield cv2.cvtColor(numpy.array(image), cv2.COLOR_RGB2BGR)


def extract_video_images(video: cv2.VideoCapture, interval: int = 0):
    ret = True
    frame_counter = 0
    while ret:
        video.set(cv2.CAP_PROP_POS_FRAMES, frame_counter)
        ret, frame = video.read()
        if ret:
            yield frame
        frame_counter += interval


def contains_face_match(detector: cv2.FaceDetectorYN, recognizer: cv2.FaceRecognizerSF, image: numpy.ndarray, referance_features: list, thresh: float) -> tuple[float, bool]:
    detector.setInputSize([image.shape[1], image.shape[0]])
    faces = detector.detect(image)[1]
    if faces is None:
        return 0, False
    for face in faces:
        cropped_image = recognizer.alignCrop(image, face)
        features = recognizer.feature(cropped_image)
        score_accum = 0.0
        for referance in referance_features:
            score_accum += recognizer.match(referance, features, 0)
        score = score_accum / len(referance_features)
        if score > thresh:
            return score, True
    return 0, False


def process_referance(detector: cv2.FaceDetectorYN, recognizer: cv2.FaceRecognizerSF, referance_path: str) -> list:
    images = list()
    out = list()

    if os.path.isfile(referance_path):
        image = cv2.imread(referance_path)
        if image is None:
            print(f"Could not load image from {referance_path}")
        else:
            images.append(image)
    elif os.path.isdir(referance_path):
        filenames = find_image_files(referance_path)
        images = list(image_loader(filenames))

    for image in images:
        detector.setInputSize([image.shape[1], image.shape[0]])
        faces = detector.detect(image)[1]
        if faces is None:
            print("unable to find face in reference image")
            exit(1)
        image = recognizer.alignCrop(image, faces[0])
        features = recognizer.feature(image)
        out.append(features)

    return out


if __name__ == "__main__":
    parser = argparse.ArgumentParser("Script to assemble a dataset of images of a specific person")
    parser.add_argument('--out', '-o', default="out", help="place to put dataset")
    parser.add_argument('--input', '-i', required=True, help="directory or video file to get images from")
    parser.add_argument('--skip', '-s', default=0, type=int, help="skip n frames between samples when grabbing from a video file")
    parser.add_argument('--referance', '-r', required=True, help="reference image or directory of images of the person to be found")
    parser.add_argument('--match_model', '-m', required=True, help="Path to the onnx recognition model to be used")
    parser.add_argument('--detect_model', '-d', required=True, help="Path to the onnx detection model to be used")
    parser.add_argument('--threshold', '-t', default=0.362, type=float, help="match threshold to use")
    parser.add_argument('--invert', '-n', action='store_true', help="output files that DON'T match")
    args = parser.parse_args()

    recognizer = cv2.FaceRecognizerSF.create(model=args.match_model, config="", backend_id=cv2.dnn.DNN_BACKEND_DEFAULT, target_id=cv2.dnn.DNN_TARGET_CPU)
    detector = cv2.FaceDetectorYN.create(model=args.detect_model, config="", input_size=[320, 320],
        score_threshold=0.6, nms_threshold=0.3, top_k=5000, backend_id=cv2.dnn.DNN_BACKEND_DEFAULT, target_id=cv2.dnn.DNN_TARGET_CPU)

    referance_features = process_referance(detector, recognizer, args.referance)
    if len(referance_features) < 1:
        print(f"Could not load any reference image(s) from {args.referance}")
        exit(1)

    if os.path.isfile(args.input):
        video = cv2.VideoCapture(args.input)
        if not video.isOpened():
            print(f"Unable to open {args.input} as a video file")
            exit(1)
        image_generator = extract_video_images(video, args.skip + 1)
        total_images = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) / (args.skip + 1)
    elif os.path.isdir(args.input):
        image_filenams = find_image_files(args.input)
        image_generator = image_loader(image_filenams)
        total_images = len(image_filenams)
    else:
        print(f"{args.input} is not a video file nor is it a directory")
        exit(1)

    os.makedirs(args.out, exist_ok=True)

    progress = tqdm(total=int(total_images), desc="0.00")
    counter = 0
    for image in image_generator:
        if image.shape[0] > 512:
            aspect = image.shape[0] / image.shape[1]
            resized = cv2.resize(image, (int(512 / aspect), 512), 0, 0, cv2.INTER_AREA)
        else:
            resized = image
        score, match = contains_face_match(detector, recognizer, resized, referance_features, args.threshold)
        if match and not args.invert or not match and args.invert:
            filename = f"{counter:04}.png"
            cv2.imwrite(os.path.join(args.out, filename), image)
            counter += 1
        progress.set_description(f"{score:1.2f}")
        progress.update()
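The matching logic above reduces to OpenCV's YuNet face detector plus the SFace recognizer. A minimal sketch comparing the first face found in two images, mirroring the calls the script makes (image paths hypothetical; the model files are the same ONNX weights passed via --detect_model and --match_model):

    import cv2

    detector = cv2.FaceDetectorYN.create(model="face_detection_yunet_2023mar.onnx", config="", input_size=[320, 320])
    recognizer = cv2.FaceRecognizerSF.create(model="face_recognition_sface_2021dec.onnx", config="")

    def face_features(path: str):
        image = cv2.imread(path)
        detector.setInputSize([image.shape[1], image.shape[0]])
        faces = detector.detect(image)[1]  # None when no face was found; assumed present here
        return recognizer.feature(recognizer.alignCrop(image, faces[0]))

    # 0 selects cosine similarity, the same metric the script passes to recognizer.match()
    score = recognizer.match(face_features("a.jpg"), face_features("b.jpg"), 0)
    print(score > 0.362)  # the script's default --threshold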
PersonDatasetAssembler/README.md (new file, 20 lines)
@@ -0,0 +1,20 @@
### PersonDatasetAssembler

PersonDatasetAssembler is a Python script that finds images of a specific person, specified by a reference image, in a directory of images or in a video file. PersonDatasetAssembler also supports raw images.

## How to use

First create a Python venv and install the required packages into it:

	$ python -m venv venv
	$ source venv/bin/activate
	$ pip install -r requirements.txt

Then run PersonDatasetAssembler, for instance like so:

	$ python PersonDatasetAssembler.py --referance someperson.jpg --match_model ../Weights/face_recognition_sface_2021dec.onnx --detect_model ../Weights/face_detection_yunet_2023mar.onnx --input ~/Photos --out imagesOfSomePerson

Or to extract images from a video:

	$ python PersonDatasetAssembler.py --referance someperson.jpg --match_model ../Weights/face_recognition_sface_2021dec.onnx --detect_model ../Weights/face_detection_yunet_2023mar.onnx -i ~/SomeVideo.mkv --out imagesOfSomePerson
PersonDatasetAssembler/requirements.txt (new file, 4 lines)
@@ -0,0 +1,4 @@
numpy==1.26.4
opencv-python==4.10.0.82
tqdm==4.66.4
Wand==0.6.13
README.md (new file, 35 lines)
@@ -0,0 +1,35 @@
# SDImagePreprocess

This repo contains a collection of high-performance tools intended to ease the creation of datasets for image generation AI training like stable diffusion.

## Included tools

This repo contains the following tools:

### SmartCrop

SmartCrop is an application that performs content aware cropping using [seam carving](https://en.wikipedia.org/wiki/Seam_carving) and resizing to bring a directory of images into the desired size and aspect ratio for training. SmartCrop is configurable to prioritize specific items or specific persons in the images provided.

#### Content detected in image:

![Content detected in an image](SmartCrop/images/IMGP3692.jpg)

#### Cropped image based on content:
![The image cropped based on the detected content](SmartCrop/images/IMGP3692C.jpg)

### PersonDatasetAssembler

PersonDatasetAssembler is a Python script that finds images of a specific person, specified by a reference image, in a directory of images or in a video file. PersonDatasetAssembler also supports raw images.

### LLavaTagger

LLavaTagger is a Python script that tags images based on a given prompt using the [LLaVA](https://llava-vl.github.io/) multimodal LLM. LLavaTagger supports using any number of GPUs in DDP parallel for this task.

### DanbooruTagger

DanbooruTagger is a Python script of dubious utility that tags images using the [DeepDanbooru](https://github.com/KichangKim/DeepDanbooru) convolutional network.

## License

All files in this repo are licensed GPL V3, see LICENSE
SmartCrop/CMakeLists.txt (new file, 16 lines)
@@ -0,0 +1,16 @@
cmake_minimum_required(VERSION 3.6)

find_package(OpenCV REQUIRED)

set(CMAKE_CXX_STANDARD 17)

set(SRC_FILES main.cpp yolo.cpp tokenize.cpp log.cpp seamcarving.cpp utils.cpp intelligentroi.cpp facerecognizer.cpp)

add_executable(smartcrop ${SRC_FILES})
target_link_libraries(smartcrop ${OpenCV_LIBS} -ltbb)
target_include_directories(smartcrop PRIVATE ${OpenCV_INCLUDE_DIRS})
target_compile_options(smartcrop PRIVATE -s -g -Wall)
message(WARNING ${WEIGHT_DIR})
target_compile_definitions(smartcrop PUBLIC WEIGHT_DIR="${WEIGHT_DIR}")

install(TARGETS smartcrop RUNTIME DESTINATION bin)
SmartCrop/README.md (new file, 50 lines)
@@ -0,0 +1,50 @@
# SmartCrop

SmartCrop is an application that performs content aware cropping using [seam carving](https://en.wikipedia.org/wiki/Seam_carving) and resizing to bring a directory of images into the desired size and aspect ratio for training. SmartCrop is configurable to prioritize specific items or specific persons in the images provided.

## Requirements

* [cmake](https://cmake.org/) 3.6 or later
* [opencv](https://opencv.org/) 4.8 or later
* A C++17 capable compiler and standard library like gcc or llvm/clang
* git is required to get the source

## Building

The steps to build this application are:

	$ git clone https://uvos.xyz/git/uvos/SDImagePreprocess.git
	$ cd SDImagePreprocess
	$ mkdir build
	$ cd build
	$ cmake ..
	$ make

The binary can then be found in build/SmartCrop and can optionally be installed with:

	$ sudo make install

## Basic usage

To process all images in the directory ~/images and output the images into ~/processedImages:

	$ smartcrop --out processedImages ~/images/*

To also focus on the person in the image ~/person.jpg:

	$ smartcrop --out processedImages --focus-person ~/person.jpg ~/images/*

To also enable seam carving:

	$ smartcrop --out processedImages --focus-person ~/person.jpg --seam-carving ~/images/*

See smartcrop --help for more.

## Example

#### Content detected in image:
![Content detected in an image](images/IMGP3692.jpg)

#### Cropped image based on content:
![The image cropped based on the detected content](images/IMGP3692C.jpg)
SmartCrop/facerecognizer.cpp
@@ -1,11 +1,31 @@
+//
+// SmartCrop - A tool for content aware croping of images
+// Copyright (C) 2024 Carl Philipp Klemm
+//
+// This file is part of SmartCrop.
+//
+// SmartCrop is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// SmartCrop is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
+//
+
 #include "facerecognizer.h"
 #include <filesystem>
 
 #define INCBIN_PREFIX r
 #include "incbin.h"
 
-INCBIN(defaultRecognizer, "../face_recognition_sface_2021dec.onnx");
-INCBIN(defaultDetector, "../face_detection_yunet_2023mar.onnx");
+INCBIN(defaultRecognizer, WEIGHT_DIR "/face_recognition_sface_2021dec.onnx");
+INCBIN(defaultDetector, WEIGHT_DIR "/face_detection_yunet_2023mar.onnx");
 
 #include <opencv2/dnn/dnn.hpp>
 #include <opencv2/core.hpp>
@@ -106,28 +126,35 @@ void FaceRecognizer::clearReferances()
 	referanceFeatures.clear();
 }
 
-std::pair<int, double> FaceRecognizer::isMatch(const cv::Mat& input, bool alone)
+FaceRecognizer::Detection FaceRecognizer::isMatch(const cv::Mat& input, bool alone)
 {
 	cv::Mat faces = detectFaces(input);
 
-	if(alone && faces.rows > 1)
-		return {-2, 0};
-
-	std::pair<int, double> bestMatch = {-1, 0};
+	Detection bestMatch;
+	bestMatch.confidence = 0;
+	bestMatch.person = -1;
+
+	if(alone && faces.rows > 1)
+	{
+		bestMatch.person = -2;
+		return bestMatch;
+	}
 
 	for(int i = 0; i < faces.rows; ++i)
 	{
 		cv::Mat face;
-		recognizer->alignCrop(input, faces.row(0), face);
+		recognizer->alignCrop(input, faces.row(i), face);
 		cv::Mat features;
 		recognizer->feature(face, features);
 		features = features.clone();
 		for(size_t referanceIndex = 0; referanceIndex < referanceFeatures.size(); ++referanceIndex)
 		{
 			double score = recognizer->match(referanceFeatures[referanceIndex], features, cv::FaceRecognizerSF::FR_COSINE);
-			if(score > threshold && score > bestMatch.second)
+			if(score > threshold && score > bestMatch.confidence)
 			{
-				bestMatch = {referanceIndex, score};
+				bestMatch.confidence = score;
+				bestMatch.person = referanceIndex;
+				bestMatch.rect = cv::Rect(faces.at<int>(i, 0), faces.at<int>(i, 1), faces.at<int>(i, 2), faces.at<int>(i, 3));
 			}
 		}
 	}
SmartCrop/facerecognizer.h
@@ -1,3 +1,22 @@
+/* * SmartCrop - A tool for content aware croping of images
+ * Copyright (C) 2024 Carl Philipp Klemm
+ *
+ * This file is part of SmartCrop.
+ *
+ * SmartCrop is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * SmartCrop is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
+ */
+
 #pragma once
 #include <exception>
 #include <opencv2/core/mat.hpp>
@@ -11,6 +30,13 @@ class FaceRecognizer
 {
 public:
 
+	struct Detection
+	{
+		int person;
+		float confidence;
+		cv::Rect rect;
+	};
+
 	class LoadException : public std::exception
 	{
 	private:
@@ -33,7 +59,7 @@ private:
 public:
 	FaceRecognizer(std::filesystem::path recognizerPath = "", const std::filesystem::path& detectorPath = "", const std::vector<cv::Mat>& referances = std::vector<cv::Mat>());
 	cv::Mat detectFaces(const cv::Mat& input);
-	std::pair<int, double> isMatch(const cv::Mat& input, bool alone = false);
+	Detection isMatch(const cv::Mat& input, bool alone = false);
 	bool addReferances(const std::vector<cv::Mat>& referances);
 	void setThreshold(double threashold);
 	double getThreshold();
SmartCrop/incbin.h
@@ -1,3 +1,22 @@
+/* * SmartCrop - A tool for content aware croping of images
+ * Copyright (C) 2024 Carl Philipp Klemm
+ *
+ * This file is part of SmartCrop.
+ *
+ * SmartCrop is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * SmartCrop is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
+ */
+
 /**
  * @file incbin.h
  * @author Dale Weiler
SmartCrop/intelligentroi.cpp
@@ -1,3 +1,23 @@
+//
+// SmartCrop - A tool for content aware croping of images
+// Copyright (C) 2024 Carl Philipp Klemm
+//
+// This file is part of SmartCrop.
+//
+// SmartCrop is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// SmartCrop is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
+//
+
 #include "intelligentroi.h"
 
 #include <opencv2/imgproc.hpp>
SmartCrop/intelligentroi.h (new file, 37 lines)
@@ -0,0 +1,37 @@
/* * SmartCrop - A tool for content aware croping of images
 * Copyright (C) 2024 Carl Philipp Klemm
 *
 * This file is part of SmartCrop.
 *
 * SmartCrop is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * SmartCrop is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
 */

#pragma once

#include <opencv2/imgproc.hpp>

#include "yolo.h"

class InteligentRoi
{
private:
	int personId;
	static bool compPointPrio(const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b, const cv::Point2i& center);
	static void slideRectToPoint(cv::Rect& rect, const cv::Point2i& point);
	static cv::Rect maxRect(bool& incompleate, const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude = {});

public:
	InteligentRoi(const Yolo& yolo);
	bool getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize);
};
SmartCrop/main.cpp
@@ -1,3 +1,23 @@
+//
+// SmartCrop - A tool for content aware croping of images
+// Copyright (C) 2024 Carl Philipp Klemm
+//
+// This file is part of SmartCrop.
+//
+// SmartCrop is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// SmartCrop is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
+//
+
 #include <filesystem>
 #include <iostream>
 #include <opencv2/core.hpp>
@@ -243,12 +263,12 @@ static void reduceSize(cv::Mat& image, const cv::Size& targetSize)
 	if(image.cols > image.rows)
 	{
 		double ratio = static_cast<double>(longTargetSize)/image.cols;
-		cv::resize(image, image, {longTargetSize, static_cast<int>(image.rows*ratio)}, 0, 0, cv::INTER_CUBIC);
+		cv::resize(image, image, {longTargetSize, static_cast<int>(image.rows*ratio)}, 0, 0, ratio < 1 ? cv::INTER_AREA : cv::INTER_CUBIC);
 	}
 	else
 	{
 		double ratio = static_cast<double>(longTargetSize)/image.rows;
-		cv::resize(image, image, {static_cast<int>(image.cols*ratio), longTargetSize}, 0, 0, cv::INTER_CUBIC);
+		cv::resize(image, image, {static_cast<int>(image.cols*ratio), longTargetSize}, 0, 0, ratio < 1 ? cv::INTER_AREA : cv::INTER_CUBIC);
 	}
 }
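The change above picks the interpolation filter from the scale factor: area averaging avoids the aliasing that cubic interpolation produces when shrinking, while cubic remains the better choice when enlarging. A minimal Python sketch of the same policy (names hypothetical):

    import cv2

    def resize_with_best_filter(image, new_size, ratio):
        # area averaging when shrinking (ratio < 1), cubic interpolation when enlarging
        interp = cv2.INTER_AREA if ratio < 1 else cv2.INTER_CUBIC
        return cv2.resize(image, new_size, interpolation=interp)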
@@ -276,12 +296,13 @@ void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yol
 	{
 		cv::Mat person = image(detection.box);
 		reconizerMutex.lock();
-		std::pair<int, double> match = recognizer->isMatch(person);
+		FaceRecognizer::Detection match = recognizer->isMatch(person);
 		reconizerMutex.unlock();
-		if(match.first >= 0)
+		if(match.person >= 0)
 		{
 			detection.priority += 10;
 			hasmatch = true;
+			detections.push_back({0, "Face", match.confidence, 20, {255, 0, 0}, match.rect});
 		}
 	}
 	Log(Log::DEBUG)<<detection.class_id<<": "<<detection.className<<" at "<<detection.box<<" with prio "<<detection.priority<<(hasmatch ? " has match" : "");
@@ -429,7 +450,7 @@ int main(int argc, char* argv[])
 	std::vector<std::thread> threads;
 	std::vector<std::vector<std::filesystem::path>> imagePathParts = splitVector(imagePaths, std::thread::hardware_concurrency());
 
-	for(size_t i = 0; i < std::thread::hardware_concurrency(); ++i)
+	for(size_t i = 0; i < imagePathParts.size(); ++i)
 		threads.push_back(std::thread(threadFn, imagePathParts[i], std::ref(config), recognizer, std::ref(recognizerMutex), std::ref(debugOutputPath)));
 
 	for(std::thread& thread : threads)
SmartCrop/options.h
@@ -1,3 +1,22 @@
+/* * SmartCrop - A tool for content aware croping of images
+ * Copyright (C) 2024 Carl Philipp Klemm
+ *
+ * This file is part of SmartCrop.
+ *
+ * SmartCrop is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * SmartCrop is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
+ */
+
 #pragma once
 #include <string>
 #include <vector>
SmartCrop/readfile.h (new file, 35 lines)
@@ -0,0 +1,35 @@
/* * SmartCrop - A tool for content aware croping of images
 * Copyright (C) 2024 Carl Philipp Klemm
 *
 * This file is part of SmartCrop.
 *
 * SmartCrop is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * SmartCrop is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
 */

#pragma once
#include <string>
#include <filesystem>
#include <fstream>
#include <stdexcept>
#include <sstream>

inline std::string readFile(const std::filesystem::path& path)
{
	std::ifstream file(path);
	if(!file.is_open())
		throw std::runtime_error(std::string("could not open file ") + path.string());
	std::stringstream ss;
	ss<<file.rdbuf();
	return ss.str();
}
SmartCrop/seamcarving.cpp
@@ -1,3 +1,23 @@
+//
+// SmartCrop - A tool for content aware croping of images
+// Copyright (C) 2024 Carl Philipp Klemm
+//
+// This file is part of SmartCrop.
+//
+// SmartCrop is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// SmartCrop is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
+//
+
 #include "seamcarving.h"
 
 #include <opencv2/imgcodecs.hpp>
SmartCrop/seamcarving.h
@@ -1,3 +1,22 @@
+/* * SmartCrop - A tool for content aware croping of images
+ * Copyright (C) 2024 Carl Philipp Klemm
+ *
+ * This file is part of SmartCrop.
+ *
+ * SmartCrop is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * SmartCrop is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
+ */
+
 #pragma once
 
 #include <opencv2/core/core.hpp>
SmartCrop/tokenize.cpp (new file, 46 lines)
@@ -0,0 +1,46 @@
//
// SmartCrop - A tool for content aware croping of images
// Copyright (C) 2024 Carl Philipp Klemm
//
// This file is part of SmartCrop.
//
// SmartCrop is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// SmartCrop is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
//

#include "tokenize.h"


std::vector<std::string> tokenizeBinaryIgnore(const std::string& str, const char delim, const char ignoreBraket, const char escapeChar)
{
	std::vector<std::string> tokens;
	std::string token;
	bool inBaracket = false;
	for(size_t i = 0; i < str.size(); ++i)
	{
		if(str[i] == delim && !inBaracket && (i == 0 || str[i-1] != escapeChar))
		{
			tokens.push_back(token);
			token.clear();
		}
		else
		{
			token.push_back(str[i]);
		}
		if(ignoreBraket == str[i])
			inBaracket = !inBaracket;
	}
	if(!inBaracket)
		tokens.push_back(token);
	return tokens;
}
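For illustration, the same splitting rule transliterated to Python (hypothetical helper, not part of the repo): the single bracket character toggles an "ignore" state, so delimiters between a balanced pair of that character are kept inside one token, and an unclosed bracket discards the trailing token.

    def tokenize_binary_ignore(s: str, delim: str, ignore_bracket: str = '\0', escape_char: str = '\0') -> list[str]:
        tokens = []
        token = ""
        in_bracket = False
        for i, ch in enumerate(s):
            if ch == delim and not in_bracket and (i == 0 or s[i - 1] != escape_char):
                tokens.append(token)
                token = ""
            else:
                token += ch
            if ch == ignore_bracket:
                in_bracket = not in_bracket
        if not in_bracket:
            tokens.append(token)
        return tokens

    print(tokenize_binary_ignore('person, "cat, dog", car', ',', '"'))
    # ['person', ' "cat, dog"', ' car'] -- the quoted comma does not split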
SmartCrop/tokenize.h (new file, 26 lines)
@@ -0,0 +1,26 @@
/* * SmartCrop - A tool for content aware croping of images
 * Copyright (C) 2024 Carl Philipp Klemm
 *
 * This file is part of SmartCrop.
 *
 * SmartCrop is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * SmartCrop is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
 */

#pragma once

#include <string>
#include <vector>

std::vector<std::string> tokenizeBinaryIgnore(const std::string& str, const char delim, const char ignoreBraket = '\0',
	const char escapeChar = '\0');
SmartCrop/utils.cpp
@@ -1,3 +1,23 @@
+//
+// SmartCrop - A tool for content aware croping of images
+// Copyright (C) 2024 Carl Philipp Klemm
+//
+// This file is part of SmartCrop.
+//
+// SmartCrop is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// SmartCrop is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
+//
+
 #include "utils.h"
 
 #include <filesystem>
SmartCrop/utils.h (new file, 34 lines)
@@ -0,0 +1,34 @@
/* * SmartCrop - A tool for content aware croping of images
 * Copyright (C) 2024 Carl Philipp Klemm
 *
 * This file is part of SmartCrop.
 *
 * SmartCrop is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * SmartCrop is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
 */

#pragma once

#include <filesystem>
#include <vector>
#include <opencv2/imgproc.hpp>

bool isImagePath(const std::filesystem::path& path);

void getImageFiles(const std::filesystem::path& path, std::vector<std::filesystem::path>& paths);

cv::Rect rectFromPoints(const std::vector<std::pair<cv::Point, int>>& points);

double pointDist(const cv::Point2i& pointA, const cv::Point2i& pointB);

bool pointInRect(const cv::Point2i& point, const cv::Rect& rect);
SmartCrop/yolo.cpp
@@ -1,3 +1,23 @@
+//
+// SmartCrop - A tool for content aware croping of images
+// Copyright (C) 2024 Carl Philipp Klemm
+//
+// This file is part of SmartCrop.
+//
+// SmartCrop is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// SmartCrop is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
+//
+
 #include <opencv2/dnn/dnn.hpp>
 #include <algorithm>
 #include <string>
@@ -11,8 +31,8 @@
 #define INCBIN_PREFIX r
 #include "incbin.h"
 
-INCTXT(defaultClasses, "../classes.txt");
-INCBIN(defaultModel, "../yolov8x.onnx");
+INCTXT(defaultClasses, WEIGHT_DIR "/classes.txt");
+INCBIN(defaultModel, WEIGHT_DIR "/yolov8x.onnx");
 
 Yolo::Yolo(const std::filesystem::path &onnxModelPath, const cv::Size &modelInputShape,
 	const std::filesystem::path& classesTxtFilePath, bool runWithOCl)
SmartCrop/yolo.h
@@ -1,3 +1,22 @@
+/* * SmartCrop - A tool for content aware croping of images
+ * Copyright (C) 2024 Carl Philipp Klemm
+ *
+ * This file is part of SmartCrop.
+ *
+ * SmartCrop is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * SmartCrop is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
+ */
+
 #pragma once
 
 #include <fstream>
intelligentroi.h (deleted, 18 lines)
@@ -1,18 +0,0 @@
#pragma once

#include <opencv2/imgproc.hpp>

#include "yolo.h"

class InteligentRoi
{
private:
	int personId;
	static bool compPointPrio(const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b, const cv::Point2i& center);
	static void slideRectToPoint(cv::Rect& rect, const cv::Point2i& point);
	static cv::Rect maxRect(bool& incompleate, const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude = {});

public:
	InteligentRoi(const Yolo& yolo);
	bool getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize);
};
readfile.h (deleted, 16 lines)
@@ -1,16 +0,0 @@
#pragma once
#include <string>
#include <filesystem>
#include <fstream>
#include <stdexcept>
#include <sstream>

inline std::string readFile(const std::filesystem::path& path)
{
	std::ifstream file(path);
	if(!file.is_open())
		throw std::runtime_error(std::string("could not open file ") + path.string());
	std::stringstream ss;
	ss<<file.rdbuf();
	return ss.str();
}
tokenize.cpp (deleted, 26 lines)
@@ -1,26 +0,0 @@
#include "tokenize.h"


std::vector<std::string> tokenizeBinaryIgnore(const std::string& str, const char delim, const char ignoreBraket, const char escapeChar)
{
	std::vector<std::string> tokens;
	std::string token;
	bool inBaracket = false;
	for(size_t i = 0; i < str.size(); ++i)
	{
		if(str[i] == delim && !inBaracket && (i == 0 || str[i-1] != escapeChar))
		{
			tokens.push_back(token);
			token.clear();
		}
		else
		{
			token.push_back(str[i]);
		}
		if(ignoreBraket == str[i])
			inBaracket = !inBaracket;
	}
	if(!inBaracket)
		tokens.push_back(token);
	return tokens;
}
tokenize.h (deleted, 7 lines)
@@ -1,7 +0,0 @@
#pragma once

#include <string>
#include <vector>

std::vector<std::string> tokenizeBinaryIgnore(const std::string& str, const char delim, const char ignoreBraket = '\0',
	const char escapeChar = '\0');
utils.h (deleted, 15 lines)
@@ -1,15 +0,0 @@
#pragma once

#include <filesystem>
#include <vector>
#include <opencv2/imgproc.hpp>

bool isImagePath(const std::filesystem::path& path);

void getImageFiles(const std::filesystem::path& path, std::vector<std::filesystem::path>& paths);

cv::Rect rectFromPoints(const std::vector<std::pair<cv::Point, int>>& points);

double pointDist(const cv::Point2i& pointA, const cv::Point2i& pointB);

bool pointInRect(const cv::Point2i& point, const cv::Rect& rect);
yolov8x.onnx (binary file, not shown)