Compare commits

...

11 commits

39 changed files with 1111 additions and 110 deletions

@@ -1,15 +1,7 @@
cmake_minimum_required(VERSION 3.6)
project(AIImagePrepross)
find_package(OpenCV REQUIRED)
project(ImageAiUtils)
set(CMAKE_CXX_STANDARD 17)
set(WEIGHT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/Weights)
set(SRC_FILES main.cpp yolo.cpp tokenize.cpp log.cpp seamcarving.cpp utils.cpp intelligentroi.cpp facerecognizer.cpp)
add_executable(${PROJECT_NAME} ${SRC_FILES})
target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} -ltbb)
target_include_directories(${PROJECT_NAME} PRIVATE ${OpenCV_INCLUDE_DIRS})
target_compile_options(${PROJECT_NAME} PRIVATE -s -g -Wall)
install(TARGETS ${PROJECT_NAME} RUNTIME DESTINATION bin)
add_subdirectory(SmartCrop)

@@ -0,0 +1,105 @@
import warnings
from deepdanbooru_onnx import DeepDanbooru
from PIL import Image
import argparse
import cv2
import os
from multiprocessing import Process, Queue
import json
from tqdm import tqdm
image_ext_ocv = [".bmp", ".jpeg", ".jpg", ".png"]
def find_image_files(path: str) -> list[str]:
    paths = list()
    for root, dirs, files in os.walk(path):
        for filename in files:
            name, extension = os.path.splitext(filename)
            if extension.lower() in image_ext_ocv:
                paths.append(os.path.join(root, filename))
    return paths
def image_loader(paths: list[str]):
    for path in paths:
        imagebgr = cv2.imread(path)
        # check the load result before converting; cv2.imread returns None on failure
        if imagebgr is None:
            print(f"Warning: could not load {path}")
            continue
        image = cv2.cvtColor(imagebgr, cv2.COLOR_BGR2RGB)
        yield Image.fromarray(image), path
def pipeline(queue: Queue, image_paths: list[str], device: int):
    danbooru = DeepDanbooru()
    for path in image_paths:
        imageprompt = ""
        tags = danbooru(path)
        for tag in tags:
            imageprompt = imageprompt + ", " + tag
        queue.put({"file_name": path, "text": imageprompt})
def split_list(input_list, count):
    target_length = int(len(input_list) / count)
    for i in range(0, count - 1):
        yield input_list[i * target_length: (i + 1) * target_length]
    yield input_list[(count - 1) * target_length: len(input_list)]
def save_meta(meta_file, meta, reldir, common_description):
    meta["file_name"] = os.path.relpath(meta["file_name"], reldir)
    if common_description is not None:
        meta["text"] = common_description + meta["text"]
    meta_file.write(json.dumps(meta) + '\n')
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="A script to tag images via DeepDanbooru")
    parser.add_argument('--batch', '-b', default=4, type=int, help="Batch size to use for inference")
    parser.add_argument('--common_description', '-c', help="An optional description that will be prepended to the AI-generated one")
    parser.add_argument('--image_dir', '-i', help="A directory containing the images to tag")
    args = parser.parse_args()

    nparallel = 2
    image_paths = find_image_files(args.image_dir)
    image_path_chunks = list(split_list(image_paths, nparallel))

    print(f"Will use {nparallel} processes to create tags")

    queue = Queue()
    processes = list()
    for i in range(0, nparallel):
        processes.append(Process(target=pipeline, args=(queue, image_path_chunks[i], i)))
        processes[-1].start()

    progress = tqdm(desc="Generating tags", total=len(image_paths))
    done = False
    with open(os.path.join(args.image_dir, "metadata.jsonl"), mode='w') as output_file:
        while not done:
            if not queue.empty():
                meta = queue.get()
                save_meta(output_file, meta, args.image_dir, args.common_description)
                progress.update()
            # finished once every worker process has exited
            done = all(not process.is_alive() for process in processes)
        while not queue.empty():
            meta = queue.get()
            save_meta(output_file, meta, args.image_dir, args.common_description)
            progress.update()
    for process in processes:
        process.join()
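Each line save_meta writes to metadata.jsonl is a self-contained JSON object holding a file_name relative to the image directory and the generated tag text. A minimal sketch of reading the result back (load_metadata is an illustrative helper, not part of the script):

```python
import json
import os

def load_metadata(image_dir: str) -> list[dict]:
    # one {"file_name": ..., "text": ...} object per line
    with open(os.path.join(image_dir, "metadata.jsonl")) as f:
        return [json.loads(line) for line in f]
```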

@@ -0,0 +1,3 @@
from .deepdanbooru_onnx import DeepDanbooru
from .deepdanbooru_onnx import process_image
__version__ = '0.0.8'

@@ -0,0 +1,244 @@
import onnxruntime as ort
from PIL import Image
import numpy as np
import os
from tqdm import tqdm
import requests
import hashlib
from typing import Iterator, Union
import shutil
from pathlib import Path
def process_image(image: Image.Image) -> np.ndarray:
    """
    Convert an image to a numpy array.
    :param image: the image to convert
    :return: the numpy array
    """
    image = image.convert("RGB").resize((512, 512))
    image = np.array(image).astype(np.float32) / 255
    # rearrange into the NHWC layout (1, 512, 512, 3) the model expects
    image = image.transpose((2, 0, 1)).reshape(1, 3, 512, 512).transpose((0, 2, 3, 1))
    return image
def download(url: str, save_path: str, md5: str, length: int) -> bool:
    """
    Download a file from url to save_path and verify its md5.
    :param url: the url of the file to download
    :param save_path: the path to save the file
    :param md5: the expected md5 of the file
    :param length: the length of the file in bytes
    :return: True if the file was downloaded and its md5 matches, False otherwise
    """
    try:
        response = requests.get(url=url, stream=True)
        with open(save_path, "wb") as f:
            with tqdm.wrapattr(
                response.raw, "read", total=length, desc="Downloading"
            ) as r_raw:
                shutil.copyfileobj(r_raw, f)
        return hashlib.md5(open(save_path, "rb").read()).hexdigest() == md5
    except Exception as e:
        print(e)
        return False
def download_model():
    """
    Download the model and tags file from the server.
    :return: the path to the model and tags file
    """
    model_url = (
        "https://huggingface.co/chinoll/deepdanbooru/resolve/main/deepdanbooru.onnx"
    )
    tags_url = "https://huggingface.co/chinoll/deepdanbooru/resolve/main/tags.txt"
    model_md5 = "16be4e40ebcc0b1d1915bbf31f00969f"
    tags_md5 = "a3f764de985cdeba89f1d232a4204402"
    model_length = 643993025
    tags_length = 133810

    home = str(Path.home()) + "/.deepdanbooru_onnx/"
    if not os.path.exists(home):
        os.mkdir(home)

    model_name = "deepdanbooru.onnx"
    tags_name = "tags.txt"

    model_path = home + model_name
    tags_path = home + tags_name
    if os.path.exists(model_path):
        if hashlib.md5(open(model_path, "rb").read()).hexdigest() != model_md5:
            os.remove(model_path)
            if not download(model_url, model_path, model_md5, model_length):
                raise ValueError("Model download failed")
    else:
        if not download(model_url, model_path, model_md5, model_length):
            raise ValueError("Model download failed")

    if os.path.exists(tags_path):
        if hashlib.md5(open(tags_path, "rb").read()).hexdigest() != tags_md5:
            os.remove(tags_path)
            if not download(tags_url, tags_path, tags_md5, tags_length):
                raise ValueError("Tags download failed")
    else:
        if not download(tags_url, tags_path, tags_md5, tags_length):
            raise ValueError("Tags download failed")
    return model_path, tags_path
class DeepDanbooru:
    def __init__(
        self,
        mode: str = "auto",
        model_path: Union[str, None] = None,
        tags_path: Union[str, None] = None,
        threshold: Union[float, int] = 0.6,
        pin_memory: bool = False,
        batch_size: int = 1,
    ):
        """
        Initialize the DeepDanbooru class.
        :param mode: the mode of the model, "cpu", "gpu", "tensorrt" or "auto"
        :param model_path: the path to the model file
        :param tags_path: the path to the tags file
        :param threshold: the score threshold below which tags are discarded
        :param pin_memory: whether to preprocess all images up front and keep them in memory
        :param batch_size: the batch size of the model
        """
        providers = {
            "cpu": "CPUExecutionProvider",
            "gpu": "CUDAExecutionProvider",
            "tensorrt": "TensorrtExecutionProvider",
            "auto": (
                "CUDAExecutionProvider"
                if "CUDAExecutionProvider" in ort.get_available_providers()
                else "CPUExecutionProvider"
            ),
        }

        if not (isinstance(threshold, float) or isinstance(threshold, int)):
            raise TypeError("threshold must be float or int")
        if threshold < 0 or threshold > 1:
            raise ValueError("threshold must be between 0 and 1")
        if mode not in providers:
            raise ValueError(
                "Mode not supported. Please choose from: cpu, gpu, tensorrt, auto"
            )
        if providers[mode] not in ort.get_available_providers():
            raise ValueError(
                f"Your device does not support {mode}. Please choose from: cpu"
            )
        if model_path is not None and not os.path.exists(model_path):
            raise FileNotFoundError("Model file not found")
        if tags_path is not None and not os.path.exists(tags_path):
            raise FileNotFoundError("Tags file not found")

        if model_path is None or tags_path is None:
            model_path, tags_path = download_model()

        self.session = ort.InferenceSession(model_path, providers=[providers[mode]])
        self.tags = [i.replace("\n", "") for i in open(tags_path, "r").readlines()]

        self.input_name = self.session.get_inputs()[0].name
        self.output_name = [output.name for output in self.session.get_outputs()]
        self.threshold = threshold
        self.pin_memory = pin_memory
        self.batch_size = batch_size
        self.mode = mode
        self.cache = {}

    def __str__(self) -> str:
        return f"DeepDanbooru(mode={self.mode}, threshold={self.threshold}, pin_memory={self.pin_memory}, batch_size={self.batch_size})"

    def __repr__(self) -> str:
        return self.__str__()
    def from_image_inference(self, image: Image.Image) -> dict:
        image = process_image(image)
        return self.predict(image)

    def from_ndarray_inference(self, image: np.ndarray) -> dict:
        if image.shape != (1, 512, 512, 3):
            raise ValueError(f"Image must be {(1, 512, 512, 3)}")
        return self.predict(image)

    def from_file_inference(self, image: str) -> dict:
        return self.from_image_inference(Image.open(image))

    def from_list_inference(self, image: Union[list, tuple]) -> Iterator[dict]:
        if self.pin_memory:
            image = [process_image(Image.open(i)) for i in image]
        for i in [
            image[i : i + self.batch_size]
            for i in range(0, len(image), self.batch_size)
        ]:
            imagelist = i
            bs = len(i)
            _imagelist, idx, hashlist = [], [], []
            for j in range(len(i)):
                img = Image.open(i[j]) if not self.pin_memory else imagelist[j]
                image_hash = hashlib.md5(np.array(img).astype(np.uint8)).hexdigest()
                hashlist.append(image_hash)
                if image_hash in self.cache:
                    continue
                if not self.pin_memory:
                    _imagelist.append(process_image(img))
                else:
                    _imagelist.append(imagelist[j])
                idx.append(j)

            imagelist = _imagelist
            if len(imagelist) != 0:
                _image = np.vstack(imagelist)
                results = self.inference(_image)
                results_idx = 0
            else:
                results = []

            for i in range(bs):
                image_tag = {}
                if i in idx:
                    hash = hashlist[i]
                    for tag, score in zip(self.tags, results[results_idx]):
                        if score >= self.threshold:
                            image_tag[tag] = score
                    results_idx += 1
                    self.cache[hash] = image_tag
                    yield image_tag
                else:
                    yield self.cache[hashlist[i]]

    def inference(self, image):
        return self.session.run(self.output_name, {self.input_name: image})[0]

    def predict(self, image):
        result = self.inference(image)
        image_tag = {}
        for tag, score in zip(self.tags, result[0]):
            if score >= self.threshold:
                image_tag[tag] = score
        return image_tag

    def __call__(self, image) -> Union[dict, Iterator[dict]]:
        if isinstance(image, str):
            return self.from_file_inference(image)
        elif isinstance(image, np.ndarray):
            return self.from_ndarray_inference(image)
        elif isinstance(image, list) or isinstance(image, tuple):
            return self.from_list_inference(image)
        elif isinstance(image, Image.Image):
            return self.from_image_inference(image)
        else:
            raise ValueError("Image must be a file path, a numpy array, or a list/tuple")
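As the __call__ dispatch above shows, the class accepts a file path, a PIL image, a preprocessed (1, 512, 512, 3) array, or a list/tuple of paths. A minimal usage sketch (file names are hypothetical):

```python
from deepdanbooru_onnx import DeepDanbooru

danbooru = DeepDanbooru(mode="auto", threshold=0.6)

# a single path returns one {tag: score} dict
tags = danbooru("example.jpg")
print(sorted(tags, key=tags.get, reverse=True)[:10])

# a list of paths returns a generator that yields one dict per image
for image_tags in danbooru(["a.jpg", "b.jpg"]):
    print(len(image_tags), "tags")
```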

LLavaTagger/README.md Normal file
@@ -0,0 +1,21 @@
# LLavaTagger
LLavaTagger is a Python script that tags images based on a given prompt using the [LLaVA](https://llava-vl.github.io/) multimodal LLM. LLavaTagger supports using any number of GPUs in DDP parallel for this task.
## How to use
First create a Python venv and install the required packages into it:
$ python -m venv venv
$ source venv/bin/activate
$ pip install -r requirements.txt
Then run LLavaTagger for instance like so:
$ python LLavaTagger.py --common_description "an image of a cat, " --prompt "describe the cat in 10 to 20 words" --batch 8 --quantize --image_dir ~/cat_images
By default LLavaTagger will run in parallel on all available GPUs; if this is undesirable, use the ROCR_VISIBLE_DEVICES= or CUDA_VISIBLE_DEVICES= environment variable to hide unwanted GPUs.
LLavaTagger will then create a meta.jsonl in the image directory suitable to be used by the training scripts of [diffusers](https://github.com/huggingface/diffusers) to train Stable Diffusion (XL). If other formats are desired, ../utils contains scripts to transform the metadata into other formats, for instance for use with [kohya](https://github.com/bmaltais/kohya_ss).
If editing the created tags is desired, [QImageTagger](https://uvos.xyz/git/uvos/QImageTagger) can be used for this purpose.
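For reference, a caption dataset in this layout can be loaded with the datasets library's imagefolder builder — a minimal sketch, assuming the metadata file follows the metadata.jsonl naming that loader expects:

```python
from datasets import load_dataset

# loads the images plus the per-image "text" column from the metadata file
dataset = load_dataset("imagefolder", data_dir="~/cat_images", split="train")
print(dataset[0]["text"])
```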

@@ -0,0 +1,11 @@
accelerate==0.29.0
bitsandbytes
huggingface-hub==0.22.2
ninja==1.11.1.1
safetensors==0.4.2
tokenizers==0.15.2
transformers
torch
opencv-python
numpy
tqdm

@@ -0,0 +1,174 @@
#!/usr/bin/env python3
# PersonDatasetAssembler - A tool to assemble images of a specific person from a
# directory of images or from a video file
# Copyright (C) 2024 Carl Philipp Klemm
#
# This file is part of PersonDatasetAssembler.
#
# PersonDatasetAssembler is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# PersonDatasetAssembler is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with PersonDatasetAssembler. If not, see <http://www.gnu.org/licenses/>.
import argparse
import os
from typing import Iterator
import cv2
import numpy
from tqdm import tqdm
from wand.exceptions import BlobError
from wand.image import Image
image_ext_ocv = [".bmp", ".jpeg", ".jpg", ".png"]
image_ext_wand = [".dng", ".arw"]
class LoadException(Exception):
    pass
def find_image_files(path: str) -> list[str]:
    paths = list()
    for root, dirs, files in os.walk(path):
        for filename in files:
            name, extension = os.path.splitext(filename)
            extension = extension.lower()
            if extension in image_ext_ocv or extension in image_ext_wand:
                paths.append(os.path.join(root, filename))
    return paths
def image_loader(paths: list[str]) -> Iterator[numpy.ndarray]:
    for path in paths:
        name, extension = os.path.splitext(path)
        extension = extension.lower()
        if extension in image_ext_ocv:
            image = cv2.imread(path)
            if image is None:
                print(f"Warning: could not load {path}")
            else:
                yield image
        elif extension in image_ext_wand:
            try:
                image = Image(filename=path)
            except BlobError as e:
                print(f"Warning: could not load {path}, {e}")
                continue
            # wand exposes the decoded image via the numpy array interface; convert to BGR for OpenCV
            yield cv2.cvtColor(numpy.array(image), cv2.COLOR_RGB2BGR)
def extract_video_images(video: cv2.VideoCapture, interval: int = 0):
    ret = True
    frame_counter = 0
    while ret:
        video.set(cv2.CAP_PROP_POS_FRAMES, frame_counter)
        ret, frame = video.read()
        if ret:
            yield frame
        frame_counter += interval
def contains_face_match(detector: cv2.FaceDetectorYN, recognizer: cv2.FaceRecognizerSF, image: numpy.ndarray, referance_features: list, thresh: float) -> tuple[float, bool]:
    detector.setInputSize([image.shape[1], image.shape[0]])
    faces = detector.detect(image)[1]
    if faces is None:
        return 0, False
    for face in faces:
        cropped_image = recognizer.alignCrop(image, face)
        features = recognizer.feature(cropped_image)
        score_accum = 0.0
        for referance in referance_features:
            score_accum += recognizer.match(referance, features, 0)  # 0 == FR_COSINE
        score = score_accum / len(referance_features)
        if score > thresh:
            return score, True
    return 0, False
def process_referance(detector: cv2.FaceDetectorYN, recognizer: cv2.FaceRecognizerSF, referance_path: str) -> list:
    images = list()
    out = list()

    if os.path.isfile(referance_path):
        image = cv2.imread(referance_path)
        if image is None:
            print(f"Could not load image from {referance_path}")
        else:
            images.append(image)
    elif os.path.isdir(referance_path):
        filenames = find_image_files(referance_path)
        images = list(image_loader(filenames))

    for image in images:
        detector.setInputSize([image.shape[1], image.shape[0]])
        faces = detector.detect(image)[1]
        if faces is None:
            print("Unable to find a face in reference image")
            exit(1)
        image = recognizer.alignCrop(image, faces[0])
        features = recognizer.feature(image)
        out.append(features)

    return out
if __name__ == "__main__":
    parser = argparse.ArgumentParser("Script to assemble a dataset of images of a specific person")
    parser.add_argument('--out', '-o', default="out", help="place to put the dataset")
    parser.add_argument('--input', '-i', required=True, help="directory or video file to get images from")
    parser.add_argument('--skip', '-s', default=0, type=int, help="skip n frames between samples when grabbing from a video file")
    parser.add_argument('--referance', '-r', required=True, help="reference image or directory of images of the person to be found")
    parser.add_argument('--match_model', '-m', required=True, help="Path to the onnx recognition model to be used")
    parser.add_argument('--detect_model', '-d', required=True, help="Path to the onnx detection model to be used")
    parser.add_argument('--threshold', '-t', default=0.362, type=float, help="match threshold to use")
    parser.add_argument('--invert', '-n', action='store_true', help="output files that DON'T match")
    args = parser.parse_args()

    recognizer = cv2.FaceRecognizerSF.create(model=args.match_model, config="", backend_id=cv2.dnn.DNN_BACKEND_DEFAULT, target_id=cv2.dnn.DNN_TARGET_CPU)
    detector = cv2.FaceDetectorYN.create(model=args.detect_model, config="", input_size=[320, 320],
        score_threshold=0.6, nms_threshold=0.3, top_k=5000, backend_id=cv2.dnn.DNN_BACKEND_DEFAULT, target_id=cv2.dnn.DNN_TARGET_CPU)

    referance_features = process_referance(detector, recognizer, args.referance)
    if len(referance_features) < 1:
        print(f"Could not load any reference image(s) from {args.referance}")
        exit(1)

    if os.path.isfile(args.input):
        video = cv2.VideoCapture(args.input)
        if not video.isOpened():
            print(f"Unable to open {args.input} as a video file")
            exit(1)
        image_generator = extract_video_images(video, args.skip + 1)
        total_images = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) / (args.skip + 1)
    elif os.path.isdir(args.input):
        image_filenames = find_image_files(args.input)
        image_generator = image_loader(image_filenames)
        total_images = len(image_filenames)
    else:
        print(f"{args.input} is not a video file nor is it a directory")
        exit(1)

    os.makedirs(args.out, exist_ok=True)

    progress = tqdm(total=int(total_images), desc="0.00")
    counter = 0
    for image in image_generator:
        if image.shape[0] > 512:
            aspect = image.shape[0] / image.shape[1]
            resized = cv2.resize(image, (int(512 / aspect), 512), 0, 0, cv2.INTER_AREA)
        else:
            resized = image
        score, match = contains_face_match(detector, recognizer, resized, referance_features, args.threshold)
        if (match and not args.invert) or (not match and args.invert):
            filename = f"{counter:04}.png"
            cv2.imwrite(os.path.join(args.out, filename), image)
            counter += 1
        progress.set_description(f"{score:1.2f}")
        progress.update()

@@ -0,0 +1,20 @@
# PersonDatasetAssembler
PersonDatasetAssembler is a Python script that finds images of a specific person, specified by a reference image, in a directory of images or in a video file. PersonDatasetAssembler also supports raw images.
## How to use
First create a Python venv and install the required packages into it:
$ python -m venv venv
$ source venv/bin/activate
$ pip install -r requirements.txt
Then run PersonDatasetAssembler for instance like so:
$ python PersonDatasetAssembler.py --referance someperson.jpg --match_model ../Weights/face_recognition_sface_2021dec.onnx --detect_model ../Weights/face_detection_yunet_2023mar.onnx --input ~/Photos --out imagesOfSomePerson
Or to extract images from a video:
$ python PersonDatasetAssembler.py --referance someperson.jpg --match_model ../Weights/face_recognition_sface_2021dec.onnx --detect_model ../Weights/face_detection_yunet_2023mar.onnx -i ~/SomeVideo.mkv --out imagesOfSomePerson
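Under the hood, the script averages the cosine similarity between SFace feature vectors and compares it against --threshold. A minimal sketch of computing that score for two single-face images, useful when tuning the threshold (image paths are placeholders; the model files are the same ones passed above):

```python
import cv2

detector = cv2.FaceDetectorYN.create(model="face_detection_yunet_2023mar.onnx", config="", input_size=[320, 320])
recognizer = cv2.FaceRecognizerSF.create(model="face_recognition_sface_2021dec.onnx", config="")

def face_features(path: str):
    image = cv2.imread(path)
    detector.setInputSize([image.shape[1], image.shape[0]])
    faces = detector.detect(image)[1]
    return recognizer.feature(recognizer.alignCrop(image, faces[0]))

# cosine similarity; with the default settings, scores above 0.362 count as a match
score = recognizer.match(face_features("a.jpg"), face_features("b.jpg"), cv2.FaceRecognizerSF_FR_COSINE)
print(score)
```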

@@ -0,0 +1,4 @@
numpy==1.26.4
opencv-python==4.10.0.82
tqdm==4.66.4
Wand==0.6.13

README.md Normal file
@@ -0,0 +1,35 @@
# SDImagePreprocess
This repo contains a collection of high performance tools intended to ease the creation of datasets for image generation AI training, such as Stable Diffusion.
## Included tools
This repo contains the following tools:
### SmartCrop
SmartCrop is an application that performs content-aware cropping using [seam carving](https://en.wikipedia.org/wiki/Seam_carving) and resizing to bring a directory of images into the desired size and aspect ratio for training. SmartCrop is configurable to prioritize specific items or specific persons in the images provided.
#### Content detected in image:
![Content found in image](SmartCrop/images/IMGP3692.jpg)
#### Cropped image based on content:
![Cropped image](SmartCrop/images/IMGP3692C.jpg)
### PersonDatasetAssembler
PersonDatasetAssembler is a Python script that finds images of a specific person, specified by a reference image, in a directory of images or in a video file. PersonDatasetAssembler also supports raw images.
### LLavaTagger
LLavaTagger is a Python script that tags images based on a given prompt using the [LLaVA](https://llava-vl.github.io/) multimodal LLM. LLavaTagger supports using any number of GPUs in DDP parallel for this task.
### DanbooruTagger
DanbooruTagger is a Python script of dubious utility that tags images using the [DeepDanbooru](https://github.com/KichangKim/DeepDanbooru) convolutional network.
## License
All files in this repo are licensed GPL v3, see LICENSE

SmartCrop/CMakeLists.txt Normal file
@@ -0,0 +1,16 @@
cmake_minimum_required(VERSION 3.6)
find_package(OpenCV REQUIRED)
set(CMAKE_CXX_STANDARD 17)
set(SRC_FILES main.cpp yolo.cpp tokenize.cpp log.cpp seamcarving.cpp utils.cpp intelligentroi.cpp facerecognizer.cpp)
add_executable(smartcrop ${SRC_FILES})
target_link_libraries(smartcrop ${OpenCV_LIBS} -ltbb)
target_include_directories(smartcrop PRIVATE ${OpenCV_INCLUDE_DIRS})
target_compile_options(smartcrop PRIVATE -s -g -Wall)
message(WARNING ${WEIGHT_DIR})
target_compile_definitions(smartcrop PUBLIC WEIGHT_DIR="${WEIGHT_DIR}")
install(TARGETS smartcrop RUNTIME DESTINATION bin)

SmartCrop/README.md Normal file
@@ -0,0 +1,50 @@
# SmartCrop
SmartCrop is an application that performs content-aware cropping using [seam carving](https://en.wikipedia.org/wiki/Seam_carving) and resizing to bring a directory of images into the desired size and aspect ratio for training. SmartCrop is configurable to prioritize specific items or specific persons in the images provided.
## Requirements
* [cmake](https://cmake.org/) 3.6 or later
* [opencv](https://opencv.org/) 4.8 or later
* A C++17 capable compiler and standard library like gcc or llvm/clang
* git is required to get the source
## Building
The steps to build this application are:
$ git clone https://uvos.xyz/git/uvos/SDImagePreprocess.git
$ cd SDImagePreprocess
$ mkdir build
$ cd build
$ cmake ..
$ make
The binary can then be found in build/SmartCrop and can optionally be installed with:
$ sudo make install
## Basic usage
To process all images in the directory ~/images and output the images into ~/processedImages:
$ smartcrop --out processedImages ~/images/*
To also focus on the person in the image ~/person.jpg:
$ smartcrop --out processedImages --focus-person ~/person.jpg ~/images/*
To also enable seam carving:
$ smartcrop --out processedImages --focus-person ~/person.jpg --seam-carving ~/images/*
See smartcrop --help for more options
## Example
#### Content detected in image:
![Content found in image](images/IMGP3692.jpg)
#### Cropped image based on content:
![Cropped image](images/IMGP3692C.jpg)

@@ -1,11 +1,31 @@
//
// SmartCrop - A tool for content aware cropping of images
// Copyright (C) 2024 Carl Philipp Klemm
//
// This file is part of SmartCrop.
//
// SmartCrop is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// SmartCrop is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
//
#include "facerecognizer.h"
#include <filesystem>
#define INCBIN_PREFIX r
#include "incbin.h"
INCBIN(defaultRecognizer, "../face_recognition_sface_2021dec.onnx");
INCBIN(defaultDetector, "../face_detection_yunet_2023mar.onnx");
INCBIN(defaultRecognizer, WEIGHT_DIR "/face_recognition_sface_2021dec.onnx");
INCBIN(defaultDetector, WEIGHT_DIR "/face_detection_yunet_2023mar.onnx");
#include <opencv2/dnn/dnn.hpp>
#include <opencv2/core.hpp>
@@ -106,28 +126,35 @@ void FaceRecognizer::clearReferances()
referanceFeatures.clear();
}
std::pair<int, double> FaceRecognizer::isMatch(const cv::Mat& input, bool alone)
FaceRecognizer::Detection FaceRecognizer::isMatch(const cv::Mat& input, bool alone)
{
cv::Mat faces = detectFaces(input);
if(alone && faces.rows > 1)
return {-2, 0};
Detection bestMatch;
bestMatch.confidence = 0;
bestMatch.person = -1;
std::pair<int, double> bestMatch = {-1, 0};
if(alone && faces.rows > 1)
{
bestMatch.person = -2;
return bestMatch;
}
for(int i = 0; i < faces.rows; ++i)
{
cv::Mat face;
recognizer->alignCrop(input, faces.row(0), face);
recognizer->alignCrop(input, faces.row(i), face);
cv::Mat features;
recognizer->feature(face, features);
features = features.clone();
for(size_t referanceIndex = 0; referanceIndex < referanceFeatures.size(); ++referanceIndex)
{
double score = recognizer->match(referanceFeatures[referanceIndex], features, cv::FaceRecognizerSF::FR_COSINE);
if(score > threshold && score > bestMatch.second)
if(score > threshold && score > bestMatch.confidence)
{
bestMatch = {referanceIndex, score};
bestMatch.confidence = score;
bestMatch.person = referanceIndex;
bestMatch.rect = cv::Rect(faces.at<int>(i, 0), faces.at<int>(i, 1), faces.at<int>(i, 2), faces.at<int>(i, 3));
}
}
}

@@ -1,3 +1,22 @@
/*
 * SmartCrop - A tool for content aware cropping of images
* Copyright (C) 2024 Carl Philipp Klemm
*
* This file is part of SmartCrop.
*
* SmartCrop is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SmartCrop is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <exception>
#include <opencv2/core/mat.hpp>
@@ -11,6 +30,13 @@ class FaceRecognizer
{
public:
struct Detection
{
int person;
float confidence;
cv::Rect rect;
};
class LoadException : public std::exception
{
private:
@@ -33,7 +59,7 @@ private:
public:
FaceRecognizer(std::filesystem::path recognizerPath = "", const std::filesystem::path& detectorPath = "", const std::vector<cv::Mat>& referances = std::vector<cv::Mat>());
cv::Mat detectFaces(const cv::Mat& input);
std::pair<int, double> isMatch(const cv::Mat& input, bool alone = false);
Detection isMatch(const cv::Mat& input, bool alone = false);
bool addReferances(const std::vector<cv::Mat>& referances);
void setThreshold(double threashold);
double getThreshold();

@@ -1,3 +1,22 @@
/*
 * SmartCrop - A tool for content aware cropping of images
* Copyright (C) 2024 Carl Philipp Klemm
*
* This file is part of SmartCrop.
*
* SmartCrop is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SmartCrop is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file incbin.h
* @author Dale Weiler

@@ -1,3 +1,23 @@
//
// SmartCrop - A tool for content aware cropping of images
// Copyright (C) 2024 Carl Philipp Klemm
//
// This file is part of SmartCrop.
//
// SmartCrop is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// SmartCrop is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
//
#include "intelligentroi.h"
#include <opencv2/imgproc.hpp>

@@ -0,0 +1,37 @@
/*
 * SmartCrop - A tool for content aware cropping of images
* Copyright (C) 2024 Carl Philipp Klemm
*
* This file is part of SmartCrop.
*
* SmartCrop is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SmartCrop is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <opencv2/imgproc.hpp>
#include "yolo.h"
class InteligentRoi
{
private:
    int personId;
    static bool compPointPrio(const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b, const cv::Point2i& center);
    static void slideRectToPoint(cv::Rect& rect, const cv::Point2i& point);
    static cv::Rect maxRect(bool& incompleate, const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude = {});

public:
    InteligentRoi(const Yolo& yolo);
    bool getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize);
};

@@ -1,3 +1,23 @@
//
// SmartCrop - A tool for content aware croping of images
// Copyright (C) 2024 Carl Philipp Klemm
//
// This file is part of SmartCrop.
//
// SmartCrop is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// SmartCrop is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
//
#include <filesystem>
#include <iostream>
#include <opencv2/core.hpp>
@@ -243,12 +263,12 @@ static void reduceSize(cv::Mat& image, const cv::Size& targetSize)
if(image.cols > image.rows)
{
double ratio = static_cast<double>(longTargetSize)/image.cols;
cv::resize(image, image, {longTargetSize, static_cast<int>(image.rows*ratio)}, 0, 0, cv::INTER_CUBIC);
cv::resize(image, image, {longTargetSize, static_cast<int>(image.rows*ratio)}, 0, 0, ratio < 1 ? cv::INTER_AREA : cv::INTER_CUBIC);
}
else
{
double ratio = static_cast<double>(longTargetSize)/image.rows;
cv::resize(image, image, {static_cast<int>(image.cols*ratio), longTargetSize}, 0, 0, cv::INTER_CUBIC);
cv::resize(image, image, {static_cast<int>(image.cols*ratio), longTargetSize}, 0, 0, ratio < 1 ? cv::INTER_AREA : cv::INTER_CUBIC);
}
}
}
@@ -276,12 +296,13 @@ void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yol
{
cv::Mat person = image(detection.box);
reconizerMutex.lock();
std::pair<int, double> match = recognizer->isMatch(person);
FaceRecognizer::Detection match = recognizer->isMatch(person);
reconizerMutex.unlock();
if(match.first >= 0)
if(match.person >= 0)
{
detection.priority += 10;
hasmatch = true;
detections.push_back({0, "Face", match.confidence, 20, {255, 0, 0}, match.rect});
}
}
Log(Log::DEBUG)<<detection.class_id<<": "<<detection.className<<" at "<<detection.box<<" with prio "<<detection.priority<<(hasmatch ? " has match" : "");
@@ -429,7 +450,7 @@ int main(int argc, char* argv[])
std::vector<std::thread> threads;
std::vector<std::vector<std::filesystem::path>> imagePathParts = splitVector(imagePaths, std::thread::hardware_concurrency());
for(size_t i = 0; i < std::thread::hardware_concurrency(); ++i)
for(size_t i = 0; i < imagePathParts.size(); ++i)
threads.push_back(std::thread(threadFn, imagePathParts[i], std::ref(config), recognizer, std::ref(recognizerMutex), std::ref(debugOutputPath)));
for(std::thread& thread : threads)

@@ -1,3 +1,22 @@
/*
 * SmartCrop - A tool for content aware cropping of images
* Copyright (C) 2024 Carl Philipp Klemm
*
* This file is part of SmartCrop.
*
* SmartCrop is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SmartCrop is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <string>
#include <vector>

SmartCrop/readfile.h Normal file
@@ -0,0 +1,35 @@
/*
 * SmartCrop - A tool for content aware cropping of images
* Copyright (C) 2024 Carl Philipp Klemm
*
* This file is part of SmartCrop.
*
* SmartCrop is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SmartCrop is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <string>
#include <filesystem>
#include <fstream>
#include <stdexcept>
#include <sstream>
inline std::string readFile(const std::filesystem::path& path)
{
    std::ifstream file(path);
    if(!file.is_open())
        throw std::runtime_error(std::string("could not open file ") + path.string());
    std::stringstream ss;
    ss<<file.rdbuf();
    return ss.str();
}

@@ -1,3 +1,23 @@
//
// SmartCrop - A tool for content aware croping of images
// Copyright (C) 2024 Carl Philipp Klemm
//
// This file is part of SmartCrop.
//
// SmartCrop is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// SmartCrop is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
//
#include "seamcarving.h"
#include <opencv2/imgcodecs.hpp>

@@ -1,3 +1,22 @@
/*
 * SmartCrop - A tool for content aware cropping of images
* Copyright (C) 2024 Carl Philipp Klemm
*
* This file is part of SmartCrop.
*
* SmartCrop is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SmartCrop is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <opencv2/core/core.hpp>

SmartCrop/tokenize.cpp Normal file
@@ -0,0 +1,46 @@
//
// SmartCrop - A tool for content aware cropping of images
// Copyright (C) 2024 Carl Philipp Klemm
//
// This file is part of SmartCrop.
//
// SmartCrop is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// SmartCrop is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
//
#include "tokenize.h"
std::vector<std::string> tokenizeBinaryIgnore(const std::string& str, const char delim, const char ignoreBraket, const char escapeChar)
{
    std::vector<std::string> tokens;
    std::string token;
    bool inBaracket = false;
    for(size_t i = 0; i < str.size(); ++i)
    {
        if(str[i] == delim && !inBaracket && (i == 0 || str[i-1] != escapeChar))
        {
            tokens.push_back(token);
            token.clear();
        }
        else
        {
            token.push_back(str[i]);
        }
        if(ignoreBraket == str[i])
            inBaracket = !inBaracket;
    }
    if(!inBaracket)
        tokens.push_back(token);
    return tokens;
}

SmartCrop/tokenize.h Normal file
@@ -0,0 +1,26 @@
/*
 * SmartCrop - A tool for content aware cropping of images
* Copyright (C) 2024 Carl Philipp Klemm
*
* This file is part of SmartCrop.
*
* SmartCrop is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SmartCrop is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <string>
#include <vector>
std::vector<std::string> tokenizeBinaryIgnore(const std::string& str, const char delim, const char ignoreBraket = '\0',
const char escapeChar = '\0');

@@ -1,3 +1,23 @@
//
// SmartCrop - A tool for content aware cropping of images
// Copyright (C) 2024 Carl Philipp Klemm
//
// This file is part of SmartCrop.
//
// SmartCrop is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// SmartCrop is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
//
#include "utils.h"
#include <filesystem>

SmartCrop/utils.h Normal file
@@ -0,0 +1,34 @@
/*
 * SmartCrop - A tool for content aware cropping of images
* Copyright (C) 2024 Carl Philipp Klemm
*
* This file is part of SmartCrop.
*
* SmartCrop is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SmartCrop is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <filesystem>
#include <vector>
#include <opencv2/imgproc.hpp>
bool isImagePath(const std::filesystem::path& path);
void getImageFiles(const std::filesystem::path& path, std::vector<std::filesystem::path>& paths);
cv::Rect rectFromPoints(const std::vector<std::pair<cv::Point, int>>& points);
double pointDist(const cv::Point2i& pointA, const cv::Point2i& pointB);
bool pointInRect(const cv::Point2i& point, const cv::Rect& rect);

@@ -1,3 +1,23 @@
//
// SmartCrop - A tool for content aware cropping of images
// Copyright (C) 2024 Carl Philipp Klemm
//
// This file is part of SmartCrop.
//
// SmartCrop is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// SmartCrop is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
//
#include <opencv2/dnn/dnn.hpp>
#include <algorithm>
#include <string>
@@ -11,8 +31,8 @@
#define INCBIN_PREFIX r
#include "incbin.h"
INCTXT(defaultClasses, "../classes.txt");
INCBIN(defaultModel, "../yolov8x.onnx");
INCTXT(defaultClasses, WEIGHT_DIR "/classes.txt");
INCBIN(defaultModel, WEIGHT_DIR "/yolov8x.onnx");
Yolo::Yolo(const std::filesystem::path &onnxModelPath, const cv::Size &modelInputShape,
const std::filesystem::path& classesTxtFilePath, bool runWithOCl)

@@ -1,3 +1,22 @@
/*
 * SmartCrop - A tool for content aware cropping of images
* Copyright (C) 2024 Carl Philipp Klemm
*
* This file is part of SmartCrop.
*
* SmartCrop is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SmartCrop is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SmartCrop. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <fstream>

@@ -1,18 +0,0 @@
#pragma once
#include <opencv2/imgproc.hpp>
#include "yolo.h"
class InteligentRoi
{
private:
    int personId;
    static bool compPointPrio(const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b, const cv::Point2i& center);
    static void slideRectToPoint(cv::Rect& rect, const cv::Point2i& point);
    static cv::Rect maxRect(bool& incompleate, const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude = {});

public:
    InteligentRoi(const Yolo& yolo);
    bool getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize);
};

@@ -1,16 +0,0 @@
#pragma once
#include <string>
#include <filesystem>
#include <fstream>
#include <stdexcept>
#include <sstream>
inline std::string readFile(const std::filesystem::path& path)
{
    std::ifstream file(path);
    if(!file.is_open())
        throw std::runtime_error(std::string("could not open file ") + path.string());
    std::stringstream ss;
    ss<<file.rdbuf();
    return ss.str();
}

@@ -1,26 +0,0 @@
#include "tokenize.h"
std::vector<std::string> tokenizeBinaryIgnore(const std::string& str, const char delim, const char ignoreBraket, const char escapeChar)
{
    std::vector<std::string> tokens;
    std::string token;
    bool inBaracket = false;
    for(size_t i = 0; i < str.size(); ++i)
    {
        if(str[i] == delim && !inBaracket && (i == 0 || str[i-1] != escapeChar))
        {
            tokens.push_back(token);
            token.clear();
        }
        else
        {
            token.push_back(str[i]);
        }
        if(ignoreBraket == str[i])
            inBaracket = !inBaracket;
    }
    if(!inBaracket)
        tokens.push_back(token);
    return tokens;
}

@@ -1,7 +0,0 @@
#pragma once
#include <string>
#include <vector>
std::vector<std::string> tokenizeBinaryIgnore(const std::string& str, const char delim, const char ignoreBraket = '\0',
const char escapeChar = '\0');

utils.h
@@ -1,15 +0,0 @@
#pragma once
#include <filesystem>
#include <vector>
#include <opencv2/imgproc.hpp>
bool isImagePath(const std::filesystem::path& path);
void getImageFiles(const std::filesystem::path& path, std::vector<std::filesystem::path>& paths);
cv::Rect rectFromPoints(const std::vector<std::pair<cv::Point, int>>& points);
double pointDist(const cv::Point2i& pointA, const cv::Point2i& pointB);
bool pointInRect(const cv::Point2i& point, const cv::Rect& rect);

Binary file not shown.