Compare commits
438c9d726c ... ce3279254f (10 commits)

Commits in this range:
ce3279254f
55953bcdb7
f97f4640a9
03e2b3119a
81475815fb
35cfa8a906
a279001151
b2ffbfa530
b3c2d585ae
f5dad284e6
CMakeLists.txt (modified)
@@ -1,15 +1,7 @@
 cmake_minimum_required(VERSION 3.6)
-project(AIImagePrepross)
-
-find_package(OpenCV REQUIRED)
+project(ImageAiUtils)
 
-set(CMAKE_CXX_STANDARD 17)
+set(WEIGHT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/Weights)
 
-set(SRC_FILES main.cpp yolo.cpp tokenize.cpp log.cpp seamcarvingvert.cpp seamcarvinghoriz.cpp seamcarving.cpp utils.cpp intelligentroi.cpp)
-
-add_executable(${PROJECT_NAME} ${SRC_FILES})
-target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} -ltbb)
-target_include_directories(${PROJECT_NAME} PRIVATE ${OpenCV_INCLUDE_DIRS})
-target_compile_options(${PROJECT_NAME} PRIVATE -s -g -Wall)
-
-install(TARGETS ${PROJECT_NAME} RUNTIME DESTINATION bin)
+add_subdirectory(SmartCrop)
DanbooruTagger/DanbooruTagger.py (new file, 141 lines)
@@ -0,0 +1,141 @@
import warnings
import argparse
import os
import json
from typing import Iterator

import cv2
import numpy
import torch
from torch.multiprocessing import Process, Queue
from tqdm import tqdm
# The pipeline below drives a Llava model via transformers; these imports are
# needed for it to run (the DeepDanbooru import is currently unused).
from transformers import AutoProcessor, BitsAndBytesConfig, LlavaForConditionalGeneration, logging
from deepdanbooru_onnx import DeepDanbooru


image_ext_ocv = [".bmp", ".jpeg", ".jpg", ".png"]


def find_image_files(path: str) -> list[str]:
    paths = list()
    for root, dirs, files in os.walk(path):
        for filename in files:
            name, extension = os.path.splitext(filename)
            if extension.lower() in image_ext_ocv:
                paths.append(os.path.join(root, filename))
    return paths


def image_loader(paths: list[str]) -> Iterator[numpy.ndarray]:
    for path in paths:
        imagebgr = cv2.imread(path)
        if imagebgr is None:
            print(f"Warning: could not load {path}")
            continue
        # cv2 loads BGR; the processor expects RGB
        yield cv2.cvtColor(imagebgr, cv2.COLOR_BGR2RGB), path


def pipeline(queue: Queue, image_paths: list[str], prompt: str, device: torch.device, model_name_or_path: str, batch_size: int):
    model = LlavaForConditionalGeneration.from_pretrained(model_name_or_path, torch_dtype=torch.float16, low_cpu_mem_usage=None,
        quantization_config=BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_use_double_quant=False,
            bnb_4bit_quant_type='nf4',
        ), device_map=device, attn_implementation="flash_attention_2")
    processor = AutoProcessor.from_pretrained(model_name_or_path)
    image_generator = image_loader(image_paths)

    stop = False
    finished_count = 0
    while not stop:
        prompts = list()
        images = list()
        filenames = list()
        for i in range(0, batch_size):
            image, filename = next(image_generator, (None, None))
            if image is None:
                stop = True
                break

            filenames.append(filename)
            images.append(image)
            prompts.append(prompt)

        if len(images) == 0:
            break

        inputs = processor(text=prompts, images=images, return_tensors="pt").to(model.device)
        generate_ids = model.generate(**inputs, max_new_tokens=100, min_new_tokens=3, length_penalty=1.0, do_sample=False, temperature=1.0, top_k=50, top_p=1.0)
        decodes = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
        finished_count += len(images)
        for i, decoded in enumerate(decodes):
            # strip the echoed prompt, keeping only the generated description
            trim = len(prompt) - len("<image>")
            queue.put({"file_name": filenames[i], "text": decoded[trim:].strip()})


def split_list(input_list, count):
    target_length = int(len(input_list) / count)
    for i in range(0, count - 1):
        yield input_list[i * target_length: (i + 1) * target_length]
    yield input_list[(count - 1) * target_length: len(input_list)]


def save_meta(meta_file, meta, reldir, common_description):
    meta["file_name"] = os.path.relpath(meta["file_name"], reldir)
    if common_description is not None:
        meta["text"] = common_description + meta["text"]
    meta_file.write(json.dumps(meta) + '\n')


if __name__ == "__main__":
    parser = argparse.ArgumentParser("A script to tag images via llava")
    parser.add_argument('--model', '-m', default="llava-hf/llava-1.5-13b-hf", help="model to use")
    parser.add_argument('--quantize', '-q', action='store_true', help="load quantized")
    parser.add_argument('--prompt', '-p', default="Please describe this image in 10 to 20 words.", help="Prompt to use on each image")
    parser.add_argument('--batch', '-b', default=4, type=int, help="Batch size to use for inference")
    parser.add_argument('--common_description', '-c', help="An optional description that will be prepended to the ai generated one")
    parser.add_argument('--image_dir', '-i', help="A directory containing the images to tag")
    args = parser.parse_args()

    prompt = "USER: <image>\n" + args.prompt + "\nASSISTANT: "
    os.environ["BITSANDBYTES_NOWELCOME"] = "1"

    image_paths = find_image_files(args.image_dir)
    image_path_chunks = list(split_list(image_paths, torch.cuda.device_count()))

    print(f"Will use {torch.cuda.device_count()} processes to create tags")

    logging.set_verbosity_error()
    warnings.filterwarnings("ignore")
    torch.multiprocessing.set_start_method('spawn')

    queue = Queue()
    processes = list()
    for i in range(0, torch.cuda.device_count()):
        processes.append(Process(target=pipeline, args=(queue, image_path_chunks[i], prompt, torch.device(i), args.model, args.batch)))
        processes[-1].start()

    progress = tqdm(desc="Generating tags", total=len(image_paths))
    exit = False
    with open(os.path.join(args.image_dir, "metadata.jsonl"), mode='w') as output_file:
        while not exit:
            if not queue.empty():
                meta = queue.get()
                save_meta(output_file, meta, args.image_dir, args.common_description)
                progress.update()
            exit = True
            for process in processes:
                if process.is_alive():
                    exit = False
                    break

        # drain anything the workers enqueued after the last poll
        while not queue.empty():
            meta = queue.get()
            save_meta(output_file, meta, args.image_dir, args.common_description)
            progress.update()

    for process in processes:
        process.join()
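The control flow above may be easier to see in miniature: one worker process per GPU pushes results into a shared Queue, and the parent polls that queue until every worker has exited, then drains the remainder. A minimal sketch of the same pattern, with an illustrative stand-in for the model call (all names here are placeholders):

    from torch.multiprocessing import Process, Queue, set_start_method

    def worker(queue, paths):
        for path in paths:
            # stand-in for batched model inference
            queue.put({"file_name": path, "text": "tags for " + path})

    if __name__ == "__main__":
        set_start_method("spawn")  # must happen before workers touch CUDA
        queue = Queue()
        chunks = [["a.png"], ["b.png"]]  # one chunk per worker/GPU
        procs = [Process(target=worker, args=(queue, chunk)) for chunk in chunks]
        for p in procs:
            p.start()
        done = 0
        while done < 2:
            meta = queue.get()  # blocks until a worker produces a result
            done += 1
            print(meta)
        for p in procs:
            p.join()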
DanbooruTagger/deepdanbooru_onnx/__init__.py (new file, 3 lines)
@@ -0,0 +1,3 @@
from .deepdanbooru_onnx import DeepDanbooru
from .deepdanbooru_onnx import process_image

__version__ = '0.0.8'
DanbooruTagger/deepdanbooru_onnx/deepdanbooru_onnx.py (new file, 244 lines)
@@ -0,0 +1,244 @@
import onnxruntime as ort
from PIL import Image
import numpy as np
import os
from tqdm import tqdm
import requests
import hashlib
from typing import Iterator, List, Union
import shutil
from pathlib import Path


def process_image(image: Image.Image) -> np.ndarray:
    """
    Convert an image to a numpy array.
    :param image: the image to convert
    :return: the numpy array
    """

    image = image.convert("RGB").resize((512, 512))
    image = np.array(image).astype(np.float32) / 255
    # equivalent to adding a batch axis: output shape is (1, 512, 512, 3), NHWC
    image = image.transpose((2, 0, 1)).reshape(1, 3, 512, 512).transpose((0, 2, 3, 1))
    return image


def download(url: str, save_path: str, md5: str, length: int) -> bool:
    """
    Download a file from url to save_path.
    If the file already exists, check its md5.
    If the md5 matches, return True; if it doesn't match, return False.
    :param url: the url of the file to download
    :param save_path: the path to save the file
    :param md5: the md5 of the file
    :param length: the length of the file in bytes
    :return: True if the file is downloaded successfully, False otherwise
    """

    try:
        response = requests.get(url=url, stream=True)
        with open(save_path, "wb") as f:
            with tqdm.wrapattr(
                response.raw, "read", total=length, desc="Downloading"
            ) as r_raw:
                shutil.copyfileobj(r_raw, f)
        return hashlib.md5(open(save_path, "rb").read()).hexdigest() == md5
    except Exception as e:
        print(e)
        return False


def download_model():
    """
    Download the model and tags file from the server.
    :return: the path to the model and tags file
    """

    model_url = (
        "https://huggingface.co/chinoll/deepdanbooru/resolve/main/deepdanbooru.onnx"
    )
    tags_url = "https://huggingface.co/chinoll/deepdanbooru/resolve/main/tags.txt"
    model_md5 = "16be4e40ebcc0b1d1915bbf31f00969f"
    tags_md5 = "a3f764de985cdeba89f1d232a4204402"
    model_length = 643993025
    tags_length = 133810

    home = str(Path.home()) + "/.deepdanbooru_onnx/"
    if not os.path.exists(home):
        os.mkdir(home)

    model_name = "deepdanbooru.onnx"
    tags_name = "tags.txt"

    model_path = home + model_name
    tags_path = home + tags_name
    if os.path.exists(model_path):
        # re-download if the cached model is corrupt
        if hashlib.md5(open(model_path, "rb").read()).hexdigest() != model_md5:
            os.remove(model_path)
            if not download(model_url, model_path, model_md5, model_length):
                raise ValueError("Model download failed")
    else:
        if not download(model_url, model_path, model_md5, model_length):
            raise ValueError("Model download failed")

    if os.path.exists(tags_path):
        if hashlib.md5(open(tags_path, "rb").read()).hexdigest() != tags_md5:
            os.remove(tags_path)
            if not download(tags_url, tags_path, tags_md5, tags_length):
                raise ValueError("Tags download failed")
    else:
        if not download(tags_url, tags_path, tags_md5, tags_length):
            raise ValueError("Tags download failed")
    return model_path, tags_path


class DeepDanbooru:
    def __init__(
        self,
        mode: str = "auto",
        model_path: Union[str, None] = None,
        tags_path: Union[str, None] = None,
        threshold: Union[float, int] = 0.6,
        pin_memory: bool = False,
        batch_size: int = 1,
    ):
        """
        Initialize the DeepDanbooru class.
        :param mode: the mode of the model, "cpu", "gpu", "tensorrt" or "auto"
        :param model_path: the path to the model file
        :param tags_path: the path to the tags file
        :param threshold: the threshold of the model
        :param pin_memory: whether to preload and cache images in memory
        :param batch_size: the batch size of the model
        """

        providers = {
            "cpu": "CPUExecutionProvider",
            "gpu": "CUDAExecutionProvider",
            "tensorrt": "TensorrtExecutionProvider",
            "auto": (
                "CUDAExecutionProvider"
                if "CUDAExecutionProvider" in ort.get_available_providers()
                else "CPUExecutionProvider"
            ),
        }

        if not (isinstance(threshold, float) or isinstance(threshold, int)):
            raise TypeError("threshold must be float or int")
        if threshold < 0 or threshold > 1:
            raise ValueError("threshold must be between 0 and 1")
        if mode not in providers:
            raise ValueError(
                "Mode not supported. Please choose from: cpu, gpu, tensorrt, auto"
            )
        if providers[mode] not in ort.get_available_providers():
            raise ValueError(
                f"Your device does not support {mode}. Please choose from: cpu"
            )
        if model_path is not None and not os.path.exists(model_path):
            raise FileNotFoundError("Model file not found")
        if tags_path is not None and not os.path.exists(tags_path):
            raise FileNotFoundError("Tags file not found")

        if model_path is None or tags_path is None:
            model_path, tags_path = download_model()

        self.session = ort.InferenceSession(model_path, providers=[providers[mode]])
        self.tags = [i.replace("\n", "") for i in open(tags_path, "r").readlines()]

        self.input_name = self.session.get_inputs()[0].name
        self.output_name = [output.name for output in self.session.get_outputs()]
        self.threshold = threshold
        self.pin_memory = pin_memory
        self.batch_size = batch_size
        self.mode = mode
        self.cache = {}

    def __str__(self) -> str:
        return f"DeepDanbooru(mode={self.mode}, threshold={self.threshold}, pin_memory={self.pin_memory}, batch_size={self.batch_size})"

    def __repr__(self) -> str:
        return self.__str__()

    def from_image_inference(self, image: Image.Image) -> dict:
        image = process_image(image)
        return self.predict(image)

    def from_ndarray_inferece(self, image: np.ndarray) -> dict:
        if image.shape != (1, 512, 512, 3):
            raise ValueError(f"Image must be {(1, 512, 512, 3)}")
        return self.predict(image)

    def from_file_inference(self, image: str) -> dict:
        return self.from_image_inference(Image.open(image))

    def from_list_inference(self, image: Union[list, tuple]) -> Iterator[dict]:
        # yields one {tag: score} dict per input, reusing cached results for
        # images that have been seen before
        if self.pin_memory:
            image = [process_image(Image.open(i)) for i in image]
        for i in [
            image[i : i + self.batch_size]
            for i in range(0, len(image), self.batch_size)
        ]:
            imagelist = i
            bs = len(i)
            _imagelist, idx, hashlist = [], [], []
            for j in range(len(i)):
                img = Image.open(i[j]) if not self.pin_memory else imagelist[j]
                image_hash = hashlib.md5(np.array(img).astype(np.uint8)).hexdigest()
                hashlist.append(image_hash)
                if image_hash in self.cache:
                    continue
                if not self.pin_memory:
                    _imagelist.append(process_image(img))
                else:
                    _imagelist.append(imagelist[j])
                idx.append(j)

            imagelist = _imagelist
            if len(imagelist) != 0:
                _image = np.vstack(imagelist)
                results = self.inference(_image)
                results_idx = 0
            else:
                results = []

            for i in range(bs):
                image_tag = {}
                if i in idx:
                    hash = hashlist[i]
                    for tag, score in zip(self.tags, results[results_idx]):
                        if score >= self.threshold:
                            image_tag[tag] = score
                    results_idx += 1
                    self.cache[hash] = image_tag
                    yield image_tag
                else:
                    yield self.cache[hashlist[i]]

    def inference(self, image):
        return self.session.run(self.output_name, {self.input_name: image})[0]

    def predict(self, image):
        result = self.inference(image)
        image_tag = {}
        for tag, score in zip(self.tags, result[0]):
            if score >= self.threshold:
                image_tag[tag] = score
        return image_tag

    def __call__(self, image) -> Union[dict, List[dict]]:
        if isinstance(image, str):
            return self.from_file_inference(image)
        elif isinstance(image, np.ndarray):
            return self.from_ndarray_inferece(image)
        elif isinstance(image, list) or isinstance(image, tuple):
            return self.from_list_inference(image)
        elif isinstance(image, Image.Image):
            return self.from_image_inference(image)
        else:
            raise ValueError("Image must be a file path or a numpy array or list/tuple")
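The transpose/reshape/transpose dance in process_image is a roundabout way of adding a batch axis to the HWC image, which is the (1, 512, 512, 3) NHWC shape that from_ndarray_inferece validates. A quick self-contained check of that equivalence, on synthetic data:

    import numpy as np

    image = np.random.rand(512, 512, 3).astype(np.float32)
    roundabout = image.transpose((2, 0, 1)).reshape(1, 3, 512, 512).transpose((0, 2, 3, 1))
    direct = image[np.newaxis, ...]  # just prepend a batch axis
    assert np.array_equal(roundabout, direct)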
DanbooruTagger/example.py (new file, 3 lines)
@@ -0,0 +1,3 @@
from deepdanbooru_onnx import DeepDanbooru

danbooru = DeepDanbooru()
print(danbooru("/run/media/philipp/20404acc-312c-44f2-b2d1-3a0a14257cc6/.Media/porn/00244-3145022840.png"))
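Calling the DeepDanbooru object returns a {tag: score} dict already filtered by the threshold. A small illustrative variation that turns the result into a comma-separated caption, highest-scoring tags first (the path and threshold here are placeholders):

    from deepdanbooru_onnx import DeepDanbooru

    danbooru = DeepDanbooru(threshold=0.6)
    tags = danbooru("some_image.png")
    caption = ", ".join(sorted(tags, key=tags.get, reverse=True))
    print(caption)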
PersonDatasetAssembler/PersonDatasetAssembler.py (new executable file, 154 lines)
@@ -0,0 +1,154 @@
#!/bin/python3
import argparse
import os
from typing import Iterator
import cv2
import numpy
from tqdm import tqdm
from wand.exceptions import BlobError
from wand.image import Image

image_ext_ocv = [".bmp", ".jpeg", ".jpg", ".png"]
image_ext_wand = [".dng", ".arw"]


class LoadException(Exception):
    pass


def find_image_files(path: str) -> list[str]:
    paths = list()
    for root, dirs, files in os.walk(path):
        for filename in files:
            name, extension = os.path.splitext(filename)
            if extension.lower() in image_ext_ocv or extension in image_ext_wand:
                paths.append(os.path.join(root, filename))
    return paths


def image_loader(paths: list[str]) -> Iterator[numpy.ndarray]:
    for path in paths:
        name, extension = os.path.splitext(path)
        extension = extension.lower()
        if extension in image_ext_ocv:
            image = cv2.imread(path)
            if image is None:
                print(f"Warning: could not load {path}")
            else:
                yield image
        elif extension in image_ext_wand:
            try:
                image = Image(filename=path)
            except BlobError as e:
                print(f"Warning: could not load {path}, {e}")
                continue
            # assumed conversion: turn the wand image into a BGR ndarray so raw
            # files flow through the same cv2 pipeline as the other formats
            yield cv2.cvtColor(numpy.array(image), cv2.COLOR_RGB2BGR)


def extract_video_images(video: cv2.VideoCapture, interval: int = 0):
    ret = True
    frame_counter = 0
    while ret:
        video.set(cv2.CAP_PROP_POS_FRAMES, frame_counter)
        ret, frame = video.read()
        if ret:
            yield frame
        frame_counter += interval


def contains_face_match(detector: cv2.FaceDetectorYN, recognizer: cv2.FaceRecognizerSF, image: numpy.ndarray, referance_features: list, thresh: float) -> tuple[float, bool]:
    detector.setInputSize([image.shape[1], image.shape[0]])
    faces = detector.detect(image)[1]
    if faces is None:
        return 0, False
    for face in faces:
        cropped_image = recognizer.alignCrop(image, face)
        features = recognizer.feature(cropped_image)
        score_accum = 0.0
        for referance in referance_features:
            score_accum += recognizer.match(referance, features, 0)
        # average the cosine similarity over all reference features
        score = score_accum / len(referance_features)
        if score > thresh:
            return score, True
    return 0, False


def process_referance(detector: cv2.FaceDetectorYN, recognizer: cv2.FaceRecognizerSF, referance_path: str) -> list:
    images = list()
    out = list()

    if os.path.isfile(referance_path):
        image = cv2.imread(referance_path)
        if image is None:
            print(f"Could not load image from {referance_path}")
        else:
            images.append(image)
    elif os.path.isdir(referance_path):
        filenames = find_image_files(referance_path)
        images = list(image_loader(filenames))

    for image in images:
        detector.setInputSize([image.shape[1], image.shape[0]])
        faces = detector.detect(image)[1]
        if faces is None:
            print("unable to find a face in reference image")
            exit(1)
        image = recognizer.alignCrop(image, faces[0])
        features = recognizer.feature(image)
        out.append(features)

    return out


if __name__ == "__main__":
    parser = argparse.ArgumentParser("Script to assemble a dataset of images of a specific person")
    parser.add_argument('--out', '-o', default="out", help="place to put dataset")
    parser.add_argument('--input', '-i', required=True, help="directory or video file to get images from")
    parser.add_argument('--skip', '-s', default=0, type=int, help="skip n frames between samples when grabbing from a video file")
    parser.add_argument('--referance', '-r', required=True, help="reference image or directory of images of the person to be found")
    parser.add_argument('--match_model', '-m', required=True, help="Path to the onnx recognition model to be used")
    parser.add_argument('--detect_model', '-d', required=True, help="Path to the onnx detection model to be used")
    parser.add_argument('--threshold', '-t', default=0.362, type=float, help="match threshold to use")
    parser.add_argument('--invert', '-n', action='store_true', help="output files that DONT match")
    args = parser.parse_args()

    recognizer = cv2.FaceRecognizerSF.create(model=args.match_model, config="", backend_id=cv2.dnn.DNN_BACKEND_DEFAULT, target_id=cv2.dnn.DNN_TARGET_CPU)
    detector = cv2.FaceDetectorYN.create(model=args.detect_model, config="", input_size=[320, 320],
        score_threshold=0.6, nms_threshold=0.3, top_k=5000, backend_id=cv2.dnn.DNN_BACKEND_DEFAULT, target_id=cv2.dnn.DNN_TARGET_CPU)

    referance_features = process_referance(detector, recognizer, args.referance)
    if len(referance_features) < 1:
        print(f"Could not load any reference image(s) from {args.referance}")
        exit(1)

    if os.path.isfile(args.input):
        video = cv2.VideoCapture(args.input)
        if not video.isOpened():
            print(f"Unable to open {args.input} as a video file")
            exit(1)
        image_generator = extract_video_images(video, args.skip + 1)
        total_images = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) / (args.skip + 1)
    elif os.path.isdir(args.input):
        image_filenames = find_image_files(args.input)
        image_generator = image_loader(image_filenames)
        total_images = len(image_filenames)
    else:
        print(f"{args.input} is not a video file nor is it a directory")
        exit(1)

    os.makedirs(args.out, exist_ok=True)

    progress = tqdm(total=int(total_images), desc="0.00")
    counter = 0
    for image in image_generator:
        # downscale tall images before detection to keep inference fast
        if image.shape[0] > 512:
            aspect = image.shape[0] / image.shape[1]
            resized = cv2.resize(image, (int(512 / aspect), 512), interpolation=cv2.INTER_AREA)
        else:
            resized = image
        score, match = contains_face_match(detector, recognizer, resized, referance_features, args.threshold)
        if (match and not args.invert) or (not match and args.invert):
            filename = f"{counter:04}.png"
            cv2.imwrite(os.path.join(args.out, filename), image)
            counter += 1
        progress.set_description(f"{score:1.2f}")
        progress.update()
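The acceptance rule in contains_face_match reduces to averaging the per-reference cosine similarities and comparing the mean against the threshold. The same arithmetic in isolation, with made-up scores:

    def is_match(candidate_scores: list[float], thresh: float = 0.362) -> bool:
        # mean similarity of one detected face against every reference feature
        score = sum(candidate_scores) / len(candidate_scores)
        return score > thresh

    print(is_match([0.41, 0.35]))  # mean 0.38 > 0.362, so True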
SmartCrop/CMakeLists.txt (new file, 16 lines)
@@ -0,0 +1,16 @@
cmake_minimum_required(VERSION 3.6)

find_package(OpenCV REQUIRED)

set(CMAKE_CXX_STANDARD 17)

set(SRC_FILES main.cpp yolo.cpp tokenize.cpp log.cpp seamcarving.cpp utils.cpp intelligentroi.cpp facerecognizer.cpp)

add_executable(smartcrop ${SRC_FILES})
target_link_libraries(smartcrop ${OpenCV_LIBS} -ltbb)
target_include_directories(smartcrop PRIVATE ${OpenCV_INCLUDE_DIRS})
target_compile_options(smartcrop PRIVATE -s -g -Wall)
message(WARNING ${WEIGHT_DIR})
target_compile_definitions(smartcrop PUBLIC WEIGHT_DIR="${WEIGHT_DIR}")

install(TARGETS smartcrop RUNTIME DESTINATION bin)
SmartCrop/facerecognizer.cpp (new file, 143 lines)
@@ -0,0 +1,143 @@
#include "facerecognizer.h"
#include <filesystem>

#define INCBIN_PREFIX r
#include "incbin.h"

INCBIN(defaultRecognizer, WEIGHT_DIR "/face_recognition_sface_2021dec.onnx");
INCBIN(defaultDetector, WEIGHT_DIR "/face_detection_yunet_2023mar.onnx");

#include <opencv2/dnn/dnn.hpp>
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <fstream>
#include <cassert>

#include "log.h"

static const std::vector<unsigned char> onnx((unsigned char*)rdefaultDetectorData, ((unsigned char*)rdefaultDetectorData)+rdefaultDetectorSize);

FaceRecognizer::FaceRecognizer(std::filesystem::path recognizerPath, const std::filesystem::path& detectorPath, const std::vector<cv::Mat>& referances)
{
    if(detectorPath.empty())
    {
        Log(Log::INFO)<<"Using builtin face detection model";
        detector = cv::FaceDetectorYN::create("onnx", onnx, std::vector<unsigned char>(), {320, 320}, 0.6, 0.3, 5000, cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
        if(!detector)
            throw LoadException("Unable to load detector network from built in file");
    }
    else
    {
        detector = cv::FaceDetectorYN::create(detectorPath, "", {320, 320}, 0.6, 0.3, 5000, cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
        if(!detector)
            throw LoadException("Unable to load detector network from "+detectorPath.string());
    }

    bool defaultNetwork = recognizerPath.empty();

    if(defaultNetwork)
    {
        Log(Log::INFO)<<"Using builtin face recognition model";
        // FaceRecognizerSF only loads from a file, so spill the embedded
        // network into a temporary file and remove it after loading
        recognizerPath = cv::tempfile("onnx");
        std::ofstream file(recognizerPath);
        if(!file.is_open())
            throw LoadException("Unable to open temporary file at "+recognizerPath.string());
        Log(Log::DEBUG)<<"Using "<<recognizerPath<<" as temporary file for onnx recognition network";
        file.write(reinterpret_cast<const char*>(rdefaultRecognizerData), rdefaultRecognizerSize);
        file.close();
    }

    recognizer = cv::FaceRecognizerSF::create(recognizerPath.string(), "", cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);

    if(defaultNetwork)
        std::filesystem::remove(recognizerPath);

    if(!recognizer)
        throw LoadException("Unable to load recognizer network from "+recognizerPath.string());

    addReferances(referances);
}

cv::Mat FaceRecognizer::detectFaces(const cv::Mat& input)
{
    detector->setInputSize(input.size());
    cv::Mat faces;
    detector->detect(input, faces);
    return faces;
}

bool FaceRecognizer::addReferances(const std::vector<cv::Mat>& referances)
{
    bool ret = false;
    for(const cv::Mat& image : referances)
    {
        cv::Mat faces = detectFaces(image);
        if(faces.empty())
        {
            Log(Log::WARN)<<"A reference image provided does not contain any face";
            continue;
        }
        // check the empty case first; an empty Mat has zero columns
        assert(faces.cols == 15);
        if(faces.rows > 1)
            Log(Log::WARN)<<"A reference image provided contains more than one face, only the first detected face will be considered";
        cv::Mat cropedImage;
        recognizer->alignCrop(image, faces.row(0), cropedImage);
        cv::Mat features;
        recognizer->feature(cropedImage, features);
        referanceFeatures.push_back(features.clone());
        ret = true;
    }

    return ret;
}

void FaceRecognizer::setThreshold(double threasholdIn)
{
    threshold = threasholdIn;
}

double FaceRecognizer::getThreshold()
{
    return threshold;
}

void FaceRecognizer::clearReferances()
{
    referanceFeatures.clear();
}

FaceRecognizer::Detection FaceRecognizer::isMatch(const cv::Mat& input, bool alone)
{
    cv::Mat faces = detectFaces(input);

    Detection bestMatch;
    bestMatch.confidence = 0;
    bestMatch.person = -1;

    if(alone && faces.rows > 1)
    {
        bestMatch.person = -2;
        return bestMatch;
    }

    for(int i = 0; i < faces.rows; ++i)
    {
        cv::Mat face;
        recognizer->alignCrop(input, faces.row(i), face);
        cv::Mat features;
        recognizer->feature(face, features);
        features = features.clone();
        for(size_t referanceIndex = 0; referanceIndex < referanceFeatures.size(); ++referanceIndex)
        {
            double score = recognizer->match(referanceFeatures[referanceIndex], features, cv::FaceRecognizerSF::FR_COSINE);
            if(score > threshold && score > bestMatch.confidence)
            {
                bestMatch.confidence = score;
                bestMatch.person = referanceIndex;
                bestMatch.rect = cv::Rect(faces.at<int>(i, 0), faces.at<int>(i, 1), faces.at<int>(i, 2), faces.at<int>(i, 3));
            }
        }
    }

    return bestMatch;
}
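The same detect, alignCrop, feature, cosine-match sequence is also reachable from OpenCV's Python bindings, which can be convenient for experimenting with thresholds before touching the C++. A hedged sketch, with placeholder model paths and images:

    import cv2

    # placeholder paths: download the YuNet and SFace ONNX models first
    detector = cv2.FaceDetectorYN.create("face_detection_yunet_2023mar.onnx", "", (320, 320), 0.6, 0.3, 5000)
    recognizer = cv2.FaceRecognizerSF.create("face_recognition_sface_2021dec.onnx", "")

    def features_of(image):
        # detection needs the real input size, just like detectFaces above
        detector.setInputSize((image.shape[1], image.shape[0]))
        faces = detector.detect(image)[1]
        if faces is None:
            return None
        aligned = recognizer.alignCrop(image, faces[0])
        return recognizer.feature(aligned)

    a, b = cv2.imread("person_a.jpg"), cv2.imread("person_b.jpg")
    fa, fb = features_of(a), features_of(b)
    if fa is not None and fb is not None:
        score = recognizer.match(fa, fb, cv2.FaceRecognizerSF_FR_COSINE)
        print("match" if score > 0.363 else "no match", score)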
SmartCrop/facerecognizer.h (new file, 48 lines)
@@ -0,0 +1,48 @@
#pragma once
#include <exception>
#include <opencv2/core/mat.hpp>
#include <opencv2/objdetect/face.hpp>
#include <opencv2/core.hpp>
#include <vector>
#include <memory>
#include <filesystem>

class FaceRecognizer
{
public:

    struct Detection
    {
        int person;
        float confidence;
        cv::Rect rect;
    };

    class LoadException : public std::exception
    {
    private:
        std::string message;
    public:
        LoadException(const std::string& msg): std::exception(), message(msg) {}
        virtual const char* what() const throw() override
        {
            return message.c_str();
        }
    };

private:
    std::vector<cv::Mat> referanceFeatures;
    std::shared_ptr<cv::FaceRecognizerSF> recognizer;
    std::shared_ptr<cv::FaceDetectorYN> detector;

    double threshold = 0.363;

public:
    FaceRecognizer(std::filesystem::path recognizerPath = "", const std::filesystem::path& detectorPath = "", const std::vector<cv::Mat>& referances = std::vector<cv::Mat>());
    cv::Mat detectFaces(const cv::Mat& input);
    Detection isMatch(const cv::Mat& input, bool alone = false);
    bool addReferances(const std::vector<cv::Mat>& referances);
    void setThreshold(double threashold);
    double getThreshold();
    void clearReferances();
};
SmartCrop/intelligentroi.cpp (modified)
@@ -31,11 +31,12 @@ void InteligentRoi::slideRectToPoint(cv::Rect& rect, const cv::Point2i& point)
 	}
 }
 
-cv::Rect InteligentRoi::maxRect(const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude)
+cv::Rect InteligentRoi::maxRect(bool& incompleate, const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude)
 {
-	int radius = std::min(imageSize.height, imageSize.width)/2;
+	incompleate = false;
+	int diameter = std::min(imageSize.height, imageSize.width);
 	cv::Point2i point(imageSize.width/2, imageSize.height/2);
-	cv::Rect candiate(point.x-radius, point.y-radius, radius*2, radius*2);
+	cv::Rect candiate(point.x-diameter/2, point.y-diameter/2, diameter, diameter);
 
 	std::sort(mustInclude.begin(), mustInclude.end(),
 		[&point](const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b){return compPointPrio(a, b, point);});
@@ -43,8 +44,9 @@ cv::Rect InteligentRoi::maxRect(const cv::Size2i& imageSize, std::vector<std::pa
 	while(true)
 	{
 		cv::Rect includeRect = rectFromPoints(mustInclude);
-		if(includeRect.width-2 > radius || includeRect.height-2 > radius)
+		if(includeRect.width-2 > diameter || includeRect.height-2 > diameter)
 		{
+			incompleate = true;
 			slideRectToPoint(candiate, mustInclude.back().first);
 			mustInclude.pop_back();
 			Log(Log::DEBUG)<<"cant fill";
@@ -52,7 +54,9 @@ cv::Rect InteligentRoi::maxRect(const cv::Size2i& imageSize, std::vector<std::pa
 			Log(Log::DEBUG)<<mipoint.first<<' '<<pointDist(mipoint.first, point)<<' '<<mipoint.second;
 		}
 		else
+		{
 			break;
+		}
 	}
 
 	for(const std::pair<cv::Point2i, int>& includePoint : mustInclude)
@@ -75,25 +79,30 @@ InteligentRoi::InteligentRoi(const Yolo& yolo)
 	personId = yolo.getClassForStr("person");
 }
 
-cv::Rect InteligentRoi::getCropRectangle(const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize)
+bool InteligentRoi::getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize)
 {
-	if(!detections.empty())
+	std::vector<std::pair<cv::Point2i, int>> corners;
+	for(size_t i = 0; i < detections.size(); ++i)
 	{
-		std::vector<std::pair<cv::Point2i, int>> corners;
-		for(size_t i = 0; i < detections.size(); ++i)
+		int priority = detections[i].priority;
+		if(detections[i].class_id == personId)
 		{
-			int priority = detections[i].priority;
-			if(detections[i].class_id == personId)
-				corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width/2, 0), priority+1});
-			corners.push_back({detections[i].box.tl(), priority});
-			corners.push_back({detections[i].box.br(), priority});
+			corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width/2, 0), priority+2});
+			corners.push_back({detections[i].box.tl(), priority+1});
+			corners.push_back({detections[i].box.br(), priority});
+			corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width, 0), priority+1});
+			corners.push_back({detections[i].box.br()+cv::Point2i(0-detections[i].box.width, 0), priority});
 		}
-		return maxRect(imageSize, corners);
-	}
-
-	Log(Log::DEBUG)<<"Using center crop as there are no detections";
-	return maxRect(imageSize);
+		else
+		{
+			corners.push_back({detections[i].box.tl(), priority});
+			corners.push_back({detections[i].box.br(), priority});
+			corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width, 0), priority});
+			corners.push_back({detections[i].box.br()+cv::Point2i(0-detections[i].box.width, 0), priority});
+		}
+	}
+
+	bool incompleate;
+	out = maxRect(incompleate, imageSize, corners);
+	return incompleate;
 }
SmartCrop/intelligentroi.h (modified)
@@ -10,9 +10,9 @@ private:
 	int personId;
 	static bool compPointPrio(const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b, const cv::Point2i& center);
 	static void slideRectToPoint(cv::Rect& rect, const cv::Point2i& point);
-	static cv::Rect maxRect(const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude = {});
+	static cv::Rect maxRect(bool& incompleate, const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude = {});
 
 public:
 	InteligentRoi(const Yolo& yolo);
-	cv::Rect getCropRectangle(const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize);
+	bool getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize);
 };
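In rough terms the reworked maxRect picks the largest centered square, slides it toward the must-include points, and reports through the new incompleate flag whether they could all be covered. An approximate Python rendering of that idea (not the exact sliding logic, purely illustrative):

    def max_rect(image_w: int, image_h: int, points: list[tuple[int, int]]):
        side = min(image_w, image_h)      # largest square that fits the image
        x = (image_w - side) // 2         # start centered
        y = (image_h - side) // 2
        incomplete = False
        if points:
            min_x = min(p[0] for p in points); max_x = max(p[0] for p in points)
            min_y = min(p[1] for p in points); max_y = max(p[1] for p in points)
            if max_x - min_x > side or max_y - min_y > side:
                incomplete = True  # caller may fall back to seam carving
            # slide the square (clamped to the image) toward the points
            x = max(0, min(min_x, image_w - side))
            y = max(0, min(min_y, image_h - side))
        return (x, y, side, side), incomplete

    print(max_rect(1920, 1080, [(100, 200), (700, 900)]))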
SmartCrop/main.cpp (new file, 440 lines)
@@ -0,0 +1,440 @@
#include <filesystem>
#include <iostream>
#include <opencv2/core.hpp>
#include <opencv2/core/types.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <algorithm>
#include <execution>
#include <string>
#include <vector>
#include <numeric>
#include <thread>
#include <mutex>

#include "yolo.h"
#include "log.h"
#include "options.h"
#include "utils.h"
#include "intelligentroi.h"
#include "seamcarving.h"
#include "facerecognizer.h"

const Yolo::Detection* pointInDetectionHoriz(int x, const std::vector<Yolo::Detection>& detections, const Yolo::Detection* ignore = nullptr)
{
    const Yolo::Detection* inDetection = nullptr;
    for(const Yolo::Detection& detection : detections)
    {
        if(ignore && ignore == &detection)
            continue;

        if(detection.box.x <= x && detection.box.x+detection.box.width >= x)
        {
            if(!inDetection || detection.box.br().x > inDetection->box.br().x)
                inDetection = &detection;
        }
    }
    return inDetection;
}

bool findRegionEndpointHoriz(int& x, const std::vector<Yolo::Detection>& detections, int imgSizeX)
{
    const Yolo::Detection* inDetection = pointInDetectionHoriz(x, detections);

    Log(Log::DEBUG, false)<<__func__<<" point "<<x;

    if(!inDetection)
    {
        const Yolo::Detection* closest = nullptr;
        for(const Yolo::Detection& detection : detections)
        {
            if(detection.box.x > x)
            {
                if(closest == nullptr || detection.box.x-x > closest->box.x-x)
                    closest = &detection;
            }
        }
        if(closest)
            x = closest->box.x;
        else
            x = imgSizeX;

        Log(Log::DEBUG)<<" is not in any box and will be moved to "<<x<<" where the closest box ("<<(closest ? closest->className : "null")<<") is";
        return false;
    }
    else
    {
        x = inDetection->box.br().x;
        Log(Log::DEBUG, false)<<" is in a box and will be moved to its end "<<x<<" where ";
        const Yolo::Detection* candidateDetection = pointInDetectionHoriz(x, detections, inDetection);
        if(candidateDetection && candidateDetection->box.br().x > x)
        {
            Log(Log::DEBUG)<<"it is again in a box";
            return findRegionEndpointHoriz(x, detections, imgSizeX);
        }
        else
        {
            Log(Log::DEBUG)<<"it is not in a box";
            return true;
        }
    }
}

std::vector<std::pair<cv::Mat, bool>> cutImageIntoHorzRegions(cv::Mat& image, const std::vector<Yolo::Detection>& detections)
{
    std::vector<std::pair<cv::Mat, bool>> out;

    std::cout<<__func__<<' '<<image.cols<<'x'<<image.rows<<std::endl;

    for(int x = 0; x < image.cols; ++x)
    {
        int start = x;
        bool frozen = findRegionEndpointHoriz(x, detections, image.cols);

        int width = x-start;
        if(x < image.cols)
            ++width;
        cv::Rect rect(start, 0, width, image.rows);
        Log(Log::DEBUG)<<__func__<<" region\t"<<rect;
        cv::Mat slice = image(rect);
        out.push_back({slice, frozen});
    }

    return out;
}

cv::Mat assembleFromSlicesHoriz(const std::vector<std::pair<cv::Mat, bool>>& slices)
{
    assert(!slices.empty());

    int cols = 0;
    for(const std::pair<cv::Mat, bool>& slice : slices)
        cols += slice.first.cols;

    // cv::Mat's constructor takes (rows, cols); all slices share a row count
    cv::Mat image(slices[0].first.rows, cols, slices[0].first.type());
    Log(Log::DEBUG)<<__func__<<' '<<image.size()<<' '<<cols<<' '<<slices[0].first.rows;

    int col = 0;
    for(const std::pair<cv::Mat, bool>& slice : slices)
    {
        cv::Rect rect(col, 0, slice.first.cols, slice.first.rows);
        Log(Log::DEBUG)<<__func__<<' '<<rect;
        slice.first.copyTo(image(rect));
        col += slice.first.cols-1;
    }

    return image;
}

void transposeRect(cv::Rect& rect)
{
    int x = rect.x;
    rect.x = rect.y;
    rect.y = x;

    int width = rect.width;
    rect.width = rect.height;
    rect.height = width;
}

bool seamCarveResize(cv::Mat& image, std::vector<Yolo::Detection> detections, double targetAspectRatio = 1.0)
{
    detections.erase(std::remove_if(detections.begin(), detections.end(), [](const Yolo::Detection& detection){return detection.priority < 3;}), detections.end());

    double aspectRatio = image.cols/static_cast<double>(image.rows);

    Log(Log::DEBUG)<<"Image size "<<image.size()<<" aspect ratio "<<aspectRatio<<" target aspect ratio "<<targetAspectRatio;

    bool vertical = false;
    if(aspectRatio > targetAspectRatio)
        vertical = true;

    int requiredLines = 0;
    if(!vertical)
        requiredLines = image.rows*targetAspectRatio - image.cols;
    else
        requiredLines = image.cols/targetAspectRatio - image.rows;

    Log(Log::DEBUG)<<__func__<<' '<<requiredLines<<" lines are required in "<<(vertical ? "vertical" : "horizontal")<<" direction";

    if(vertical)
    {
        cv::transpose(image, image);
        for(Yolo::Detection& detection : detections)
            transposeRect(detection.box);
    }

    std::vector<std::pair<cv::Mat, bool>> slices = cutImageIntoHorzRegions(image, detections);
    Log(Log::DEBUG)<<"Image has "<<slices.size()<<" slices:";
    int totalResizableSize = 0;
    for(const std::pair<cv::Mat, bool>& slice : slices)
    {
        Log(Log::DEBUG)<<"a "<<(slice.second ? "frozen" : "unfrozen")<<" slice of size "<<slice.first.cols;
        if(!slice.second)
            totalResizableSize += slice.first.cols;
    }

    if(totalResizableSize < requiredLines+1)
    {
        Log(Log::WARN)<<"Unable to seam carve as there are only "<<totalResizableSize<<" unfrozen cols";
        if(vertical)
            cv::transpose(image, image);
        return false;
    }

    // distribute the required seams over the unfrozen slices, proportional to their width
    std::vector<int> seamsForSlice(slices.size(), 0);
    for(size_t i = 0; i < slices.size(); ++i)
    {
        if(!slices[i].second)
            seamsForSlice[i] = (static_cast<double>(slices[i].first.cols)/totalResizableSize)*requiredLines;
    }

    // push the integer-rounding residual onto the last unfrozen slice
    int residual = requiredLines - std::accumulate(seamsForSlice.begin(), seamsForSlice.end(), decltype(seamsForSlice)::value_type(0));
    for(ssize_t i = slices.size()-1; i >= 0; --i)
    {
        if(!slices[i].second)
        {
            seamsForSlice[i] += residual;
            break;
        }
    }

    for(size_t i = 0; i < slices.size(); ++i)
    {
        if(seamsForSlice[i] != 0)
        {
            bool ret = SeamCarving::strechImage(slices[i].first, seamsForSlice[i], true);
            if(!ret)
            {
                if(vertical)
                    cv::transpose(image, image);
                return false;
            }
        }
    }

    image = assembleFromSlicesHoriz(slices);

    if(vertical)
        cv::transpose(image, image);

    return true;
}

void drawDebugInfo(cv::Mat &image, const cv::Rect& rect, const std::vector<Yolo::Detection>& detections)
{
    for(const Yolo::Detection& detection : detections)
    {
        cv::rectangle(image, detection.box, detection.color, 3);
        std::string label = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4) + ' ' + std::to_string(detection.priority);
        cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_DUPLEX, 1, 1, 0);
        cv::Rect textBox(detection.box.x, detection.box.y - 40, labelSize.width + 10, labelSize.height + 20);
        cv::rectangle(image, textBox, detection.color, cv::FILLED);
        cv::putText(image, label, cv::Point(detection.box.x + 5, detection.box.y - 10), cv::FONT_HERSHEY_DUPLEX, 1, cv::Scalar(0, 0, 0), 1, 0);
    }

    cv::rectangle(image, rect, cv::Scalar(0, 0, 255), 8);
}

static void reduceSize(cv::Mat& image, const cv::Size& targetSize)
{
    int longTargetSize = std::max(targetSize.width, targetSize.height)*2;
    if(std::max(image.cols, image.rows) > longTargetSize)
    {
        if(image.cols > image.rows)
        {
            double ratio = static_cast<double>(longTargetSize)/image.cols;
            cv::resize(image, image, {longTargetSize, static_cast<int>(image.rows*ratio)}, 0, 0, ratio < 1 ? cv::INTER_AREA : cv::INTER_CUBIC);
        }
        else
        {
            double ratio = static_cast<double>(longTargetSize)/image.rows;
            cv::resize(image, image, {static_cast<int>(image.cols*ratio), longTargetSize}, 0, 0, ratio < 1 ? cv::INTER_AREA : cv::INTER_CUBIC);
        }
    }
}

void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yolo, FaceRecognizer* recognizer,
    std::mutex& recognizerMutex, const std::filesystem::path& debugOutputPath)
{
    InteligentRoi intRoi(yolo);
    cv::Mat image = cv::imread(path);
    if(!image.data)
    {
        Log(Log::WARN)<<"could not load image "<<path<<" skipping";
        return;
    }

    reduceSize(image, config.targetSize);

    std::vector<Yolo::Detection> detections = yolo.runInference(image);

    Log(Log::DEBUG)<<"Got "<<detections.size()<<" detections for "<<path;
    for(Yolo::Detection& detection : detections)
    {
        bool hasmatch = false;
        if(recognizer && detection.className == "person")
        {
            cv::Mat person = image(detection.box);
            recognizerMutex.lock();
            FaceRecognizer::Detection match = recognizer->isMatch(person);
            recognizerMutex.unlock();
            if(match.person >= 0)
            {
                detection.priority += 10;
                hasmatch = true;
                detections.push_back({0, "Face", match.confidence, 20, {255, 0, 0}, match.rect});
            }
        }
        Log(Log::DEBUG)<<detection.class_id<<": "<<detection.className<<" at "<<detection.box<<" with prio "<<detection.priority<<(hasmatch ? " has match" : "");
    }

    cv::Rect crop;
    bool incompleate = intRoi.getCropRectangle(crop, detections, image.size());

    if(config.seamCarving && incompleate)
    {
        bool ret = seamCarveResize(image, detections, config.targetSize.aspectRatio());
        if(ret && image.size().aspectRatio() != config.targetSize.aspectRatio())
        {
            detections = yolo.runInference(image);
        }
    }

    cv::Mat croppedImage;

    if(image.size().aspectRatio() != config.targetSize.aspectRatio() && incompleate)
    {
        intRoi.getCropRectangle(crop, detections, image.size());

        if(config.debug)
        {
            cv::Mat debugImage = image.clone();
            drawDebugInfo(debugImage, crop, detections);
            bool ret = cv::imwrite(debugOutputPath/path.filename(), debugImage);
            if(!ret)
                Log(Log::WARN)<<"could not save debug image to "<<debugOutputPath/path.filename()<<" skipping";
        }

        croppedImage = image(crop);
    }
    else if(!incompleate)
    {
        croppedImage = image(crop);
    }
    else
    {
        croppedImage = image;
    }

    cv::Mat resizedImage;
    cv::resize(croppedImage, resizedImage, config.targetSize, 0, 0, cv::INTER_CUBIC);
    bool ret = cv::imwrite(config.outputDir/path.filename(), resizedImage);
    if(!ret)
        Log(Log::WARN)<<"could not save image to "<<config.outputDir/path.filename()<<" skipping";
}

void threadFn(const std::vector<std::filesystem::path>& images, const Config& config, FaceRecognizer* recognizer,
    std::mutex& recognizerMutex, const std::filesystem::path& debugOutputPath)
{
    Yolo yolo(config.modelPath, {640, 480}, config.classesPath, false);
    for(std::filesystem::path path : images)
        pipeline(path, config, yolo, recognizer, recognizerMutex, debugOutputPath);
}

template<typename T>
std::vector<std::vector<T>> splitVector(const std::vector<T>& vec, size_t parts)
{
    std::vector<std::vector<T>> out;

    size_t length = vec.size()/parts;
    size_t remain = vec.size() % parts;

    size_t begin = 0;
    size_t end = 0;

    // if there are fewer elements than parts, fewer than `parts` chunks result
    for (size_t i = 0; i < std::min(parts, vec.size()); ++i)
    {
        end += (remain > 0) ? (length + !!(remain--)) : length;
        out.push_back(std::vector<T>(vec.begin() + begin, vec.begin() + end));
        begin = end;
    }

    return out;
}

int main(int argc, char* argv[])
{
    Log::level = Log::INFO;

    Config config;
    argp_parse(&argp, argc, argv, 0, 0, &config);

    if(config.outputDir.empty())
    {
        Log(Log::ERROR)<<"an output path \"-o\" is required";
        return 1;
    }

    if(config.imagePaths.empty())
    {
        Log(Log::ERROR)<<"at least one input image or directory is required";
        return 1;
    }

    std::vector<std::filesystem::path> imagePaths;

    for(const std::filesystem::path& path : config.imagePaths)
        getImageFiles(path, imagePaths);

    Log(Log::DEBUG)<<"Images:";
    for(const std::filesystem::path& path : imagePaths)
        Log(Log::DEBUG)<<path;

    if(imagePaths.empty())
    {
        Log(Log::ERROR)<<"no image was found";
        return 1;
    }

    if(!std::filesystem::exists(config.outputDir))
    {
        if(!std::filesystem::create_directory(config.outputDir))
        {
            Log(Log::ERROR)<<"could not create directory at "<<config.outputDir;
            return 1;
        }
    }

    std::filesystem::path debugOutputPath(config.outputDir/"debug");
    if(config.debug)
    {
        if(!std::filesystem::exists(debugOutputPath))
            std::filesystem::create_directory(debugOutputPath);
    }

    FaceRecognizer* recognizer = nullptr;
    std::mutex recognizerMutex;
    if(!config.focusPersonImage.empty())
    {
        cv::Mat personImage = cv::imread(config.focusPersonImage);
        if(personImage.empty())
        {
            Log(Log::ERROR)<<"Could not load image from "<<config.focusPersonImage;
            return 1;
        }
        recognizer = new FaceRecognizer();
        recognizer->addReferances({personImage});
        recognizer->setThreshold(config.threshold);
    }

    std::vector<std::thread> threads;
    std::vector<std::vector<std::filesystem::path>> imagePathParts = splitVector(imagePaths, std::thread::hardware_concurrency());

    // splitVector may return fewer chunks than hardware_concurrency when there
    // are few images, so iterate over the chunks rather than the thread count
    for(size_t i = 0; i < imagePathParts.size(); ++i)
        threads.push_back(std::thread(threadFn, imagePathParts[i], std::ref(config), recognizer, std::ref(recognizerMutex), std::ref(debugOutputPath)));

    for(std::thread& thread : threads)
        thread.join();

    return 0;
}
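The seam budgeting inside seamCarveResize distributes the required seams across unfrozen slices in proportion to their width and pushes the integer-rounding residual onto the last unfrozen slice. The same arithmetic in miniature:

    def distribute_seams(slice_widths: list[int], frozen: list[bool], required: int) -> list[int]:
        resizable = sum(w for w, f in zip(slice_widths, frozen) if not f)
        # proportional share per unfrozen slice, truncated toward zero
        seams = [0 if f else int(w / resizable * required) for w, f in zip(slice_widths, frozen)]
        residual = required - sum(seams)
        for i in range(len(seams) - 1, -1, -1):
            if not frozen[i]:
                seams[i] += residual  # last unfrozen slice absorbs the remainder
                break
        return seams

    print(distribute_seams([300, 200, 500], [False, True, False], 120))  # [45, 0, 75]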
SmartCrop/options.h (new file, 98 lines)
@@ -0,0 +1,98 @@
#pragma once
#include <string>
#include <vector>
#include <argp.h>
#include <iostream>
#include <filesystem>
#include <opencv2/core/types.hpp>
#include "log.h"

const char *argp_program_version = "AIImagePreprocesses";
const char *argp_program_bug_address = "<carl@uvos.xyz>";
static char doc[] = "Application that transforms images into the formats, sizes and aspect ratios required for ai training";
static char args_doc[] = "FILE(S)";

static struct argp_option options[] =
{
    {"verbose", 'v', 0, 0, "Show debug messages" },
    {"quiet", 'q', 0, 0, "only output data" },
    {"model", 'm', "[FILENAME]", 0, "YoloV8 model to use for detection" },
    {"classes", 'c', "[FILENAME]", 0, "classes text file to use" },
    {"out", 'o', "[DIRECTORY]", 0, "directory where images are to be saved" },
    {"debug", 'd', 0, 0, "output debug images" },
    {"seam-carving", 's', 0, 0, "use seam carving to change image aspect ratio instead of cropping"},
    {"size", 'z', "[PIXELS]", 0, "target output size, default: 512"},
    {"focus-person", 'f', "[FILENAME]", 0, "a file name to an image of a person that the crop should focus on"},
    {"person-threshold", 't', "[NUMBER]", 0, "the threshold at which to consider a person matched, defaults to 0.363"},
    {0}
};

struct Config
{
    std::vector<std::filesystem::path> imagePaths;
    std::filesystem::path modelPath;
    std::filesystem::path classesPath;
    std::filesystem::path outputDir;
    std::filesystem::path focusPersonImage;
    bool seamCarving = false;
    bool debug = false;
    double threshold = 0.363;
    cv::Size targetSize = cv::Size(512, 512);
};

static error_t parse_opt (int key, char *arg, struct argp_state *state)
{
    Config *config = reinterpret_cast<Config*>(state->input);
    try
    {
        switch (key)
        {
        case 'q':
            Log::level = Log::ERROR;
            break;
        case 'v':
            Log::level = Log::DEBUG;
            break;
        case 'm':
            config->modelPath = arg;
            break;
        case 'c':
            config->classesPath = arg;
            break;
        case 'd':
            config->debug = true;
            break;
        case 'o':
            config->outputDir.assign(arg);
            break;
        case 's':
            config->seamCarving = true;
            break;
        case 'f':
            config->focusPersonImage = arg;
            break;
        case 't':
            config->threshold = std::atof(arg);
            break;
        case 'z':
        {
            int x = std::stoi(arg);
            config->targetSize = cv::Size(x, x);
            break;
        }
        case ARGP_KEY_ARG:
            config->imagePaths.push_back(arg);
            break;
        default:
            return ARGP_ERR_UNKNOWN;
        }
    }
    catch(const std::invalid_argument& ex)
    {
        std::cout<<arg<<" passed for argument -"<<static_cast<char>(key)<<" is not a valid number.\n";
        return ARGP_KEY_ERROR;
    }
    return 0;
}

static struct argp argp = {options, parse_opt, args_doc, doc};
|
@ -1,19 +1,19 @@
|
|||
#include "seamcarving.h"
|
||||
|
||||
#include <opencv2/imgcodecs.hpp>
|
||||
#include <opencv2/highgui/highgui.hpp>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <iostream>
|
||||
#if __cplusplus >= 201703L
|
||||
#include <filesystem>
|
||||
#endif
|
||||
#include <cfloat>
|
||||
#include <vector>
|
||||
#include "log.h"
|
||||
|
||||
SeamCarving::SeamCarving(const cv::Mat &img, int seams, bool grow) :
|
||||
image(img), seams(seams), grow(grow) {}
|
||||
|
||||
void SeamCarving::init()
|
||||
bool SeamCarving::strechImage(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect)
|
||||
{
|
||||
cv::Mat newFrame = image.clone();
|
||||
assert(!newFrame.empty());
|
||||
std::vector<std::vector<int>> vecSeams;
|
||||
|
||||
for(int i = 0; i < seams; i++)
|
||||
{
|
||||
|
@@ -24,230 +24,55 @@ void SeamCarving::init()
        cv::Mat pathIntensityMat = computePathIntensityMat(gradientMagnitude);

        if(pathIntensityMat.rows == 0 && pathIntensityMat.cols == 0)
        {
            finalImage = image;
            break;
        }
            return false;
        std::vector<int> seam = getLeastImportantPath(pathIntensityMat);
        vecSeams.push_back(seam);
        if(seamsVect)
            seamsVect->push_back(seam);

        newFrame = removeLeastImportantPath(newFrame,seam);
        newFrame = removeLeastImportantPath(newFrame, seam);

        if(newFrame.rows == 0 && newFrame.cols == 0)
        {
            finalImage = image;
            break;
        }
        if(newFrame.rows == 0 || newFrame.cols == 0)
            return false;
    }

    if (grow)
    {
        cv::Mat growMat = image.clone();

        for (int i = 0; i < vecSeams.size(); i++)
        for(size_t i = 0; i < vecSeams.size(); i++)
        {
            growMat = addLeastImportantPath(growMat,vecSeams[i]);
        }
        finalImage = growMat;
        image = growMat;
    }
    else
    {
        finalImage = newFrame;
        image = newFrame;
    }

    sliderPos = seams;

    return true;
}

void SeamCarving::computeNewFinalImage(int sliderPos)
bool SeamCarving::strechImageVert(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect)
{
    if(sliderPos == 0)
    {
        finalImage = image;
        return;
    }
    if(sliderPos < 1 || sliderPos >= sliderMax-1)
    {
        return;
    }
    if(sliderPos > vecSeams.size())
    {
        cv::Mat newFrame = finalImage.clone();
        for(int i = vecSeams.size()-1; i < sliderPos; i++)
        {
            //Gradient Magnitude for intensity of image.
            cv::Mat gradientMagnitude = computeGradientMagnitude(newFrame);
            //Use DP to create the real energy map that is used for path calculation.
            // Strictly using vertical paths for testing simplicity.
            cv::Mat pathIntensityMat = computePathIntensityMat(gradientMagnitude);

            if(pathIntensityMat.rows == 0 && pathIntensityMat.cols == 0)
            {
                finalImage = image;
                break;
            }
            std::vector<int> seam = getLeastImportantPath(pathIntensityMat);
            vecSeams.push_back(seam);
            newFrame = removeLeastImportantPath(newFrame,seam);
            if(newFrame.rows == 0 && newFrame.cols == 0)
            {
                finalImage = image;
                break;
            }
        }
        if (grow)
        {
            cv::Mat growMat = image.clone();

            for (int i = 0; i < vecSeams.size(); i++)
            {
                growMat = addLeastImportantPath(growMat,vecSeams[i]);
            }

            finalImage = growMat;
        }
        else
        {
            finalImage = newFrame;
        }
    }
    else if (sliderPos < vecSeams.size())
    {
        cv::Mat newFrame = image.clone();
        for(int i = 0; i < sliderPos; i++) // TODO check if it is faster to add seams back (probably not)
        {

            if (grow)
            {
                newFrame = addLeastImportantPath(newFrame,vecSeams[i]);
            }
            else
            {
                newFrame = removeLeastImportantPath(newFrame,vecSeams[i]);
            }

            if(newFrame.rows == 0 && newFrame.cols == 0)
            {
                finalImage = image;
                break;
            }
        }
        finalImage = newFrame;
    }
    cv::transpose(image, image);
    bool ret = strechImage(image, seams, grow, seamsVect);
    cv::transpose(image, image);
    return ret;
}

const cv::Mat& SeamCarving::getFinalImage()
bool SeamCarving::strechImageWithSeamsImage(cv::Mat& image, cv::Mat& seamsImage, int seams, bool grow)
{
    return finalImage;
}
    std::vector<std::vector<int>> seamsVect;
    seamsImage = image.clone();

void SeamCarving::showSeamsImg()
{
    cv::Mat seamsFrame = image.clone();
    //std::cout << "sliderPos: " << sliderPos << std::endl;
    for(int i = 0; i < sliderPos; i++)
    {
        seamsFrame = drawSeam(seamsFrame, vecSeams[i]);
    }
    cv::imwrite("output/seams_image.jpg", seamsFrame);
    cv::imshow( "Image Seams", seamsFrame);
}
    bool ret = SeamCarving::strechImage(image, seams, grow, &seamsVect);
    if(!ret)
        return false;

static void onChange( int pos, void* object )
{
    SeamCarving* sc = (SeamCarving*)(object);
    /*if(sc->getBlockUpdateStatus()) {
        return;
    }*/
    sc->computeNewFinalImage(pos);
    imshow("Final Image", sc->getFinalImage());
#if DEBUG
    sc->showSeamsImg();
#endif
}
static void onMouse( int event, int x, int y, int, void* object)
{
    SeamCarving* sc = (SeamCarving*)(object);
    if( event == cv::EVENT_LBUTTONDOWN ||
        event == cv::EVENT_RBUTTONDOWN ||
        event == cv::EVENT_MBUTTONDOWN
        )
    {
        sc->setBlockUpdate(true);
    }
    else if(event == cv::EVENT_LBUTTONUP ||
        event == cv::EVENT_RBUTTONUP ||
        event == cv::EVENT_MBUTTONUP)
    {
        sc->setBlockUpdate(false);
    }
}

void SeamCarving::setBlockUpdate(bool bUpdate)
{
    blockUpdate = bUpdate;
}

bool SeamCarving::getBlockUpdateStatus()
{
    return blockUpdate;
}

void SeamCarving::showImage()
{
#if __cplusplus >= 201703L
    if(!std::filesystem::exists("output"))
    {
        std::filesystem::create_directory("output");
    }
#endif
    if( image.empty() )
    {
        std::cout << "Could not open raw image" << std::endl ;
        return;
    }
    namedWindow( "Raw Image", cv::WINDOW_AUTOSIZE );
    cv::imshow( "Raw Image", image );

    if( finalImage.empty() )
    {
        std::cout << "Could not open final image" << std::endl ;
        return;
    }
#if DEBUG
    namedWindow( "gradient Image", cv::WINDOW_AUTOSIZE );
    cv::Mat gradient = computeGradientMagnitude(image);
    cv::Mat u8_image;
    gradient.convertTo(u8_image, CV_8U);

    cv::imwrite("output/gradient_image.jpg", u8_image);
    cv::imshow("gradient Image", u8_image);

    namedWindow( "intensity Image", cv::WINDOW_AUTOSIZE );
    cv::Mat u8_image2;
    cv::Mat intensityMat = computePathIntensityMat(gradient);
    cv::Mat dst;
    cv::normalize(intensityMat, dst, 0, 255, cv::NORM_MINMAX);
    dst.convertTo(u8_image2, CV_8U);
    cv::imwrite("output/intensity_image.jpg", u8_image2);
    cv::imshow( "intensity Image", u8_image2);

    //cv::Mat engImg = GetEnergyImg(image);
    //namedWindow("energy Image", cv::WINDOW_AUTOSIZE);
    //cv::Mat u8_image3;
    //engImg.convertTo(u8_image3, CV_8U);
    //cv::imshow( "energy Image", u8_image3);
    namedWindow("Image Seams", cv::WINDOW_AUTOSIZE);
    showSeamsImg();

#endif

    namedWindow( "Final Image", cv::WINDOW_AUTOSIZE );
    cv::createTrackbar("Seams", "Final Image", &sliderPos, sliderMax, onChange, this);
    //cv::setMouseCallback("Final Image", onMouse, this );
    cv::imwrite("output/final_image.jpg", finalImage);
    cv::imshow("Final Image", finalImage);
    cv::waitKey(0);
    for(size_t i = 0; i < seamsVect.size(); ++i)
        seamsImage = drawSeam(seamsImage, seamsVect[i]);
    return true;
}

cv::Mat SeamCarving::GetEnergyImg(const cv::Mat &img)
@@ -392,9 +217,7 @@ cv::Mat SeamCarving::removeLeastImportantPath(const cv::Mat &original, const std
    cv::Size size = cv::Size(orgSize.width-1, orgSize.height);
    cv::Mat newMat = cv::Mat(size, original.type());

    unsigned char *rawOrig = original.data;
    unsigned char *rawOutput = newMat.data;
    for(int row = 0; row < seam.size(); row++)
    for(size_t row = 0; row < seam.size(); row++)
    {
        removePixel(original, newMat, row, seam[row]);
    }
@@ -460,9 +283,7 @@ cv::Mat SeamCarving::addLeastImportantPath(const cv::Mat &original, const std::v
    cv::Size size = cv::Size(orgSize.width+1, orgSize.height);
    cv::Mat newMat = cv::Mat(size, original.type());

    unsigned char *rawOrig = original.data;
    unsigned char *rawOutput = newMat.data;
    for(int row = 0; row < seam.size(); row++)
    for(size_t row = 0; row < seam.size(); row++)
    {
        //std::cout << "row: " << row << ", col: " << seam[row] << std::endl;
        addPixel(original, newMat, row, seam[row]);
@@ -518,3 +339,18 @@ void SeamCarving::addPixel(const cv::Mat &original, cv::Mat &outputMat, int row,
        rawOutput[newRowStart + leftPixel*channels+2] = (unsigned char) ((byte3 + byte3L)/2);
    }
}

cv::Mat SeamCarving::drawSeam(const cv::Mat &frame, const std::vector<int> &seam)
{
    cv::Mat retMat = frame.clone();
    for(int row = 0; row < frame.rows; row++)
    {
        for(int col = 0; col < frame.cols; col++)
        {
            retMat.at<cv::Vec3b>(row, seam[row])[0] = 0;
            retMat.at<cv::Vec3b>(row, seam[row])[1] = 255;
            retMat.at<cv::Vec3b>(row, seam[row])[2] = 0;
        }
    }
    return retMat;
}
24
SmartCrop/seamcarving.h
Normal file
@@ -0,0 +1,24 @@
#pragma once

#include <opencv2/core/core.hpp>
#include <vector>

class SeamCarving
{
private:
    static cv::Mat GetEnergyImg(const cv::Mat &img);
    static cv::Mat computeGradientMagnitude(const cv::Mat &frame);
    static float intensity(float currIndex, int start, int end);
    static cv::Mat computePathIntensityMat(const cv::Mat &rawEnergyMap);
    static std::vector<int> getLeastImportantPath(const cv::Mat &importanceMap);
    static cv::Mat removeLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam);
    static void removePixel(const cv::Mat &original, cv::Mat &outputMap, int row, int minCol);
    static cv::Mat addLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam);
    static void addPixel(const cv::Mat &original, cv::Mat &outputMat, int row, int minCol);
    static cv::Mat drawSeam(const cv::Mat &frame, const std::vector<int> &seam);

public:
    static bool strechImage(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect = nullptr);
    static bool strechImageVert(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect = nullptr);
    static bool strechImageWithSeamsImage(cv::Mat& image, cv::Mat& seamsImage, int seams, bool grow);
};
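Taken together with seamcarving.cpp above, the refactor replaces the stateful, trackbar-driven SeamCarving object with three stateless static entry points that carve the passed image in place and report failure via bool. A minimal usage sketch; the file names are placeholders:

    #include <opencv2/imgcodecs.hpp>
    #include "seamcarving.h"

    int main()
    {
        cv::Mat image = cv::imread("input.jpg"); // placeholder path
        if(image.empty())
            return 1;

        // Remove 50 seams in place, shrinking the width by 50 pixels;
        // grow=true would insert seams instead. strechImageVert does the
        // same along the other axis via the transpose trick.
        if(!SeamCarving::strechImage(image, 50, false))
            return 1;

        cv::imwrite("carved.jpg", image); // placeholder path
        return 0;
    }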
SmartCrop/yolo.cpp

@@ -11,8 +11,8 @@
#define INCBIN_PREFIX r
#include "incbin.h"

INCTXT(defaultClasses, "../classes.txt");
INCBIN(defaultModel, "../yolov8x.onnx");
INCTXT(defaultClasses, WEIGHT_DIR "/classes.txt");
INCBIN(defaultModel, WEIGHT_DIR "/yolov8x.onnx");

Yolo::Yolo(const std::filesystem::path &onnxModelPath, const cv::Size &modelInputShape,
    const std::filesystem::path& classesTxtFilePath, bool runWithOCl)

@@ -22,6 +22,7 @@ Yolo::Yolo(const std::filesystem::path &onnxModelPath, const cv::Size &modelInpu

    if(classesTxtFilePath.empty())
    {
        Log(Log::INFO)<<"Using builtin classes";
        loadClasses(rdefaultClassesData);
    }
    else

@@ -31,19 +32,21 @@ Yolo::Yolo(const std::filesystem::path &onnxModelPath, const cv::Size &modelInpu
    }

    if(!modelPath.empty())
    {
        net = cv::dnn::readNetFromONNX(modelPath);
    }
    else
    {
        Log(Log::INFO)<<"Using builtin yolo model";
        net = cv::dnn::readNetFromONNX((const char*)rdefaultModelData, rdefaultModelSize);

    }
    if(runWithOCl)
    {
        std::cout << "\nRunning on OCV" << std::endl;
        net.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT);
        net.setPreferableTarget(cv::dnn::DNN_TARGET_OPENCL);
    }
    else
    {
        std::cout << "\nRunning on CPU" << std::endl;
        net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
        net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
    }

@@ -176,14 +179,33 @@ std::vector<Yolo::Detection> Yolo::runInference(const cv::Mat &input)

        result.className = classes[result.class_id].first;
        result.priority = classes[result.class_id].second;
        clampBox(boxes[idx], input.size());
        result.box = boxes[idx];

        detections.push_back(result);
    }

    return detections;
}


void Yolo::clampBox(cv::Rect& box, const cv::Size& size)
{
    if(box.x < 0)
    {
        box.width += box.x;
        box.x = 0;
    }
    if(box.y < 0)
    {
        box.height += box.y;
        box.y = 0;
    }
    if(box.x+box.width > size.width)
        box.width = size.width - box.x;
    if(box.y+box.height > size.height)
        box.height = size.height - box.y;
}

void Yolo::loadClasses(const std::string& classesStr)
{
    std::vector<std::string> candidateClasses = tokenizeBinaryIgnore(classesStr, '\n', '"', '\\');
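The new Yolo::clampBox trims detection rectangles to the image bounds before they are stored, so later crops such as image(crop) cannot index outside the image. A standalone illustration of the same clamping logic:

    #include <opencv2/core/types.hpp>
    #include <iostream>

    // Same clamping logic as Yolo::clampBox, reproduced standalone for illustration.
    static void clampBox(cv::Rect& box, const cv::Size& size)
    {
        if(box.x < 0) { box.width += box.x; box.x = 0; }
        if(box.y < 0) { box.height += box.y; box.y = 0; }
        if(box.x + box.width > size.width)   box.width  = size.width  - box.x;
        if(box.y + box.height > size.height) box.height = size.height - box.y;
    }

    int main()
    {
        cv::Rect box(-10, 620, 100, 100);  // overlaps the left and bottom edges
        clampBox(box, cv::Size(640, 640));
        std::cout<<box<<std::endl;         // prints [90 x 20 from (0, 620)]
        return 0;
    }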
SmartCrop/yolo.h

@@ -27,19 +27,16 @@ private:
    static constexpr float modelScoreThreshold = 0.45;
    static constexpr float modelNMSThreshold = 0.50;

    std::string modelPath;
    std::vector<std::pair<std::string, int>> classes;
    cv::Size2f modelShape;
    bool letterBoxForSquare = true;
    cv::dnn::Net net;

    void loadClasses(const std::string& classes);
    void loadOnnxNetwork(const std::filesystem::path& path);
    cv::Mat formatToSquare(const cv::Mat &source);

    std::string modelPath;

    std::vector<std::pair<std::string, int>> classes;

    cv::Size2f modelShape;

    bool letterBoxForSquare = true;

    cv::dnn::Net net;
    static void clampBox(cv::Rect& box, const cv::Size& size);

public:
    Yolo(const std::filesystem::path &onnxModelPath = "", const cv::Size& modelInputShape = {640, 480},
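With the INCTXT/INCBIN changes above baking Weights/classes.txt and Weights/yolov8x.onnx into the binary at compile time, both paths may now be left empty and the builtin data is used. A hedged usage sketch; it assumes Detection exposes className and box, as the rest of this diff does, and the image path is a placeholder:

    #include <iostream>
    #include <opencv2/imgcodecs.hpp>
    #include "yolo.h"

    int main()
    {
        // Empty paths select the model and classes compiled in via INCBIN/INCTXT.
        Yolo yolo("", {640, 480}, "", false);
        cv::Mat image = cv::imread("photo.jpg"); // placeholder path
        if(image.empty())
            return 1;
        for(const Yolo::Detection& detection : yolo.runInference(image))
            std::cout<<detection.className<<" at "<<detection.box<<"\n";
        return 0;
    }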
80
Weights/classes.txt
Normal file
@@ -0,0 +1,80 @@
person, 10
bicycle, 4
car, 3
motorcycle, 4
airplane, 4
bus, 4
train, 4
truck, 3
boat, 4
traffic light, 1
fire hydrant, 1
stop sign, 1
parking meter, 1
bench, 2
bird, 5
cat, 6
dog, 5
horse, 4
sheep, 5
cow, 4
elephant, 5
bear, 5
zebra, 5
giraffe, 5
backpack, 3
umbrella, 3
handbag, 3
tie, 3
suitcase, 2
frisbee, 3
skis, 3
snowboard, 3
sports ball, 3
kite, 4
baseball bat, 3
baseball glove, 3
skateboard, 3
surfboard, 3
tennis racket, 3
bottle, 2
wine glass, 2
cup, 2
fork, 1
knife, 1
spoon, 1
bowl, 1
banana, 1
apple, 1
sandwich,1
orange, 1
broccoli, 1
carrot, 1
hot dog, 1
pizza, 1
donut, 2
cake, 2
chair, 1
couch, 1
potted plant, 1
bed, 1
dining table, 1
toilet, 1
tv, 1
laptop, 1
mouse, 1
remote, 1
keyboard, 1
cell phone, 1
microwave, 1
oven, 1
toaster, 1
sink, 1
refrigerator, 1
book, 1
clock, 1
vase, 1
scissors, 1
teddy bear, 1
hair drier, 1
toothbrush, 1
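Each line of the new Weights/classes.txt is a COCO class name followed by an integer priority, which runInference copies into Detection::priority through the classes vector. An illustrative parser for this format, shown only to make the file's structure concrete; the repo's actual code path is Yolo::loadClasses via tokenizeBinaryIgnore, shown above:

    #include <sstream>
    #include <string>
    #include <utility>
    #include <vector>

    // Illustrative parser for the "name, priority" lines above; not the
    // repo's implementation.
    std::vector<std::pair<std::string, int>> parseClasses(const std::string& text)
    {
        std::vector<std::pair<std::string, int>> classes;
        std::istringstream stream(text);
        std::string line;
        while(std::getline(stream, line))
        {
            size_t comma = line.rfind(',');
            if(comma == std::string::npos)
                continue;
            std::string name = line.substr(0, comma);
            int priority = std::stoi(line.substr(comma + 1)); // tolerates "sandwich,1" without a space
            classes.push_back({name, priority});
        }
        return classes;
    }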
BIN
Weights/face_detection_yunet_2023mar.onnx
Normal file
Binary file not shown.

BIN
Weights/face_recognition_sface_2021dec.onnx
Normal file
Binary file not shown.

BIN
Weights/yolov8x.onnx
Normal file
Binary file not shown.
295
main.cpp

@@ -1,295 +0,0 @@
#include <filesystem>
#include <iostream>
#include <opencv2/core/types.hpp>
#include <opencv2/imgproc.hpp>
#include <algorithm>
#include <vector>

#include "yolo.h"
#include "log.h"
#include "options.h"
#include "utils.h"
#include "intelligentroi.h"

const Yolo::Detection* pointInDetectionHoriz(int x, const std::vector<Yolo::Detection>& detections, const Yolo::Detection* ignore = nullptr)
{
    const Yolo::Detection* inDetection = nullptr;
    for(const Yolo::Detection& detection : detections)
    {
        if(!ignore || ignore != &detection)
            continue;

        if(detection.box.x <= x && detection.box.x+detection.box.width <= x)
        {
            if(!inDetection || detection.box.br().x > inDetection->box.br().x)
                inDetection = &detection;
        }
    }
    return inDetection;
}

bool findRegionEndpointHoriz(int& x, const std::vector<Yolo::Detection>& detections, int imgSizeX)
{
    const Yolo::Detection* inDetection = pointInDetectionHoriz(x, detections);

    if(!inDetection)
    {
        const Yolo::Detection* closest = nullptr;
        for(const Yolo::Detection& detection : detections)
        {
            if(detection.box.x > x)
            {
                if(closest == nullptr || detection.box.x-x > closest->box.x-x)
                    closest = &detection;
            }
        }
        if(closest)
            x = closest->box.x;
        else
            x = imgSizeX;
        return false;
    }
    else
    {
        x = inDetection->box.br().x;
        const Yolo::Detection* candidateDetection = pointInDetectionHoriz(x, detections, inDetection);
        if(candidateDetection && candidateDetection->box.br().x > x)
            return findRegionEndpointHoriz(x, detections, imgSizeX);
        else
            return true;
    }
}

std::vector<std::pair<cv::Mat, bool>> cutImageIntoHorzRegions(cv::Mat& image, const std::vector<Yolo::Detection>& detections)
{
    std::vector<std::pair<cv::Mat, bool>> out;

    for(int x = 0; x < image.cols; ++x)
    {
        int start = x;
        bool frozen = findRegionEndpointHoriz(x, detections, image.cols);

        cv::Mat slice = image(cv::Rect(start, 0, x-start, image.rows));
        out.push_back({slice, frozen});
    }

    return out;
}

const Yolo::Detection* pointInDetectionVert(int y, const std::vector<Yolo::Detection>& detections, const Yolo::Detection* ignore = nullptr)
{
    const Yolo::Detection* inDetection = nullptr;
    for(const Yolo::Detection& detection : detections)
    {
        if(!ignore || ignore != &detection)
            continue;

        if(detection.box.y <= y && detection.box.y+detection.box.height <= y)
        {
            if(!inDetection || detection.box.br().y > inDetection->box.br().y)
                inDetection = &detection;
        }
    }
    return inDetection;
}

bool findRegionEndpointVert(int& y, const std::vector<Yolo::Detection>& detections, int imgSizeY)
{
    const Yolo::Detection* inDetection = pointInDetectionVert(y, detections);

    if(!inDetection)
    {
        const Yolo::Detection* closest = nullptr;
        for(const Yolo::Detection& detection : detections)
        {
            if(detection.box.y > y)
            {
                if(closest == nullptr || detection.box.y-y > closest->box.y-y)
                    closest = &detection;
            }
        }
        if(closest)
            y = closest->box.y;
        else
            y = imgSizeY;
        return false;
    }
    else
    {
        y = inDetection->box.br().y;
        const Yolo::Detection* candidateDetection = pointInDetectionVert(y, detections, inDetection);
        if(candidateDetection && candidateDetection->box.br().y > y)
            return findRegionEndpointVert(y, detections, imgSizeY);
        else
            return true;
    }
}

std::vector<std::pair<cv::Mat, bool>> cutImageIntoVertRegions(cv::Mat& image, const std::vector<Yolo::Detection>& detections)
{
    std::vector<std::pair<cv::Mat, bool>> out;

    for(int y = 0; y < image.rows; ++y)
    {
        int start = y;
        bool frozen = findRegionEndpointVert(y, detections, image.rows);

        cv::Mat slice = image(cv::Rect(0, start, image.cols, y-start));
        out.push_back({slice, frozen});
    }

    return out;
}

bool seamCarveResize(cv::Mat& image, const std::vector<Yolo::Detection>& detections, double targetAspectRatio = 1.0)
{
    double aspectRatio = image.cols/static_cast<double>(image.rows);

    bool vertical = false;
    cv::Mat workImage;
    if(aspectRatio > targetAspectRatio)
        vertical = true;

    int requiredLines = 0;
    if(!vertical)
        requiredLines = workImage.rows*targetAspectRatio - workImage.cols;
    else
        requiredLines = workImage.cols/targetAspectRatio - workImage.rows;

    Log(Log::DEBUG)<<__func__<<' '<<requiredLines<<" lines are required in "<<(vertical ? "vertical" : "horizontal")<<" direction";

    if(!vertical)
    {
        std::vector<std::pair<cv::Mat, bool>> slices = cutImageIntoHorzRegions(image, detections);
        int totalResizableSize = 0;
        for(const std::pair<cv::Mat, bool>& slice : slices)
        {
            if(slice.second)
                totalResizableSize += slice.first.cols;
        }

        std::vector<int> seamsForSlice(slices.size());
        for(size_t i = 0; i < slices.size(); ++i)
        {
            seamsForSlice[i] = (static_cast<double>(slices[i].first.cols)/totalResizableSize)*requiredLines;
        }
    }
    else
    {
        int totalResizableSize = 0;
        std::vector<std::pair<cv::Mat, bool>> slices = cutImageIntoVertRegions(image, detections);
    }

}

void drawDebugInfo(cv::Mat &image, const cv::Rect& rect, const std::vector<Yolo::Detection>& detections)
{
    for(const Yolo::Detection& detection : detections)
    {
        cv::rectangle(image, detection.box, detection.color, 4);
        std::string label = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4);
        cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_DUPLEX, 3, 2, 0);
        cv::Rect textBox(detection.box.x, detection.box.y - 80, labelSize.width + 10, labelSize.height + 20);
        cv::rectangle(image, textBox, detection.color, cv::FILLED);
        cv::putText(image, label, cv::Point(detection.box.x + 5, detection.box.y - 10), cv::FONT_HERSHEY_DUPLEX, 3, cv::Scalar(0, 0, 0), 2, 0);
    }

    cv::rectangle(image, rect, cv::Scalar(0, 0, 255), 8);
}

int main(int argc, char* argv[])
{
    Log::level = Log::INFO;

    Config config;
    argp_parse(&argp, argc, argv, 0, 0, &config);

    if(config.outputDir.empty())
    {
        Log(Log::ERROR)<<"a output path \"-o\" is required";
        return 1;
    }

    if(config.imagePaths.empty())
    {
        Log(Log::ERROR)<<"at least one input image or directory is required";
        return 1;
    }

    std::vector<std::filesystem::path> imagePaths;

    for(const std::filesystem::path& path : config.imagePaths)
        getImageFiles(path, imagePaths);

    if(imagePaths.empty())
    {
        Log(Log::ERROR)<<"no image was found\n";
        return 1;
    }

    Yolo yolo(config.modelPath, {640, 480}, config.classesPath, false);
    InteligentRoi intRoi(yolo);

    if(!std::filesystem::exists(config.outputDir))
    {
        if(!std::filesystem::create_directory(config.outputDir))
        {
            Log(Log::ERROR)<<"could not create directory at "<<config.outputDir;
            return 1;
        }
    }

    std::filesystem::path debugOutputPath(config.outputDir/"debug");
    if(config.debug)
    {
        if(!std::filesystem::exists(debugOutputPath))
            std::filesystem::create_directory(debugOutputPath);
    }

    for(const std::filesystem::path& path : imagePaths)
    {
        cv::Mat image = cv::imread(path);
        if(!image.data)
        {
            Log(Log::WARN)<<"could not load image "<<path<<" skipping";
            continue;
        }

        if(std::max(image.cols, image.rows) > 1024)
        {
            if(image.cols > image.rows)
            {
                double ratio = 1024.0/image.cols;
                cv::resize(image, image, {1024, static_cast<int>(image.rows*ratio)}, 0, 0, cv::INTER_CUBIC);
            }
            else
            {
                double ratio = 1024.0/image.rows;
                cv::resize(image, image, {static_cast<int>(image.cols*ratio), 1024}, 0, 0, cv::INTER_CUBIC);
            }
        }

        std::vector<Yolo::Detection> detections = yolo.runInference(image);

        Log(Log::DEBUG)<<"Got "<<detections.size()<<" detections for "<<path;
        for(const Yolo::Detection& detection : detections)
            Log(Log::DEBUG)<<detection.class_id<<": "<<detection.className<<" at "<<detection.box<<" with prio "<<detection.priority;


        cv::Rect crop = intRoi.getCropRectangle(detections, image.size());

        cv::Mat debugImage = image.clone();
        drawDebugInfo(debugImage, crop, detections);
        bool ret = cv::imwrite(debugOutputPath/path.filename(), debugImage);
        if(!ret)
            Log(Log::WARN)<<"could not save debug image to "<<debugOutputPath/path.filename()<<" skipping";

        cv::Mat croppedImage = image(crop);
        cv::Mat resizedImage;
        cv::resize(croppedImage, resizedImage, {512, 512}, 0, 0, cv::INTER_CUBIC);
        ret = cv::imwrite(config.outputDir/path.filename(), resizedImage);
        if(!ret)
            Log(Log::WARN)<<"could not save image to "<<config.outputDir/path.filename()<<" skipping";
    }
    return 0;
}
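The removed seamCarveResize was visibly unfinished: workImage is default-constructed and read before ever being assigned, the function can fall off the end without returning a value, and the computed seamsForSlice is never used. For reference, the aspect-ratio arithmetic it was driving at, written against image instead of the empty workImage; this is a hypothetical fix, not code from the repo:

    #include <opencv2/core.hpp>

    // Hypothetical fix of the removed computation: the original read from
    // workImage, which was default-constructed and never assigned.
    static int requiredGrowLines(const cv::Mat& image, double targetAspectRatio, bool& vertical)
    {
        vertical = image.cols / static_cast<double>(image.rows) > targetAspectRatio;
        if(!vertical)
            return image.rows * targetAspectRatio - image.cols; // columns to add to widen the image
        else
            return image.cols / targetAspectRatio - image.rows; // rows to add to heighten the image
    }

For example, a 1024x768 image with targetAspectRatio = 1.0 is too wide (vertical case) and needs 1024/1.0 - 768 = 256 extra rows.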
70
options.h

@@ -1,70 +0,0 @@
#pragma once
#include <string>
#include <vector>
#include <argp.h>
#include <iostream>
#include <filesystem>
#include "log.h"

const char *argp_program_version = "AIImagePreprocesses";
const char *argp_program_bug_address = "<carl@uvos.xyz>";
static char doc[] = "Application that trainsforms images into formats, sizes and aspect ratios required for ai training";
static char args_doc[] = "[IMAGES]";

static struct argp_option options[] =
{
    {"verbose", 'v', 0, 0, "Show debug messages" },
    {"quiet", 'q', 0, 0, "only output data" },
    {"model", 'm', "[FILENAME]", 0, "YoloV8 model to use for detection" },
    {"classes", 'c', "[FILENAME]", 0, "classes text file to use" },
    {"out", 'o', "[DIRECTORY]", 0, "directory whre images are to be saved" },
    {"debug", 'd', 0, 0, "output debug images" },
    {"seam-carving", 's', 0, 0, "model to train: "}
};

struct Config
{
    std::vector<std::filesystem::path> imagePaths;
    std::filesystem::path modelPath;
    std::filesystem::path classesPath;
    std::filesystem::path outputDir;
    bool seamCarving = false;
    bool debug = false;
};

static error_t parse_opt (int key, char *arg, struct argp_state *state)
{
    Config *config = reinterpret_cast<Config*>(state->input);
    switch (key)
    {
    case 'q':
        Log::level = Log::ERROR;
        break;
    case 'v':
        Log::level = Log::DEBUG;
        break;
    case 'm':
        config->modelPath = arg;
        break;
    case 'c':
        config->classesPath = arg;
        break;
    case 'd':
        config->debug = true;
        break;
    case 'o':
        config->outputDir.assign(arg);
        break;
    case 's':
        config->seamCarving = true;
        break;
    case ARGP_KEY_ARG:
        config->imagePaths.push_back(arg);
        break;
    default:
        return ARGP_ERR_UNKNOWN;
    }
    return 0;
}

static struct argp argp = {options, parse_opt, args_doc, doc};
seamcarving.h

@@ -1,61 +0,0 @@
#ifndef __SEAM__CARVING_HPP__
#define __SEAM__CARVING_HPP__

#include <opencv2/core/core.hpp>
#define DEBUG 0

class SeamCarving {
public:
    void showImage();
    const cv::Mat& getFinalImage();
    virtual void computeNewFinalImage(int pos);
    void setBlockUpdate(bool bUpdate);
    bool getBlockUpdateStatus();
    virtual void showSeamsImg();

protected:
    SeamCarving(const cv::Mat &img, int seams, bool grow);
    void init();
    virtual cv::Mat drawSeam(const cv::Mat &frame, const std::vector<int> &seam) = 0;
    cv::Mat image;
    cv::Mat finalImage;
    int seams;
    bool grow;
    int sliderMax;
    int sliderPos;
    std::vector<std::vector<int>> vecSeams;

private:
    cv::Mat GetEnergyImg(const cv::Mat &img);
    cv::Mat computeGradientMagnitude(const cv::Mat &frame);
    float intensity(float currIndex, int start, int end);
    cv::Mat computePathIntensityMat(const cv::Mat &rawEnergyMap);
    std::vector<int> getLeastImportantPath(const cv::Mat &importanceMap);
    cv::Mat removeLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam);
    void removePixel(const cv::Mat &original, cv::Mat &outputMap, int row, int minCol);
    cv::Mat addLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam);
    void addPixel(const cv::Mat &original, cv::Mat &outputMat, int row, int minCol);
    bool blockUpdate = false;

};

class SeamCarvingHorizontal : public SeamCarving
{
public:
    SeamCarvingHorizontal(char* fileName, int seams=100, bool grow=false);
protected:
    virtual cv::Mat drawSeam(const cv::Mat &frame, const std::vector<int> &seam) override;
};

class SeamCarvingVertical : public SeamCarving {
public:
    SeamCarvingVertical(char* fileName, int seams=100, bool grow=false);
    virtual void computeNewFinalImage(int pos) override;
#if DEBUG
    virtual void showSeamsImg() override;
#endif
protected:
    virtual cv::Mat drawSeam(const cv::Mat &frame, const std::vector<int> &seam) override;
};

#endif // __SEAM__CARVING_HPP__
seamcarvinghoriz.cpp

@@ -1,28 +0,0 @@
#include "seamcarving.h"
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <iostream>
#include <cfloat>

cv::Mat SeamCarvingHorizontal::drawSeam(const cv::Mat &frame, const std::vector<int> &seam)
{
    cv::Mat retMat = frame.clone();
    for(int row = 0; row < frame.rows; row++)
    {
        for(int col = 0; col < frame.cols; col++)
        {
            retMat.at<cv::Vec3b>(row, seam[row])[0] = 0;
            retMat.at<cv::Vec3b>(row, seam[row])[1] = 255;
            retMat.at<cv::Vec3b>(row, seam[row])[2] = 0;
        }
    }
    return retMat;
}

SeamCarvingHorizontal::SeamCarvingHorizontal(char* fileName, int seams, bool grow) :
    SeamCarving( cv::imread(fileName, cv::IMREAD_COLOR), seams, grow)
{
    sliderMax = image.cols;
    init();
}
seamcarvingvert.cpp

@@ -1,51 +0,0 @@
#include "seamcarving.h"
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <iostream>
#include <cfloat>

SeamCarvingVertical::SeamCarvingVertical(char* fileName, int seams, bool grow) :
    SeamCarving( cv::imread(fileName, cv::IMREAD_COLOR), seams, grow)
{
    sliderMax = image.rows;
    cv::Mat oldImage = image;
    image = image.t();
    init();
    image = oldImage;
    finalImage = finalImage.t();
}

cv::Mat SeamCarvingVertical::drawSeam(const cv::Mat &frame, const std::vector<int> &seam)
{
    cv::Mat retMat = frame.clone();
    for(int col = 0; col < frame.cols; col++)
    {
        for(int row = 0; row < frame.rows; row++)
        {
            retMat.at<cv::Vec3b>(seam[col], col)[0] = 0;
            retMat.at<cv::Vec3b>(seam[col], col)[1] = 255;
            retMat.at<cv::Vec3b>(seam[col], col)[2] = 0;
        }
    }
    return retMat;
}

void SeamCarvingVertical::computeNewFinalImage(int pos)
{
    cv::Mat oldImage = image;
    image = image.t();
    SeamCarving::computeNewFinalImage(pos);
    image = oldImage;
    finalImage = finalImage.t();
}

#if DEBUG
void SeamCarvingVertical::showSeamsImg()
{
    cv::Mat oldImage = this->image;
    this->image = this->image.t();
    SeamCarving::showImage();
    this->image = oldImage;
}
#endif
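Worth noting: the transpose trick survives the refactor. The deleted SeamCarvingVertical wrapped init() and computeNewFinalImage() between image = image.t() and finalImage = finalImage.t(), and the new strechImageVert does exactly the same with cv::transpose around strechImage. A minimal standalone demonstration that the round trip is lossless, which is what makes the trick safe:

    #include <opencv2/core.hpp>
    #include <cassert>

    int main()
    {
        cv::Mat m(4, 3, CV_8UC1);
        cv::randu(m, 0, 255);

        // Transposing, operating on rows, and transposing back is equivalent
        // to operating on columns of the original image.
        cv::Mat t;
        cv::transpose(m, t); // t is 3x4: rows of t are columns of m
        cv::transpose(t, t); // back to 4x3
        assert(cv::countNonZero(m != t) == 0); // round trip changes nothing
        return 0;
    }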