Compare commits
438c9d726c...ce3279254f (10 commits)

Commits:
ce3279254f
55953bcdb7
f97f4640a9
03e2b3119a
81475815fb
35cfa8a906
a279001151
b2ffbfa530
b3c2d585ae
f5dad284e6
CMakeLists.txt (changed)
@@ -1,15 +1,7 @@
 cmake_minimum_required(VERSION 3.6)
 
-project(AIImagePrepross)
+project(ImageAiUtils)
 
-find_package(OpenCV REQUIRED)
 
 set(CMAKE_CXX_STANDARD 17)
+set(WEIGHT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/Weights)
 
-set(SRC_FILES main.cpp yolo.cpp tokenize.cpp log.cpp seamcarvingvert.cpp seamcarvinghoriz.cpp seamcarving.cpp utils.cpp intelligentroi.cpp)
+add_subdirectory(SmartCrop)
 
-add_executable(${PROJECT_NAME} ${SRC_FILES})
-target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} -ltbb)
-target_include_directories(${PROJECT_NAME} PRIVATE ${OpenCV_INCLUDE_DIRS})
-target_compile_options(${PROJECT_NAME} PRIVATE -s -g -Wall)
-
-install(TARGETS ${PROJECT_NAME} RUNTIME DESTINATION bin)
DanbooruTagger/DanbooruTagger.py (new file, 141 lines)
@@ -0,0 +1,141 @@
import warnings
from deepdanbooru_onnx import DeepDanbooru
import argparse
import cv2
import torch
import os
import numpy
from typing import Iterator
from torch.multiprocessing import Process, Queue
import json
from tqdm import tqdm
# Missing from the original listing but required by pipeline() below:
from transformers import LlavaForConditionalGeneration, AutoProcessor, BitsAndBytesConfig, logging


image_ext_ocv = [".bmp", ".jpeg", ".jpg", ".png"]


def find_image_files(path: str) -> list[str]:
    paths = list()
    for root, dirs, files in os.walk(path):
        for filename in files:
            name, extension = os.path.splitext(filename)
            if extension.lower() in image_ext_ocv:
                paths.append(os.path.join(root, filename))
    return paths


def image_loader(paths: list[str]) -> Iterator[tuple[numpy.ndarray, str]]:
    for path in paths:
        name, extension = os.path.splitext(path)
        extension = extension.lower()
        imagebgr = cv2.imread(path)
        # check the load result before converting; cv2.cvtColor would raise on None
        if imagebgr is None:
            print(f"Warning: could not load {path}")
        else:
            image = cv2.cvtColor(imagebgr, cv2.COLOR_BGR2RGB)
            yield image, path


def pipeline(queue: Queue, image_paths: list[str], prompt: str, device: torch.device, model_name_or_path: str, batch_size: int):
    model = LlavaForConditionalGeneration.from_pretrained(model_name_or_path, torch_dtype=torch.float16, low_cpu_mem_usage=None,
        quantization_config=BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_use_double_quant=False,
            bnb_4bit_quant_type='nf4',
        ), device_map=device, attn_implementation="flash_attention_2")
    processor = AutoProcessor.from_pretrained(model_name_or_path)
    image_generator = image_loader(image_paths)

    stop = False
    finished_count = 0
    while not stop:
        prompts = list()
        images = list()
        filenames = list()
        for i in range(0, batch_size):
            image, filename = next(image_generator, (None, None))
            if image is None:
                stop = True
                break
            filenames.append(filename)
            images.append(image)
            prompts.append(prompt)

        if len(images) == 0:
            break

        inputs = processor(text=prompts, images=images, return_tensors="pt").to(model.device)
        generate_ids = model.generate(**inputs, max_new_tokens=100, min_new_tokens=3, length_penalty=1.0, do_sample=False, temperature=1.0, top_k=50, top_p=1.0)
        decodes = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
        finished_count += len(images)
        for i, decoded in enumerate(decodes):
            trim = len(prompt) - len("<image>")
            queue.put({"file_name": filenames[i], "text": decoded[trim:].strip()})


def split_list(input_list, count):
    target_length = int(len(input_list) / count)
    for i in range(0, count - 1):
        yield input_list[i * target_length: (i + 1) * target_length]
    yield input_list[(count - 1) * target_length: len(input_list)]
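
For illustration, split_list hands each GPU worker an equal-sized slice of the path list and lets the final slice absorb the remainder. A minimal sketch of its behavior (the numbers are hypothetical):

# chunk size is int(10 / 3) == 3, so the final chunk gets the 4 leftover items
chunks = list(split_list(list(range(10)), 3))
print(chunks)  # [[0, 1, 2], [3, 4, 5], [6, 7, 8, 9]]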

def save_meta(meta_file, meta, reldir, common_description):
    meta["file_name"] = os.path.relpath(meta["file_name"], reldir)
    if common_description is not None:
        meta["text"] = common_description + meta["text"]
    meta_file.write(json.dumps(meta) + '\n')


if __name__ == "__main__":
    parser = argparse.ArgumentParser("A script to tag images via llava")
    parser.add_argument('--model', '-m', default="llava-hf/llava-1.5-13b-hf", help="model to use")
    parser.add_argument('--quantize', '-q', action='store_true', help="load quantized")
    parser.add_argument('--prompt', '-p', default="Please describe this image in 10 to 20 words.", help="Prompt to use on each image")
    parser.add_argument('--batch', '-b', default=4, type=int, help="Batch size to use for inference")
    parser.add_argument('--common_description', '-c', help="An optional description that will be prepended to the ai generated one")
    parser.add_argument('--image_dir', '-i', help="A directory containing the images to tag")
    args = parser.parse_args()

    prompt = "USER: <image>\n" + args.prompt + "\nASSISTANT: "
    os.environ["BITSANDBYTES_NOWELCOME"] = "1"

    image_paths = find_image_files(args.image_dir)
    image_path_chunks = list(split_list(image_paths, torch.cuda.device_count()))

    print(f"Will use {torch.cuda.device_count()} processes to create tags")

    logging.set_verbosity_error()
    warnings.filterwarnings("ignore")
    torch.multiprocessing.set_start_method('spawn')

    queue = Queue()
    processies = list()
    for i in range(0, torch.cuda.device_count()):
        processies.append(Process(target=pipeline, args=(queue, image_path_chunks[i], prompt, torch.device(i), args.model, args.batch)))
        processies[-1].start()

    progress = tqdm(desc="Generating tags", total=len(image_paths))
    exit = False
    with open(os.path.join(args.image_dir, "metadata.jsonl"), mode='w') as output_file:
        while not exit:
            if not queue.empty():
                meta = queue.get()
                save_meta(output_file, meta, args.image_dir, args.common_description)
                progress.update()
            exit = True
            for process in processies:
                if process.is_alive():
                    exit = False
                    break

        while not queue.empty():
            meta = queue.get()
            save_meta(output_file, meta, args.image_dir, args.common_description)
            progress.update()

    for process in processies:
        process.join()
DanbooruTagger/deepdanbooru_onnx/__init__.py (new file, 3 lines)
@@ -0,0 +1,3 @@
from .deepdanbooru_onnx import DeepDanbooru
from .deepdanbooru_onnx import process_image
__version__ = '0.0.8'
DanbooruTagger/deepdanbooru_onnx/deepdanbooru_onnx.py (new file, 244 lines)
@@ -0,0 +1,244 @@
import onnxruntime as ort
from PIL import Image
import numpy as np
import os
from tqdm import tqdm
import requests
import hashlib
from typing import Iterator, List, Union
import shutil
from pathlib import Path


def process_image(image: Image.Image) -> np.ndarray:
    """
    Convert an image to a numpy array.
    :param image: the image to convert
    :return: the numpy array
    """

    image = image.convert("RGB").resize((512, 512))
    image = np.array(image).astype(np.float32) / 255
    image = image.transpose((2, 0, 1)).reshape(1, 3, 512, 512).transpose((0, 2, 3, 1))
    return image
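
The transpose-reshape-transpose chain above is an indirect way of adding a batch axis: the result is NHWC, shape (1, 512, 512, 3), float32 in [0, 1]. A minimal equivalence sketch (the input image is hypothetical):

img = Image.new("RGB", (640, 480))  # hypothetical input
arr = np.array(img.convert("RGB").resize((512, 512))).astype(np.float32) / 255
direct = arr[np.newaxis, ...]  # simply prepend a batch axis
assert np.array_equal(process_image(img), direct)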

def download(url: str, save_path: str, md5: str, length: int) -> bool:
    """
    Download a file from url to save_path.
    If the file already exists, check its md5.
    If the md5 matches, return True; if it doesn't, return False.
    :param url: the url of the file to download
    :param save_path: the path to save the file
    :param md5: the md5 of the file
    :param length: the length of the file in bytes
    :return: True if the file is downloaded successfully, False otherwise
    """

    try:
        response = requests.get(url=url, stream=True)
        with open(save_path, "wb") as f:
            with tqdm.wrapattr(
                response.raw, "read", total=length, desc="Downloading"
            ) as r_raw:
                shutil.copyfileobj(r_raw, f)
        return (
            True
            if hashlib.md5(open(save_path, "rb").read()).hexdigest() == md5
            else False
        )
    except Exception as e:
        print(e)
        return False


def download_model():
    """
    Download the model and tags file from the server.
    :return: the path to the model and tags file
    """

    model_url = (
        "https://huggingface.co/chinoll/deepdanbooru/resolve/main/deepdanbooru.onnx"
    )
    tags_url = "https://huggingface.co/chinoll/deepdanbooru/resolve/main/tags.txt"
    model_md5 = "16be4e40ebcc0b1d1915bbf31f00969f"
    tags_md5 = "a3f764de985cdeba89f1d232a4204402"
    model_length = 643993025
    tags_length = 133810

    home = str(Path.home()) + "/.deepdanbooru_onnx/"
    if not os.path.exists(home):
        os.mkdir(home)

    model_name = "deepdanbooru.onnx"
    tags_name = "tags.txt"

    model_path = home + model_name
    tags_path = home + tags_name
    if os.path.exists(model_path):
        if hashlib.md5(open(model_path, "rb").read()).hexdigest() != model_md5:
            os.remove(model_path)
            if not download(model_url, model_path, model_md5, model_length):
                raise ValueError("Model download failed")
    else:
        if not download(model_url, model_path, model_md5, model_length):
            raise ValueError("Model download failed")

    if os.path.exists(tags_path):
        if hashlib.md5(open(tags_path, "rb").read()).hexdigest() != tags_md5:
            os.remove(tags_path)
            if not download(tags_url, tags_path, tags_md5, tags_length):
                raise ValueError("Tags download failed")
    else:
        if not download(tags_url, tags_path, tags_md5, tags_length):
            raise ValueError("Tags download failed")
    return model_path, tags_path
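
The weights are cached under ~/.deepdanbooru_onnx/ and re-verified by md5 on each call, so a corrupted or partial download is replaced rather than reused. A small sketch of inspecting that cache (paths taken from the code above):

cache = Path.home() / ".deepdanbooru_onnx"
for name in ("deepdanbooru.onnx", "tags.txt"):
    print(cache / name, "cached" if (cache / name).exists() else "missing")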

class DeepDanbooru:
    def __init__(
        self,
        mode: str = "auto",
        model_path: Union[str, None] = None,
        tags_path: Union[str, None] = None,
        threshold: Union[float, int] = 0.6,
        pin_memory: bool = False,
        batch_size: int = 1,
    ):
        """
        Initialize the DeepDanbooru class.
        :param mode: the mode of the model, "cpu" or "gpu" or "auto"
        :param model_path: the path to the model file
        :param tags_path: the path to the tags file
        :param threshold: the threshold of the model
        :param pin_memory: whether to use pin memory
        :param batch_size: the batch size of the model
        """

        providers = {
            "cpu": "CPUExecutionProvider",
            "gpu": "CUDAExecutionProvider",
            "tensorrt": "TensorrtExecutionProvider",
            "auto": (
                "CUDAExecutionProvider"
                if "CUDAExecutionProvider" in ort.get_available_providers()
                else "CPUExecutionProvider"
            ),
        }

        if not (isinstance(threshold, float) or isinstance(threshold, int)):
            raise TypeError("threshold must be float or int")
        if threshold < 0 or threshold > 1:
            raise ValueError("threshold must be between 0 and 1")
        if mode not in providers:
            raise ValueError(
                "Mode not supported. Please choose from: cpu, gpu, tensorrt"
            )
        if providers[mode] not in ort.get_available_providers():
            raise ValueError(
                f"Your device does not support {mode}. Please choose from: cpu"
            )
        if model_path is not None and not os.path.exists(model_path):
            raise FileNotFoundError("Model file not found")
        if tags_path is not None and not os.path.exists(tags_path):
            raise FileNotFoundError("Tags file not found")

        if model_path is None or tags_path is None:
            model_path, tags_path = download_model()

        self.session = ort.InferenceSession(model_path, providers=[providers[mode]])
        self.tags = [i.replace("\n", "") for i in open(tags_path, "r").readlines()]

        self.input_name = self.session.get_inputs()[0].name
        self.output_name = [output.name for output in self.session.get_outputs()]
        self.threshold = threshold
        self.pin_memory = pin_memory
        self.batch_size = batch_size
        self.mode = mode
        self.cache = {}

    def __str__(self) -> str:
        return f"DeepDanbooru(mode={self.mode}, threshold={self.threshold}, pin_memory={self.pin_memory}, batch_size={self.batch_size})"

    def __repr__(self) -> str:
        return self.__str__()

    def from_image_inference(self, image: Image.Image) -> dict:
        image = process_image(image)
        return self.predict(image)

    def from_ndarray_inferece(self, image: np.ndarray) -> dict:
        if image.shape != (1, 512, 512, 3):
            raise ValueError(f"Image must be {(1, 512, 512, 3)}")
        return self.predict(image)

    def from_file_inference(self, image: str) -> dict:
        return self.from_image_inference(Image.open(image))

    # Note: this is a generator, so the annotation is Iterator[dict] rather than
    # the List[dict] given in the original listing
    def from_list_inference(self, image: Union[list, tuple]) -> Iterator[dict]:
        if self.pin_memory:
            image = [process_image(Image.open(i)) for i in image]
        for i in [
            image[i : i + self.batch_size]
            for i in range(0, len(image), self.batch_size)
        ]:
            imagelist = i
            bs = len(i)
            _imagelist, idx, hashlist = [], [], []
            for j in range(len(i)):
                img = Image.open(i[j]) if not self.pin_memory else imagelist[j]
                image_hash = hashlib.md5(np.array(img).astype(np.uint8)).hexdigest()
                hashlist.append(image_hash)
                if image_hash in self.cache:
                    continue
                if not self.pin_memory:
                    _imagelist.append(process_image(img))
                else:
                    _imagelist.append(imagelist[j])
                idx.append(j)

            imagelist = _imagelist
            if len(imagelist) != 0:
                _image = np.vstack(imagelist)
                results = self.inference(_image)
                results_idx = 0
            else:
                results = []

            for i in range(bs):
                image_tag = {}
                if i in idx:
                    hash = hashlist[i]
                    for tag, score in zip(self.tags, results[results_idx]):
                        if score >= self.threshold:
                            image_tag[tag] = score
                    results_idx += 1
                    self.cache[hash] = image_tag
                    yield image_tag
                else:
                    yield self.cache[hashlist[i]]

    def inference(self, image):
        return self.session.run(self.output_name, {self.input_name: image})[0]

    def predict(self, image):
        result = self.inference(image)
        image_tag = {}
        for tag, score in zip(self.tags, result[0]):
            if score >= self.threshold:
                image_tag[tag] = score
        return image_tag

    def __call__(self, image) -> Union[dict, List[dict]]:
        if isinstance(image, str):
            return self.from_file_inference(image)
        elif isinstance(image, np.ndarray):
            return self.from_ndarray_inferece(image)
        elif isinstance(image, list) or isinstance(image, tuple):
            return self.from_list_inference(image)
        elif isinstance(image, Image.Image):
            return self.from_image_inference(image)
        else:
            raise ValueError("Image must be a file path or a numpy array or list/tuple")
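
Worth noting: for list or tuple input, __call__ returns the from_list_inference generator, which memoizes per-image results in self.cache keyed by an md5 of the pixel data, so the results must be iterated. A minimal usage sketch (file names hypothetical):

danbooru = DeepDanbooru(mode="auto", batch_size=4)
for path, tags in zip(["a.png", "b.png"], danbooru(["a.png", "b.png"])):
    print(path, sorted(tags, key=tags.get, reverse=True)[:5])  # five highest-scoring tags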
DanbooruTagger/example.py (new file, 3 lines)
@@ -0,0 +1,3 @@
from deepdanbooru_onnx import DeepDanbooru
danbooru = DeepDanbooru()
print(danbooru("/run/media/philipp/20404acc-312c-44f2-b2d1-3a0a14257cc6/.Media/porn/00244-3145022840.png"))
PersonDatasetAssembler/PersonDatasetAssembler.py (new executable file, 154 lines)
@@ -0,0 +1,154 @@
#!/bin/python3
import argparse
import os
from typing import Iterator
import cv2
import numpy
from tqdm import tqdm
from wand.exceptions import BlobError
from wand.image import Image

image_ext_ocv = [".bmp", ".jpeg", ".jpg", ".png"]
image_ext_wand = [".dng", ".arw"]


class LoadException(Exception):
    pass


def find_image_files(path: str) -> list[str]:
    paths = list()
    for root, dirs, files in os.walk(path):
        for filename in files:
            name, extension = os.path.splitext(filename)
            if extension.lower() in image_ext_ocv or extension in image_ext_wand:
                paths.append(os.path.join(root, filename))
    return paths


def image_loader(paths: list[str]) -> Iterator[numpy.ndarray]:
    for path in paths:
        name, extension = os.path.splitext(path)
        extension = extension.lower()
        if extension in image_ext_ocv:
            image = cv2.imread(path)
            if image is None:
                print(f"Warning: could not load {path}")
            else:
                yield image
        elif extension in image_ext_wand:
            try:
                image = Image(filename=path)
            except BlobError as e:
                print(f"Warning: could not load {path}, {e}")
                continue
            # assumption: the original listing stops after loading the wand image;
            # convert it to a BGR numpy array and yield it like the OpenCV branch
            yield cv2.cvtColor(numpy.array(image), cv2.COLOR_RGB2BGR)


def extract_video_images(video: cv2.VideoCapture, interval: int = 0):
    ret = True
    frame_counter = 0
    while ret:
        video.set(cv2.CAP_PROP_POS_FRAMES, frame_counter)
        ret, frame = video.read()
        if ret:
            yield frame
        frame_counter += interval


def contains_face_match(detector: cv2.FaceDetectorYN, recognizer: cv2.FaceRecognizerSF, image: numpy.ndarray, referance_features: list, thresh: float) -> tuple[float, bool]:
    detector.setInputSize([image.shape[1], image.shape[0]])
    faces = detector.detect(image)[1]
    if faces is None:
        return 0, False
    for face in faces:
        cropped_image = recognizer.alignCrop(image, face)
        features = recognizer.feature(cropped_image)
        # average the match score over all reference embeddings
        score_accum = 0.0
        for referance in referance_features:
            score_accum += recognizer.match(referance, features, 0)
        score = score_accum / len(referance_features)
        if score > thresh:
            return score, True
    return 0, False


def process_referance(detector: cv2.FaceDetectorYN, recognizer: cv2.FaceRecognizerSF, referance_path: str) -> list:
    images = list()
    out = list()

    if os.path.isfile(referance_path):
        image = cv2.imread(referance_path)
        if image is None:
            print(f"Could not load image from {referance_path}")
        else:
            images.append(image)
    elif os.path.isdir(referance_path):
        filenames = find_image_files(referance_path)
        images = list(image_loader(filenames))

    for image in images:
        detector.setInputSize([image.shape[1], image.shape[0]])
        faces = detector.detect(image)[1]
        if faces is None:
            print("unable to find face in reference image")
            exit(1)
        image = recognizer.alignCrop(image, faces[0])
        features = recognizer.feature(image)
        out.append(features)

    return out


if __name__ == "__main__":
    parser = argparse.ArgumentParser("Script to assemble a dataset of images of a specific person")
    parser.add_argument('--out', '-o', default="out", help="place to put dataset")
    parser.add_argument('--input', '-i', required=True, help="directory or video file to get images from")
    parser.add_argument('--skip', '-s', default=0, type=int, help="skip n frames between samples when grabbing from a video file")
    parser.add_argument('--referance', '-r', required=True, help="reference image or directory of images of the person to be found")
    parser.add_argument('--match_model', '-m', required=True, help="Path to the onnx recognition model to be used")
    parser.add_argument('--detect_model', '-d', required=True, help="Path to the onnx detection model to be used")
    parser.add_argument('--threshold', '-t', default=0.362, type=float, help="match threshold to use")
    parser.add_argument('--invert', '-n', action='store_true', help="output files that DON'T match")
    args = parser.parse_args()

    recognizer = cv2.FaceRecognizerSF.create(model=args.match_model, config="", backend_id=cv2.dnn.DNN_BACKEND_DEFAULT, target_id=cv2.dnn.DNN_TARGET_CPU)
    detector = cv2.FaceDetectorYN.create(model=args.detect_model, config="", input_size=[320, 320],
        score_threshold=0.6, nms_threshold=0.3, top_k=5000, backend_id=cv2.dnn.DNN_BACKEND_DEFAULT, target_id=cv2.dnn.DNN_TARGET_CPU)

    referance_features = process_referance(detector, recognizer, args.referance)
    if len(referance_features) < 1:
        print(f"Could not load any reference image(s) from {args.referance}")
        exit(1)

    if os.path.isfile(args.input):
        video = cv2.VideoCapture(args.input)
        if not video.isOpened():
            print(f"Unable to open {args.input} as a video file")
            exit(1)
        image_generator = extract_video_images(video, args.skip + 1)
        total_images = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) / (args.skip + 1)
    elif os.path.isdir(args.input):
        image_filenams = find_image_files(args.input)
        image_generator = image_loader(image_filenams)
        total_images = len(image_filenams)
    else:
        print(f"{args.input} is not a video file nor is it a directory")
        exit(1)

    os.makedirs(args.out, exist_ok=True)

    progress = tqdm(total=int(total_images), desc="0.00")
    counter = 0
    for image in image_generator:
        if image.shape[0] > 512:
            aspect = image.shape[0] / image.shape[1]
            # pass interpolation by keyword; passed positionally as in the original,
            # the flag would land in the dst/fx/fy slots of cv2.resize
            resized = cv2.resize(image, (int(512 / aspect), 512), interpolation=cv2.INTER_AREA)
        else:
            resized = image
        score, match = contains_face_match(detector, recognizer, resized, referance_features, args.threshold)
        if (match and not args.invert) or (not match and args.invert):
            filename = f"{counter:04}.png"
            cv2.imwrite(os.path.join(args.out, filename), image)
            counter += 1
        progress.set_description(f"{score:1.2f}")
        progress.update()
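
A hedged example of driving the script (flag spellings taken from the argparse definitions above; the input video and reference image are hypothetical, while the two ONNX file names match the YuNet detection and SFace recognition models embedded by SmartCrop/facerecognizer.cpp below):

import subprocess

subprocess.run([
    "python3", "PersonDatasetAssembler/PersonDatasetAssembler.py",
    "--input", "footage.mp4",      # hypothetical video source
    "--referance", "person.jpg",   # note: the flag really is spelled 'referance'
    "--detect_model", "face_detection_yunet_2023mar.onnx",
    "--match_model", "face_recognition_sface_2021dec.onnx",
    "--out", "dataset",
], check=True)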
SmartCrop/CMakeLists.txt (new file, 16 lines)
@@ -0,0 +1,16 @@
cmake_minimum_required(VERSION 3.6)

find_package(OpenCV REQUIRED)

set(CMAKE_CXX_STANDARD 17)

set(SRC_FILES main.cpp yolo.cpp tokenize.cpp log.cpp seamcarving.cpp utils.cpp intelligentroi.cpp facerecognizer.cpp)

add_executable(smartcrop ${SRC_FILES})
target_link_libraries(smartcrop ${OpenCV_LIBS} -ltbb)
target_include_directories(smartcrop PRIVATE ${OpenCV_INCLUDE_DIRS})
target_compile_options(smartcrop PRIVATE -s -g -Wall)
message(WARNING ${WEIGHT_DIR})
target_compile_definitions(smartcrop PUBLIC WEIGHT_DIR="${WEIGHT_DIR}")

install(TARGETS smartcrop RUNTIME DESTINATION bin)
SmartCrop/facerecognizer.cpp (new file, 143 lines)
@@ -0,0 +1,143 @@
#include "facerecognizer.h"
#include <filesystem>
#include <cassert>

#define INCBIN_PREFIX r
#include "incbin.h"

INCBIN(defaultRecognizer, WEIGHT_DIR "/face_recognition_sface_2021dec.onnx");
INCBIN(defaultDetector, WEIGHT_DIR "/face_detection_yunet_2023mar.onnx");

#include <opencv2/dnn/dnn.hpp>
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <fstream>

#include "log.h"

static const std::vector<unsigned char> onnx((unsigned char*)rdefaultDetectorData, ((unsigned char*)rdefaultDetectorData)+rdefaultDetectorSize);

FaceRecognizer::FaceRecognizer(std::filesystem::path recognizerPath, const std::filesystem::path& detectorPath, const std::vector<cv::Mat>& referances)
{
	if(detectorPath.empty())
	{
		Log(Log::INFO)<<"Using builtin face detection model";

		detector = cv::FaceDetectorYN::create("onnx", onnx, std::vector<unsigned char>(), {320, 320}, 0.6, 0.3, 5000, cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
		if(!detector)
			throw LoadException("Unable to load detector network from built in file");
	}
	else
	{
		detector = cv::FaceDetectorYN::create(detectorPath, "", {320, 320}, 0.6, 0.3, 5000, cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);
		if(!detector)
			throw LoadException("Unable to load detector network from "+detectorPath.string());
	}

	bool defaultNetwork = recognizerPath.empty();

	if(defaultNetwork)
	{
		Log(Log::INFO)<<"Using builtin face recognition model";
		recognizerPath = cv::tempfile("onnx");
		std::ofstream file(recognizerPath);
		if(!file.is_open())
			throw LoadException("Unable to open temporary file at "+recognizerPath.string());
		Log(Log::DEBUG)<<"Using "<<recognizerPath<<" as temporary file for onnx recognition network";
		file.write(reinterpret_cast<const char*>(rdefaultRecognizerData), rdefaultRecognizerSize);
		file.close();
	}

	recognizer = cv::FaceRecognizerSF::create(recognizerPath.string(), "", cv::dnn::Backend::DNN_BACKEND_OPENCV, cv::dnn::Target::DNN_TARGET_CPU);

	if(defaultNetwork)
		std::filesystem::remove(recognizerPath);

	if(!recognizer)
		throw LoadException("Unable to load recognizer network from "+recognizerPath.string());

	addReferances(referances);
}

cv::Mat FaceRecognizer::detectFaces(const cv::Mat& input)
{
	detector->setInputSize(input.size());
	cv::Mat faces;
	detector->detect(input, faces);
	return faces;
}

bool FaceRecognizer::addReferances(const std::vector<cv::Mat>& referances)
{
	bool ret = false;
	for(const cv::Mat& image : referances)
	{
		cv::Mat faces = detectFaces(image);
		assert(faces.cols == 15);
		if(faces.empty())
		{
			Log(Log::WARN)<<"A reference image provided does not contain any face";
			continue;
		}
		if(faces.rows > 1)
			Log(Log::WARN)<<"A reference image provided contains more than one face, only the first detected face will be considered";
		cv::Mat cropedImage;
		recognizer->alignCrop(image, faces.row(0), cropedImage);
		cv::Mat features;
		recognizer->feature(cropedImage, features);
		referanceFeatures.push_back(features.clone());
		ret = true;
	}

	return ret;
}

void FaceRecognizer::setThreshold(double threasholdIn)
{
	threshold = threasholdIn;
}

double FaceRecognizer::getThreshold()
{
	return threshold;
}

void FaceRecognizer::clearReferances()
{
	referanceFeatures.clear();
}

FaceRecognizer::Detection FaceRecognizer::isMatch(const cv::Mat& input, bool alone)
{
	cv::Mat faces = detectFaces(input);

	Detection bestMatch;
	bestMatch.confidence = 0;
	bestMatch.person = -1;

	if(alone && faces.rows > 1)
	{
		bestMatch.person = -2;
		return bestMatch;
	}

	for(int i = 0; i < faces.rows; ++i)
	{
		cv::Mat face;
		recognizer->alignCrop(input, faces.row(i), face);
		cv::Mat features;
		recognizer->feature(face, features);
		features = features.clone();
		for(size_t referanceIndex = 0; referanceIndex < referanceFeatures.size(); ++referanceIndex)
		{
			double score = recognizer->match(referanceFeatures[referanceIndex], features, cv::FaceRecognizerSF::FR_COSINE);
			if(score > threshold && score > bestMatch.confidence)
			{
				bestMatch.confidence = score;
				bestMatch.person = referanceIndex;
				bestMatch.rect = cv::Rect(faces.at<int>(i, 0), faces.at<int>(i, 1), faces.at<int>(i, 2), faces.at<int>(i, 3));
			}
		}
	}

	return bestMatch;
}
SmartCrop/facerecognizer.h (new file, 48 lines)
@@ -0,0 +1,48 @@
#pragma once
#include <exception>
#include <string>
#include <opencv2/core/mat.hpp>
#include <opencv2/objdetect/face.hpp>
#include <opencv2/core.hpp>
#include <vector>
#include <memory>
#include <filesystem>

class FaceRecognizer
{
public:

	struct Detection
	{
		int person;
		float confidence;
		cv::Rect rect;
	};

	class LoadException : public std::exception
	{
	private:
		std::string message;
	public:
		LoadException(const std::string& msg): std::exception(), message(msg) {}
		virtual const char* what() const throw() override
		{
			return message.c_str();
		}
	};

private:
	std::vector<cv::Mat> referanceFeatures;
	std::shared_ptr<cv::FaceRecognizerSF> recognizer;
	std::shared_ptr<cv::FaceDetectorYN> detector;

	double threshold = 0.363;

public:
	FaceRecognizer(std::filesystem::path recognizerPath = "", const std::filesystem::path& detectorPath = "", const std::vector<cv::Mat>& referances = std::vector<cv::Mat>());
	cv::Mat detectFaces(const cv::Mat& input);
	Detection isMatch(const cv::Mat& input, bool alone = false);
	bool addReferances(const std::vector<cv::Mat>& referances);
	void setThreshold(double threashold);
	double getThreshold();
	void clearReferances();
};
SmartCrop/intelligentroi.cpp (changed)
@@ -31,11 +31,12 @@ void InteligentRoi::slideRectToPoint(cv::Rect& rect, const cv::Point2i& point)
 	}
 }
 
-cv::Rect InteligentRoi::maxRect(const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude)
+cv::Rect InteligentRoi::maxRect(bool& incompleate, const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude)
 {
-	int radius = std::min(imageSize.height, imageSize.width)/2;
+	incompleate = false;
+	int diameter = std::min(imageSize.height, imageSize.width);
 	cv::Point2i point(imageSize.width/2, imageSize.height/2);
-	cv::Rect candiate(point.x-radius, point.y-radius, radius*2, radius*2);
+	cv::Rect candiate(point.x-diameter/2, point.y-diameter/2, diameter, diameter);
 
 	std::sort(mustInclude.begin(), mustInclude.end(),
 		[&point](const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b){return compPointPrio(a, b, point);});
@@ -43,8 +44,9 @@ cv::Rect InteligentRoi::maxRect(const cv::Size2i& imageSize, std::vector<std::pa
 	while(true)
 	{
 		cv::Rect includeRect = rectFromPoints(mustInclude);
-		if(includeRect.width-2 > radius || includeRect.height-2 > radius)
+		if(includeRect.width-2 > diameter || includeRect.height-2 > diameter)
 		{
+			incompleate = true;
 			slideRectToPoint(candiate, mustInclude.back().first);
 			mustInclude.pop_back();
 			Log(Log::DEBUG)<<"cant fill";
@@ -52,7 +54,9 @@ cv::Rect InteligentRoi::maxRect(const cv::Size2i& imageSize, std::vector<std::pa
 				Log(Log::DEBUG)<<mipoint.first<<' '<<pointDist(mipoint.first, point)<<' '<<mipoint.second;
 		}
 		else
+		{
 			break;
+		}
 	}
 
 	for(const std::pair<cv::Point2i, int>& includePoint : mustInclude)
@@ -75,25 +79,30 @@ InteligentRoi::InteligentRoi(const Yolo& yolo)
 	personId = yolo.getClassForStr("person");
 }
 
-cv::Rect InteligentRoi::getCropRectangle(const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize)
+bool InteligentRoi::getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize)
 {
-	if(!detections.empty())
+	std::vector<std::pair<cv::Point2i, int>> corners;
+	for(size_t i = 0; i < detections.size(); ++i)
 	{
-		std::vector<std::pair<cv::Point2i, int>> corners;
-		for(size_t i = 0; i < detections.size(); ++i)
+		int priority = detections[i].priority;
+		if(detections[i].class_id == personId)
+		{
+			corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width/2, 0), priority+2});
+			corners.push_back({detections[i].box.tl(), priority+1});
+			corners.push_back({detections[i].box.br(), priority});
+			corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width, 0), priority+1});
+			corners.push_back({detections[i].box.br()+cv::Point2i(0-detections[i].box.width, 0), priority});
+		}
+		else
 		{
-			int priority = detections[i].priority;
-			if(detections[i].class_id == personId)
-				corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width/2, 0), priority+1});
 			corners.push_back({detections[i].box.tl(), priority});
 			corners.push_back({detections[i].box.br(), priority});
 			corners.push_back({detections[i].box.tl()+cv::Point2i(detections[i].box.width, 0), priority});
 			corners.push_back({detections[i].box.br()+cv::Point2i(0-detections[i].box.width, 0), priority});
 		}
-
-		return maxRect(imageSize, corners);
 	}
 
-	Log(Log::DEBUG)<<"Using center crop as there are no detections";
-	return maxRect(imageSize);
+	bool incompleate;
+	out = maxRect(incompleate, imageSize, corners);
+	return incompleate;
 }

SmartCrop/intelligentroi.h (changed)
@@ -10,9 +10,9 @@ private:
 	int personId;
 	static bool compPointPrio(const std::pair<cv::Point2i, int>& a, const std::pair<cv::Point2i, int>& b, const cv::Point2i& center);
 	static void slideRectToPoint(cv::Rect& rect, const cv::Point2i& point);
-	static cv::Rect maxRect(const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude = {});
+	static cv::Rect maxRect(bool& incompleate, const cv::Size2i& imageSize, std::vector<std::pair<cv::Point2i, int>> mustInclude = {});
 
 public:
 	InteligentRoi(const Yolo& yolo);
-	cv::Rect getCropRectangle(const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize);
+	bool getCropRectangle(cv::Rect& out, const std::vector<Yolo::Detection>& detections, const cv::Size2i& imageSize);
 };
SmartCrop/main.cpp (new file, 440 lines)
@@ -0,0 +1,440 @@
#include <filesystem>
#include <iostream>
#include <opencv2/core.hpp>
#include <opencv2/core/types.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <algorithm>
#include <execution>
#include <string>
#include <vector>
#include <numeric>
#include <mutex>
#include <thread>
#include <cassert>

#include "yolo.h"
#include "log.h"
#include "options.h"
#include "utils.h"
#include "intelligentroi.h"
#include "seamcarving.h"
#include "facerecognizer.h"

const Yolo::Detection* pointInDetectionHoriz(int x, const std::vector<Yolo::Detection>& detections, const Yolo::Detection* ignore = nullptr)
{
	const Yolo::Detection* inDetection = nullptr;
	for(const Yolo::Detection& detection : detections)
	{
		if(ignore && ignore == &detection)
			continue;

		if(detection.box.x <= x && detection.box.x+detection.box.width >= x)
		{
			if(!inDetection || detection.box.br().x > inDetection->box.br().x)
				inDetection = &detection;
		}
	}
	return inDetection;
}

bool findRegionEndpointHoriz(int& x, const std::vector<Yolo::Detection>& detections, int imgSizeX)
{
	const Yolo::Detection* inDetection = pointInDetectionHoriz(x, detections);

	Log(Log::DEBUG, false)<<__func__<<" point "<<x;

	if(!inDetection)
	{
		const Yolo::Detection* closest = nullptr;
		for(const Yolo::Detection& detection : detections)
		{
			if(detection.box.x > x)
			{
				// keep the box with the smallest distance to x; the original compared
				// with > here, which would have selected the farthest box
				if(closest == nullptr || detection.box.x-x < closest->box.x-x)
					closest = &detection;
			}
		}
		if(closest)
			x = closest->box.x;
		else
			x = imgSizeX;

		Log(Log::DEBUG)<<" is not in any box and will be moved to "<<x<<" where the closest box ("<<(closest ? closest->className : "null")<<") is";
		return false;
	}
	else
	{
		x = inDetection->box.br().x;
		Log(Log::DEBUG, false)<<" is in a box and will be moved to its end "<<x<<" where ";
		const Yolo::Detection* candidateDetection = pointInDetectionHoriz(x, detections, inDetection);
		if(candidateDetection && candidateDetection->box.br().x > x)
		{
			Log(Log::DEBUG)<<"it is again in a box";
			return findRegionEndpointHoriz(x, detections, imgSizeX);
		}
		else
		{
			Log(Log::DEBUG)<<"it is not in a box";
			return true;
		}
	}
}

std::vector<std::pair<cv::Mat, bool>> cutImageIntoHorzRegions(cv::Mat& image, const std::vector<Yolo::Detection>& detections)
{
	std::vector<std::pair<cv::Mat, bool>> out;

	std::cout<<__func__<<' '<<image.cols<<'x'<<image.rows<<std::endl;

	for(int x = 0; x < image.cols; ++x)
	{
		int start = x;
		bool frozen = findRegionEndpointHoriz(x, detections, image.cols);

		int width = x-start;
		if(x < image.cols)
			++width;
		cv::Rect rect(start, 0, width, image.rows);
		Log(Log::DEBUG)<<__func__<<" region\t"<<rect;
		cv::Mat slice = image(rect);
		out.push_back({slice, frozen});
	}

	return out;
}

cv::Mat assembleFromSlicesHoriz(const std::vector<std::pair<cv::Mat, bool>>& slices)
{
	assert(!slices.empty());

	int cols = 0;
	for(const std::pair<cv::Mat, bool>& slice : slices)
		cols += slice.first.cols;

	// cv::Mat takes (rows, cols, type); the original listing swapped the first two arguments
	cv::Mat image(slices[0].first.rows, cols, slices[0].first.type());
	Log(Log::DEBUG)<<__func__<<' '<<image.size()<<' '<<cols<<' '<<slices[0].first.rows;

	int col = 0;
	for(const std::pair<cv::Mat, bool>& slice : slices)
	{
		cv::Rect rect(col, 0, slice.first.cols, slice.first.rows);
		Log(Log::DEBUG)<<__func__<<' '<<rect;
		slice.first.copyTo(image(rect));
		col += slice.first.cols-1;
	}

	return image;
}

void transposeRect(cv::Rect& rect)
{
	int x = rect.x;
	rect.x = rect.y;
	rect.y = x;

	int width = rect.width;
	rect.width = rect.height;
	rect.height = width;
}

bool seamCarveResize(cv::Mat& image, std::vector<Yolo::Detection> detections, double targetAspectRatio = 1.0)
{
	detections.erase(std::remove_if(detections.begin(), detections.end(), [](const Yolo::Detection& detection){return detection.priority < 3;}), detections.end());

	double aspectRatio = image.cols/static_cast<double>(image.rows);

	Log(Log::DEBUG)<<"Image size "<<image.size()<<" aspect ratio "<<aspectRatio<<" target aspect ratio "<<targetAspectRatio;

	bool vertical = false;
	if(aspectRatio > targetAspectRatio)
		vertical = true;

	int requiredLines = 0;
	if(!vertical)
		requiredLines = image.rows*targetAspectRatio - image.cols;
	else
		requiredLines = image.cols/targetAspectRatio - image.rows;

	Log(Log::DEBUG)<<__func__<<' '<<requiredLines<<" lines are required in "<<(vertical ? "vertical" : "horizontal")<<" direction";

	if(vertical)
	{
		cv::transpose(image, image);
		for(Yolo::Detection& detection : detections)
			transposeRect(detection.box);
	}

	std::vector<std::pair<cv::Mat, bool>> slices = cutImageIntoHorzRegions(image, detections);
	Log(Log::DEBUG)<<"Image has "<<slices.size()<<" slices:";
	int totalResizableSize = 0;
	for(const std::pair<cv::Mat, bool>& slice : slices)
	{
		Log(Log::DEBUG)<<"a "<<(slice.second ? "frozen" : "unfrozen")<<" slice of size "<<slice.first.cols;
		if(!slice.second)
			totalResizableSize += slice.first.cols;
	}

	if(totalResizableSize < requiredLines+1)
	{
		Log(Log::WARN)<<"Unable to seam carve as there are only "<<totalResizableSize<<" unfrozen cols";
		if(vertical)
			cv::transpose(image, image);
		return false;
	}

	std::vector<int> seamsForSlice(slices.size(), 0);
	for(size_t i = 0; i < slices.size(); ++i)
	{
		if(!slices[i].second)
			seamsForSlice[i] = (static_cast<double>(slices[i].first.cols)/totalResizableSize)*requiredLines;
	}

	int residual = requiredLines - std::accumulate(seamsForSlice.begin(), seamsForSlice.end(), decltype(seamsForSlice)::value_type(0));
	for(ssize_t i = slices.size()-1; i >= 0; --i)
	{
		if(!slices[i].second)
		{
			seamsForSlice[i] += residual;
			break;
		}
	}

	for(size_t i = 0; i < slices.size(); ++i)
	{
		if(seamsForSlice[i] != 0)
		{
			bool ret = SeamCarving::strechImage(slices[i].first, seamsForSlice[i], true);
			if(!ret)
			{
				if(vertical)
					cv::transpose(image, image);
				return false;
			}
		}
	}

	image = assembleFromSlicesHoriz(slices);

	if(vertical)
		cv::transpose(image, image);

	return true;
}

void drawDebugInfo(cv::Mat &image, const cv::Rect& rect, const std::vector<Yolo::Detection>& detections)
{
	for(const Yolo::Detection& detection : detections)
	{
		cv::rectangle(image, detection.box, detection.color, 3);
		std::string label = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4) + ' ' + std::to_string(detection.priority);
		cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_DUPLEX, 1, 1, 0);
		cv::Rect textBox(detection.box.x, detection.box.y - 40, labelSize.width + 10, labelSize.height + 20);
		cv::rectangle(image, textBox, detection.color, cv::FILLED);
		cv::putText(image, label, cv::Point(detection.box.x + 5, detection.box.y - 10), cv::FONT_HERSHEY_DUPLEX, 1, cv::Scalar(0, 0, 0), 1, 0);
	}

	cv::rectangle(image, rect, cv::Scalar(0, 0, 255), 8);
}

static void reduceSize(cv::Mat& image, const cv::Size& targetSize)
{
	int longTargetSize = std::max(targetSize.width, targetSize.height)*2;
	if(std::max(image.cols, image.rows) > longTargetSize)
	{
		if(image.cols > image.rows)
		{
			double ratio = static_cast<double>(longTargetSize)/image.cols;
			cv::resize(image, image, {longTargetSize, static_cast<int>(image.rows*ratio)}, 0, 0, ratio < 1 ? cv::INTER_AREA : cv::INTER_CUBIC);
		}
		else
		{
			double ratio = static_cast<double>(longTargetSize)/image.rows;
			cv::resize(image, image, {static_cast<int>(image.cols*ratio), longTargetSize}, 0, 0, ratio < 1 ? cv::INTER_AREA : cv::INTER_CUBIC);
		}
	}
}

void pipeline(const std::filesystem::path& path, const Config& config, Yolo& yolo, FaceRecognizer* recognizer,
	std::mutex& reconizerMutex, const std::filesystem::path& debugOutputPath)
{
	InteligentRoi intRoi(yolo);
	cv::Mat image = cv::imread(path);
	if(!image.data)
	{
		Log(Log::WARN)<<"could not load image "<<path<<" skipping";
		return;
	}

	reduceSize(image, config.targetSize);

	std::vector<Yolo::Detection> detections = yolo.runInference(image);

	Log(Log::DEBUG)<<"Got "<<detections.size()<<" detections for "<<path;
	for(Yolo::Detection& detection : detections)
	{
		bool hasmatch = false;
		if(recognizer && detection.className == "person")
		{
			cv::Mat person = image(detection.box);
			reconizerMutex.lock();
			FaceRecognizer::Detection match = recognizer->isMatch(person);
			reconizerMutex.unlock();
			if(match.person >= 0)
			{
				detection.priority += 10;
				hasmatch = true;
				detections.push_back({0, "Face", match.confidence, 20, {255, 0, 0}, match.rect});
			}
		}
		Log(Log::DEBUG)<<detection.class_id<<": "<<detection.className<<" at "<<detection.box<<" with prio "<<detection.priority<<(hasmatch ? " has match" : "");
	}

	cv::Rect crop;
	bool incompleate = intRoi.getCropRectangle(crop, detections, image.size());

	if(config.seamCarving && incompleate)
	{
		bool ret = seamCarveResize(image, detections, config.targetSize.aspectRatio());
		if(ret && image.size().aspectRatio() != config.targetSize.aspectRatio())
		{
			detections = yolo.runInference(image);
		}
	}

	cv::Mat croppedImage;

	if(image.size().aspectRatio() != config.targetSize.aspectRatio() && incompleate)
	{
		intRoi.getCropRectangle(crop, detections, image.size());

		if(config.debug)
		{
			cv::Mat debugImage = image.clone();
			drawDebugInfo(debugImage, crop, detections);
			bool ret = cv::imwrite(debugOutputPath/path.filename(), debugImage);
			if(!ret)
				Log(Log::WARN)<<"could not save debug image to "<<debugOutputPath/path.filename()<<" skipping";
		}

		croppedImage = image(crop);
	}
	else if(!incompleate)
	{
		croppedImage = image(crop);
	}
	else
	{
		croppedImage = image;
	}

	cv::Mat resizedImage;
	cv::resize(croppedImage, resizedImage, config.targetSize, 0, 0, cv::INTER_CUBIC);
	bool ret = cv::imwrite(config.outputDir/path.filename(), resizedImage);
	if(!ret)
		Log(Log::WARN)<<"could not save image to "<<config.outputDir/path.filename()<<" skipping";
}

void threadFn(const std::vector<std::filesystem::path>& images, const Config& config, FaceRecognizer* recognizer,
	std::mutex& reconizerMutex, const std::filesystem::path& debugOutputPath)
{
	Yolo yolo(config.modelPath, {640, 480}, config.classesPath, false);
	for(std::filesystem::path path : images)
		pipeline(path, config, yolo, recognizer, reconizerMutex, debugOutputPath);
}

template<typename T>
std::vector<std::vector<T>> splitVector(const std::vector<T>& vec, size_t parts)
{
	std::vector<std::vector<T>> out;

	size_t length = vec.size()/parts;
	size_t remain = vec.size() % parts;

	size_t begin = 0;
	size_t end = 0;

	for (size_t i = 0; i < std::min(parts, vec.size()); ++i)
	{
		end += (remain > 0) ? (length + !!(remain--)) : length;
		out.push_back(std::vector<T>(vec.begin() + begin, vec.begin() + end));
		begin = end;
	}

	return out;
}

int main(int argc, char* argv[])
{
	Log::level = Log::INFO;

	Config config;
	argp_parse(&argp, argc, argv, 0, 0, &config);

	if(config.outputDir.empty())
	{
		Log(Log::ERROR)<<"an output path \"-o\" is required";
		return 1;
	}

	if(config.imagePaths.empty())
	{
		Log(Log::ERROR)<<"at least one input image or directory is required";
		return 1;
	}

	std::vector<std::filesystem::path> imagePaths;

	for(const std::filesystem::path& path : config.imagePaths)
		getImageFiles(path, imagePaths);

	Log(Log::DEBUG)<<"Images:";
	for(const std::filesystem::path& path : imagePaths)
		Log(Log::DEBUG)<<path;

	if(imagePaths.empty())
	{
		Log(Log::ERROR)<<"no image was found\n";
		return 1;
	}

	if(!std::filesystem::exists(config.outputDir))
	{
		if(!std::filesystem::create_directory(config.outputDir))
		{
			Log(Log::ERROR)<<"could not create directory at "<<config.outputDir;
			return 1;
		}
	}

	std::filesystem::path debugOutputPath(config.outputDir/"debug");
	if(config.debug)
	{
		if(!std::filesystem::exists(debugOutputPath))
			std::filesystem::create_directory(debugOutputPath);
	}

	FaceRecognizer* recognizer = nullptr;
	std::mutex recognizerMutex;
	if(!config.focusPersonImage.empty())
	{
		cv::Mat personImage = cv::imread(config.focusPersonImage);
		if(personImage.empty())
		{
			Log(Log::ERROR)<<"Could not load image from "<<config.focusPersonImage;
			return 1;
		}
		recognizer = new FaceRecognizer();
		recognizer->addReferances({personImage});
		recognizer->setThreshold(config.threshold);
	}

	std::vector<std::thread> threads;
	std::vector<std::vector<std::filesystem::path>> imagePathParts = splitVector(imagePaths, std::thread::hardware_concurrency());

	// splitVector may return fewer parts than requested when there are fewer images
	// than threads, so iterate over the parts actually produced
	for(size_t i = 0; i < imagePathParts.size(); ++i)
		threads.push_back(std::thread(threadFn, imagePathParts[i], std::ref(config), recognizer, std::ref(recognizerMutex), std::ref(debugOutputPath)));

	for(std::thread& thread : threads)
		thread.join();

	return 0;
}
98
SmartCrop/options.h
Normal file
98
SmartCrop/options.h
Normal file
|
@ -0,0 +1,98 @@
#pragma once
#include <string>
#include <vector>
#include <argp.h>
#include <iostream>
#include <filesystem>
#include <opencv2/core/types.hpp>
#include "log.h"

const char *argp_program_version = "AIImagePreprocesses";
const char *argp_program_bug_address = "<carl@uvos.xyz>";
static char doc[] = "Application that transforms images into the formats, sizes and aspect ratios required for AI training";
static char args_doc[] = "FILE(S)";

static struct argp_option options[] =
{
	{"verbose", 'v', 0, 0, "Show debug messages" },
	{"quiet", 'q', 0, 0, "only output data" },
	{"model", 'm', "[FILENAME]", 0, "YoloV8 model to use for detection" },
	{"classes", 'c', "[FILENAME]", 0, "classes text file to use" },
	{"out", 'o', "[DIRECTORY]", 0, "directory where images are to be saved" },
	{"debug", 'd', 0, 0, "output debug images" },
	{"seam-carving", 's', 0, 0, "use seam carving to change image aspect ratio instead of cropping"},
	{"size", 'z', "[PIXELS]", 0, "target output size, default: 512"},
	{"focus-person", 'f', "[FILENAME]", 0, "a filename of an image of a person that the crop should focus on"},
	{"person-threshold", 't', "[NUMBER]", 0, "the threshold at which to consider a person matched, defaults to 0.363"},
	{0}
};

struct Config
{
	std::vector<std::filesystem::path> imagePaths;
	std::filesystem::path modelPath;
	std::filesystem::path classesPath;
	std::filesystem::path outputDir;
	std::filesystem::path focusPersonImage;
	bool seamCarving = false;
	bool debug = false;
	double threshold = 0.363;
	cv::Size targetSize = cv::Size(512, 512);
};

static error_t parse_opt (int key, char *arg, struct argp_state *state)
{
	Config *config = reinterpret_cast<Config*>(state->input);
	try
	{
		switch (key)
		{
		case 'q':
			Log::level = Log::ERROR;
			break;
		case 'v':
			Log::level = Log::DEBUG;
			break;
		case 'm':
			config->modelPath = arg;
			break;
		case 'c':
			config->classesPath = arg;
			break;
		case 'd':
			config->debug = true;
			break;
		case 'o':
			config->outputDir.assign(arg);
			break;
		case 's':
			config->seamCarving = true;
			break;
		case 'f':
			config->focusPersonImage = arg;
			break;
		case 't':
			// std::stod throws on malformed input so the catch below can report it;
			// std::atof would silently yield 0.0 instead.
			config->threshold = std::stod(arg);
			break;
		case 'z':
		{
			int x = std::stoi(arg);
			config->targetSize = cv::Size(x, x);
			break;
		}
		case ARGP_KEY_ARG:
			config->imagePaths.push_back(arg);
			break;
		default:
			return ARGP_ERR_UNKNOWN;
		}
	}
	catch(const std::invalid_argument& ex)
	{
		std::cout<<arg<<" passed for argument -"<<static_cast<char>(key)<<" is not a valid number.\n";
		return ARGP_KEY_ERROR;
	}
	return 0;
}

static struct argp argp = {options, parse_opt, args_doc, doc};
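// Example invocation (illustrative; the binary name is assumed from the SmartCrop
// subdirectory, the flags are taken from the option table above):
//   SmartCrop -m yolov8x.onnx -c classes.txt -o out -z 512 -f person.jpg -d images/
// crops every image found under images/ to 512x512, steering the crop toward the
// person pictured in person.jpg, and writes annotated debug renderings to out/debug.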
@@ -1,19 +1,19 @@
 #include "seamcarving.h"
 #include <opencv2/imgcodecs.hpp>
 #include <opencv2/highgui/highgui.hpp>
 #include <opencv2/imgproc.hpp>
 #include <iostream>
-#if __cplusplus >= 201703L
 #include <filesystem>
-#endif
 #include <cfloat>
+#include <vector>
+#include "log.h"
 
-SeamCarving::SeamCarving(const cv::Mat &img, int seams, bool grow) :
-	image(img), seams(seams), grow(grow) {}
-
-void SeamCarving::init()
+bool SeamCarving::strechImage(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect)
 {
 	cv::Mat newFrame = image.clone();
+	assert(!newFrame.empty());
+	std::vector<std::vector<int>> vecSeams;
 
 	for(int i = 0; i < seams; i++)
 	{
@@ -24,230 +24,55 @@ void SeamCarving::init()
 		cv::Mat pathIntensityMat = computePathIntensityMat(gradientMagnitude);
 
 		if(pathIntensityMat.rows == 0 && pathIntensityMat.cols == 0)
-		{
-			finalImage = image;
-			break;
-		}
+			return false;
+
 		std::vector<int> seam = getLeastImportantPath(pathIntensityMat);
 		vecSeams.push_back(seam);
+		if(seamsVect)
+			seamsVect->push_back(seam);
 
-		newFrame = removeLeastImportantPath(newFrame,seam);
+		newFrame = removeLeastImportantPath(newFrame, seam);
 
-		if(newFrame.rows == 0 && newFrame.cols == 0)
-		{
-			finalImage = image;
-			break;
-		}
+		if(newFrame.rows == 0 || newFrame.cols == 0)
+			return false;
 	}
 
 	if (grow)
 	{
 		cv::Mat growMat = image.clone();
 
-		for (int i = 0; i < vecSeams.size(); i++)
+		for(size_t i = 0; i < vecSeams.size(); i++)
 		{
 			growMat = addLeastImportantPath(growMat,vecSeams[i]);
 		}
-		finalImage = growMat;
+		image = growMat;
 	}
 	else
 	{
-		finalImage = newFrame;
+		image = newFrame;
 	}
-	sliderPos = seams;
+	return true;
 }
 
-void SeamCarving::computeNewFinalImage(int sliderPos)
+bool SeamCarving::strechImageVert(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect)
 {
-	if(sliderPos == 0)
-	{
-		finalImage = image;
-		return;
-	}
-	if(sliderPos < 1 || sliderPos >= sliderMax-1)
-	{
-		return;
-	}
-	if(sliderPos > vecSeams.size())
-	{
-		cv::Mat newFrame = finalImage.clone();
-		for(int i = vecSeams.size()-1; i < sliderPos; i++)
-		{
-			//Gradient Magnitude for intensity of image.
-			cv::Mat gradientMagnitude = computeGradientMagnitude(newFrame);
-			//Use DP to create the real energy map that is used for path calculation.
-			// Strictly using vertical paths for testing simplicity.
-			cv::Mat pathIntensityMat = computePathIntensityMat(gradientMagnitude);
-
-			if(pathIntensityMat.rows == 0 && pathIntensityMat.cols == 0)
-			{
-				finalImage = image;
-				break;
-			}
-			std::vector<int> seam = getLeastImportantPath(pathIntensityMat);
-			vecSeams.push_back(seam);
-			newFrame = removeLeastImportantPath(newFrame,seam);
-			if(newFrame.rows == 0 && newFrame.cols == 0)
-			{
-				finalImage = image;
-				break;
-			}
-		}
-		if (grow)
-		{
-			cv::Mat growMat = image.clone();
-
-			for (int i = 0; i < vecSeams.size(); i++)
-			{
-				growMat = addLeastImportantPath(growMat,vecSeams[i]);
-			}
-
-			finalImage = growMat;
-		}
-		else
-		{
-			finalImage = newFrame;
-		}
-	}
-	else if (sliderPos < vecSeams.size())
-	{
-		cv::Mat newFrame = image.clone();
-		for(int i = 0; i < sliderPos; i++) // TODO check if it is faster to add seams back (probably not)
-		{
-			if (grow)
-			{
-				newFrame = addLeastImportantPath(newFrame,vecSeams[i]);
-			}
-			else
-			{
-				newFrame = removeLeastImportantPath(newFrame,vecSeams[i]);
-			}
-
-			if(newFrame.rows == 0 && newFrame.cols == 0)
-			{
-				finalImage = image;
-				break;
-			}
-		}
-		finalImage = newFrame;
-	}
+	cv::transpose(image, image);
+	bool ret = strechImage(image, seams, grow, seamsVect);
+	cv::transpose(image, image);
+	return ret;
 }
 
-const cv::Mat& SeamCarving::getFinalImage()
+bool SeamCarving::strechImageWithSeamsImage(cv::Mat& image, cv::Mat& seamsImage, int seams, bool grow)
 {
-	return finalImage;
-}
-
-void SeamCarving::showSeamsImg()
-{
-	cv::Mat seamsFrame = image.clone();
-	//std::cout << "sliderPos: " << sliderPos << std::endl;
-	for(int i = 0; i < sliderPos; i++)
-	{
-		seamsFrame = drawSeam(seamsFrame, vecSeams[i]);
-	}
-	cv::imwrite("output/seams_image.jpg", seamsFrame);
-	cv::imshow( "Image Seams", seamsFrame);
-}
-
-static void onChange( int pos, void* object )
-{
-	SeamCarving* sc = (SeamCarving*)(object);
-	/*if(sc->getBlockUpdateStatus()) {
-		return;
-	}*/
-	sc->computeNewFinalImage(pos);
-	imshow("Final Image", sc->getFinalImage());
-#if DEBUG
-	sc->showSeamsImg();
-#endif
-}
-
-static void onMouse( int event, int x, int y, int, void* object)
-{
-	SeamCarving* sc = (SeamCarving*)(object);
-	if( event == cv::EVENT_LBUTTONDOWN ||
-		event == cv::EVENT_RBUTTONDOWN ||
-		event == cv::EVENT_MBUTTONDOWN
-		)
-	{
-		sc->setBlockUpdate(true);
-	}
-	else if(event == cv::EVENT_LBUTTONUP ||
-		event == cv::EVENT_RBUTTONUP ||
-		event == cv::EVENT_MBUTTONUP)
-	{
-		sc->setBlockUpdate(false);
-	}
-}
-
-void SeamCarving::setBlockUpdate(bool bUpdate)
-{
-	blockUpdate = bUpdate;
-}
-
-bool SeamCarving::getBlockUpdateStatus()
-{
-	return blockUpdate;
-}
-
-void SeamCarving::showImage()
-{
-#if __cplusplus >= 201703L
-	if(!std::filesystem::exists("output"))
-	{
-		std::filesystem::create_directory("output");
-	}
-#endif
-	if( image.empty() )
-	{
-		std::cout << "Could not open raw image" << std::endl ;
-		return;
-	}
-	namedWindow( "Raw Image", cv::WINDOW_AUTOSIZE );
-	cv::imshow( "Raw Image", image );
-
-	if( finalImage.empty() )
-	{
-		std::cout << "Could not open final image" << std::endl ;
-		return;
-	}
-#if DEBUG
-	namedWindow( "gradient Image", cv::WINDOW_AUTOSIZE );
-	cv::Mat gradient = computeGradientMagnitude(image);
-	cv::Mat u8_image;
-	gradient.convertTo(u8_image, CV_8U);
-
-	cv::imwrite("output/gradient_image.jpg", u8_image);
-	cv::imshow("gradient Image", u8_image);
-
-	namedWindow( "intensity Image", cv::WINDOW_AUTOSIZE );
-	cv::Mat u8_image2;
-	cv::Mat intensityMat = computePathIntensityMat(gradient);
-	cv::Mat dst;
-	cv::normalize(intensityMat, dst, 0, 255, cv::NORM_MINMAX);
-	dst.convertTo(u8_image2, CV_8U);
-	cv::imwrite("output/intensity_image.jpg", u8_image2);
-	cv::imshow( "intensity Image", u8_image2);
-
-	//cv::Mat engImg = GetEnergyImg(image);
-	//namedWindow("energy Image", cv::WINDOW_AUTOSIZE);
-	//cv::Mat u8_image3;
-	//engImg.convertTo(u8_image3, CV_8U);
-	//cv::imshow( "energy Image", u8_image3);
-	namedWindow("Image Seams", cv::WINDOW_AUTOSIZE);
-	showSeamsImg();
-
-#endif
-
-	namedWindow( "Final Image", cv::WINDOW_AUTOSIZE );
-	cv::createTrackbar("Seams", "Final Image", &sliderPos, sliderMax, onChange, this);
-	//cv::setMouseCallback("Final Image", onMouse, this );
-	cv::imwrite("output/final_image.jpg", finalImage);
-	cv::imshow("Final Image", finalImage);
-	cv::waitKey(0);
-}
+	std::vector<std::vector<int>> seamsVect;
+	seamsImage = image.clone();
+
+	bool ret = SeamCarving::strechImage(image, seams, grow, &seamsVect);
+	if(!ret)
+		return false;
+
+	for(size_t i = 0; i < seamsVect.size(); ++i)
+		seamsImage = drawSeam(seamsImage, seamsVect[i]);
+	return true;
+}
 
 cv::Mat SeamCarving::GetEnergyImg(const cv::Mat &img)
@@ -392,9 +217,7 @@ cv::Mat SeamCarving::removeLeastImportantPath(const cv::Mat &original, const std
 	cv::Size size = cv::Size(orgSize.width-1, orgSize.height);
 	cv::Mat newMat = cv::Mat(size, original.type());
 
-	unsigned char *rawOrig = original.data;
-	unsigned char *rawOutput = newMat.data;
-	for(int row = 0; row < seam.size(); row++)
+	for(size_t row = 0; row < seam.size(); row++)
 	{
 		removePixel(original, newMat, row, seam[row]);
 	}
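// The index type changes from int to size_t here and in addLeastImportantPath below
// because seam.size() returns size_t, so the old loop drew signed/unsigned comparison
// warnings. The now-unused rawOrig/rawOutput pointers are dropped since
// removePixel/addPixel access the Mats directly.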
@@ -460,9 +283,7 @@ cv::Mat SeamCarving::addLeastImportantPath(const cv::Mat &original, const std::v
 	cv::Size size = cv::Size(orgSize.width+1, orgSize.height);
 	cv::Mat newMat = cv::Mat(size, original.type());
 
-	unsigned char *rawOrig = original.data;
-	unsigned char *rawOutput = newMat.data;
-	for(int row = 0; row < seam.size(); row++)
+	for(size_t row = 0; row < seam.size(); row++)
 	{
 		//std::cout << "row: " << row << ", col: " << seam[row] << std::endl;
 		addPixel(original, newMat, row, seam[row]);
@@ -518,3 +339,18 @@ void SeamCarving::addPixel(const cv::Mat &original, cv::Mat &outputMat, int row,
 		rawOutput[newRowStart + leftPixel*channels+2] = (unsigned char) ((byte3 + byte3L)/2);
 	}
 }
+
+cv::Mat SeamCarving::drawSeam(const cv::Mat &frame, const std::vector<int> &seam)
+{
+	cv::Mat retMat = frame.clone();
+	for(int row = 0; row < frame.rows; row++)
+	{
+		for(int col = 0; col < frame.cols; col++)
+		{
+			retMat.at<cv::Vec3b>(row, seam[row])[0] = 0;
+			retMat.at<cv::Vec3b>(row, seam[row])[1] = 255;
+			retMat.at<cv::Vec3b>(row, seam[row])[2] = 0;
+		}
+	}
+	return retMat;
+}
24
SmartCrop/seamcarving.h
Normal file
@@ -0,0 +1,24 @@
#pragma once

#include <opencv2/core/core.hpp>
#include <vector>

class SeamCarving
{
private:
	static cv::Mat GetEnergyImg(const cv::Mat &img);
	static cv::Mat computeGradientMagnitude(const cv::Mat &frame);
	static float intensity(float currIndex, int start, int end);
	static cv::Mat computePathIntensityMat(const cv::Mat &rawEnergyMap);
	static std::vector<int> getLeastImportantPath(const cv::Mat &importanceMap);
	static cv::Mat removeLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam);
	static void removePixel(const cv::Mat &original, cv::Mat &outputMap, int row, int minCol);
	static cv::Mat addLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam);
	static void addPixel(const cv::Mat &original, cv::Mat &outputMat, int row, int minCol);
	static cv::Mat drawSeam(const cv::Mat &frame, const std::vector<int> &seam);

public:
	static bool strechImage(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect = nullptr);
	static bool strechImageVert(cv::Mat& image, int seams, bool grow, std::vector<std::vector<int>>* seamsVect = nullptr);
	static bool strechImageWithSeamsImage(cv::Mat& image, cv::Mat& seamsImage, int seams, bool grow);
};
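// Usage sketch for the new static interface (illustrative, not part of the diff):
//   cv::Mat img = cv::imread("in.jpg");
//   if(SeamCarving::strechImage(img, 50, false))   // remove 50 vertical seams in place
//       cv::imwrite("out.jpg", img);
//   SeamCarving::strechImageVert(img, 50, true);   // transpose, add 50 seams, transpose back
// All three entry points now operate on the caller's cv::Mat and report failure via
// bool, instead of keeping state and the result inside a SeamCarving instance.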
@@ -11,8 +11,8 @@
 #define INCBIN_PREFIX r
 #include "incbin.h"
 
-INCTXT(defaultClasses, "../classes.txt");
-INCBIN(defaultModel, "../yolov8x.onnx");
+INCTXT(defaultClasses, WEIGHT_DIR "/classes.txt");
+INCBIN(defaultModel, WEIGHT_DIR "/yolov8x.onnx");
 
 Yolo::Yolo(const std::filesystem::path &onnxModelPath, const cv::Size &modelInputShape,
 	const std::filesystem::path& classesTxtFilePath, bool runWithOCl)
@@ -22,6 +22,7 @@ Yolo::Yolo(const std::filesystem::path &onnxModelPath, const cv::Size &modelInpu
 
 	if(classesTxtFilePath.empty())
 	{
+		Log(Log::INFO)<<"Using builtin classes";
 		loadClasses(rdefaultClassesData);
 	}
 	else
@@ -31,19 +32,21 @@ Yolo::Yolo(const std::filesystem::path &onnxModelPath, const cv::Size &modelInpu
 	}
 
 	if(!modelPath.empty())
+	{
 		net = cv::dnn::readNetFromONNX(modelPath);
+	}
 	else
+	{
+		Log(Log::INFO)<<"Using builtin yolo model";
 		net = cv::dnn::readNetFromONNX((const char*)rdefaultModelData, rdefaultModelSize);
+	}
 	if(runWithOCl)
 	{
-		std::cout << "\nRunning on OCV" << std::endl;
 		net.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT);
 		net.setPreferableTarget(cv::dnn::DNN_TARGET_OPENCL);
 	}
 	else
 	{
-		std::cout << "\nRunning on CPU" << std::endl;
 		net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
 		net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
 	}
@@ -176,14 +179,33 @@ std::vector<Yolo::Detection> Yolo::runInference(const cv::Mat &input)
 
 		result.className = classes[result.class_id].first;
 		result.priority = classes[result.class_id].second;
+		clampBox(boxes[idx], input.size());
 		result.box = boxes[idx];
 
 		detections.push_back(result);
 	}
 
 	return detections;
 }
 
+void Yolo::clampBox(cv::Rect& box, const cv::Size& size)
+{
+	if(box.x < 0)
+	{
+		box.width += box.x;
+		box.x = 0;
+	}
+	if(box.y < 0)
+	{
+		box.height += box.y;
+		box.y = 0;
+	}
+	if(box.x+box.width > size.width)
+		box.width = size.width - box.x;
+	if(box.y+box.height > size.height)
+		box.height = size.height - box.y;
+}
+
 void Yolo::loadClasses(const std::string& classesStr)
 {
 	std::vector<std::string> candidateClasses = tokenizeBinaryIgnore(classesStr, '\n', '"', '\\');
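// Worked example for clampBox (illustrative): a raw detection can overhang the frame.
// For a 100x100 input and a box of x=-10, y=20, width=50, height=90:
//   x < 0            -> width = 50 + (-10) = 40, x = 0
//   y + height > 100 -> height = 100 - 20 = 80
// leaving (0, 20, 40, 80), which is safe to use as a cv::Mat ROI when cropping.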
@@ -27,19 +27,16 @@ private:
 	static constexpr float modelScoreThreshold = 0.45;
 	static constexpr float modelNMSThreshold = 0.50;
 
+	std::string modelPath;
+	std::vector<std::pair<std::string, int>> classes;
+	cv::Size2f modelShape;
+	bool letterBoxForSquare = true;
+	cv::dnn::Net net;
+
 	void loadClasses(const std::string& classes);
 	void loadOnnxNetwork(const std::filesystem::path& path);
 	cv::Mat formatToSquare(const cv::Mat &source);
-
-	std::string modelPath;
-	std::vector<std::pair<std::string, int>> classes;
-	cv::Size2f modelShape;
-	bool letterBoxForSquare = true;
-	cv::dnn::Net net;
+	static void clampBox(cv::Rect& box, const cv::Size& size);
 
 public:
 	Yolo(const std::filesystem::path &onnxModelPath = "", const cv::Size& modelInputShape = {640, 480},
80
Weights/classes.txt
Normal file
@@ -0,0 +1,80 @@
person, 10
bicycle, 4
car, 3
motorcycle, 4
airplane, 4
bus, 4
train, 4
truck, 3
boat, 4
traffic light, 1
fire hydrant, 1
stop sign, 1
parking meter, 1
bench, 2
bird, 5
cat, 6
dog, 5
horse, 4
sheep, 5
cow, 4
elephant, 5
bear, 5
zebra, 5
giraffe, 5
backpack, 3
umbrella, 3
handbag, 3
tie, 3
suitcase, 2
frisbee, 3
skis, 3
snowboard, 3
sports ball, 3
kite, 4
baseball bat, 3
baseball glove, 3
skateboard, 3
surfboard, 3
tennis racket, 3
bottle, 2
wine glass, 2
cup, 2
fork, 1
knife, 1
spoon, 1
bowl, 1
banana, 1
apple, 1
sandwich, 1
orange, 1
broccoli, 1
carrot, 1
hot dog, 1
pizza, 1
donut, 2
cake, 2
chair, 1
couch, 1
potted plant, 1
bed, 1
dining table, 1
toilet, 1
tv, 1
laptop, 1
mouse, 1
remote, 1
keyboard, 1
cell phone, 1
microwave, 1
oven, 1
toaster, 1
sink, 1
refrigerator, 1
book, 1
clock, 1
vase, 1
scissors, 1
teddy bear, 1
hair drier, 1
toothbrush, 1
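Each line of Weights/classes.txt pairs a COCO class name with an integer priority
("name, priority"). Yolo::loadClasses() tokenizes these into
std::pair<std::string, int> entries, and the priority appears to rank how strongly a
detection of that class should attract the crop, from person (10) down to background
clutter such as fork (1).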
BIN
Weights/face_detection_yunet_2023mar.onnx
Normal file
Binary file not shown.
BIN
Weights/face_recognition_sface_2021dec.onnx
Normal file
Binary file not shown.
BIN
Weights/yolov8x.onnx
Normal file
Binary file not shown.
295
main.cpp
@@ -1,295 +0,0 @@
#include <filesystem>
#include <iostream>
#include <opencv2/core/types.hpp>
#include <opencv2/imgproc.hpp>
#include <algorithm>
#include <vector>

#include "yolo.h"
#include "log.h"
#include "options.h"
#include "utils.h"
#include "intelligentroi.h"

const Yolo::Detection* pointInDetectionHoriz(int x, const std::vector<Yolo::Detection>& detections, const Yolo::Detection* ignore = nullptr)
{
	const Yolo::Detection* inDetection = nullptr;
	for(const Yolo::Detection& detection : detections)
	{
		if(!ignore || ignore != &detection)
			continue;

		if(detection.box.x <= x && detection.box.x+detection.box.width <= x)
		{
			if(!inDetection || detection.box.br().x > inDetection->box.br().x)
				inDetection = &detection;
		}
	}
	return inDetection;
}

bool findRegionEndpointHoriz(int& x, const std::vector<Yolo::Detection>& detections, int imgSizeX)
{
	const Yolo::Detection* inDetection = pointInDetectionHoriz(x, detections);

	if(!inDetection)
	{
		const Yolo::Detection* closest = nullptr;
		for(const Yolo::Detection& detection : detections)
		{
			if(detection.box.x > x)
			{
				if(closest == nullptr || detection.box.x-x > closest->box.x-x)
					closest = &detection;
			}
		}
		if(closest)
			x = closest->box.x;
		else
			x = imgSizeX;
		return false;
	}
	else
	{
		x = inDetection->box.br().x;
		const Yolo::Detection* candidateDetection = pointInDetectionHoriz(x, detections, inDetection);
		if(candidateDetection && candidateDetection->box.br().x > x)
			return findRegionEndpointHoriz(x, detections, imgSizeX);
		else
			return true;
	}
}

std::vector<std::pair<cv::Mat, bool>> cutImageIntoHorzRegions(cv::Mat& image, const std::vector<Yolo::Detection>& detections)
{
	std::vector<std::pair<cv::Mat, bool>> out;

	for(int x = 0; x < image.cols; ++x)
	{
		int start = x;
		bool frozen = findRegionEndpointHoriz(x, detections, image.cols);

		cv::Mat slice = image(cv::Rect(start, 0, x-start, image.rows));
		out.push_back({slice, frozen});
	}

	return out;
}

const Yolo::Detection* pointInDetectionVert(int y, const std::vector<Yolo::Detection>& detections, const Yolo::Detection* ignore = nullptr)
{
	const Yolo::Detection* inDetection = nullptr;
	for(const Yolo::Detection& detection : detections)
	{
		if(!ignore || ignore != &detection)
			continue;

		if(detection.box.y <= y && detection.box.y+detection.box.height <= y)
		{
			if(!inDetection || detection.box.br().y > inDetection->box.br().y)
				inDetection = &detection;
		}
	}
	return inDetection;
}

bool findRegionEndpointVert(int& y, const std::vector<Yolo::Detection>& detections, int imgSizeY)
{
	const Yolo::Detection* inDetection = pointInDetectionVert(y, detections);

	if(!inDetection)
	{
		const Yolo::Detection* closest = nullptr;
		for(const Yolo::Detection& detection : detections)
		{
			if(detection.box.y > y)
			{
				if(closest == nullptr || detection.box.y-y > closest->box.y-y)
					closest = &detection;
			}
		}
		if(closest)
			y = closest->box.y;
		else
			y = imgSizeY;
		return false;
	}
	else
	{
		y = inDetection->box.br().y;
		const Yolo::Detection* candidateDetection = pointInDetectionVert(y, detections, inDetection);
		if(candidateDetection && candidateDetection->box.br().y > y)
			return findRegionEndpointVert(y, detections, imgSizeY);
		else
			return true;
	}
}

std::vector<std::pair<cv::Mat, bool>> cutImageIntoVertRegions(cv::Mat& image, const std::vector<Yolo::Detection>& detections)
{
	std::vector<std::pair<cv::Mat, bool>> out;

	for(int y = 0; y < image.rows; ++y)
	{
		int start = y;
		bool frozen = findRegionEndpointVert(y, detections, image.rows);

		cv::Mat slice = image(cv::Rect(0, start, image.cols, y-start));
		out.push_back({slice, frozen});
	}

	return out;
}

bool seamCarveResize(cv::Mat& image, const std::vector<Yolo::Detection>& detections, double targetAspectRatio = 1.0)
{
	double aspectRatio = image.cols/static_cast<double>(image.rows);

	bool vertical = false;
	cv::Mat workImage;
	if(aspectRatio > targetAspectRatio)
		vertical = true;

	int requiredLines = 0;
	if(!vertical)
		requiredLines = workImage.rows*targetAspectRatio - workImage.cols;
	else
		requiredLines = workImage.cols/targetAspectRatio - workImage.rows;

	Log(Log::DEBUG)<<__func__<<' '<<requiredLines<<" lines are required in "<<(vertical ? "vertical" : "horizontal")<<" direction";

	if(!vertical)
	{
		std::vector<std::pair<cv::Mat, bool>> slices = cutImageIntoHorzRegions(image, detections);
		int totalResizableSize = 0;
		for(const std::pair<cv::Mat, bool>& slice : slices)
		{
			if(slice.second)
				totalResizableSize += slice.first.cols;
		}

		std::vector<int> seamsForSlice(slices.size());
		for(size_t i = 0; i < slices.size(); ++i)
		{
			seamsForSlice[i] = (static_cast<double>(slices[i].first.cols)/totalResizableSize)*requiredLines;
		}
	}
	else
	{
		int totalResizableSize = 0;
		std::vector<std::pair<cv::Mat, bool>> slices = cutImageIntoVertRegions(image, detections);
	}
}

void drawDebugInfo(cv::Mat &image, const cv::Rect& rect, const std::vector<Yolo::Detection>& detections)
{
	for(const Yolo::Detection& detection : detections)
	{
		cv::rectangle(image, detection.box, detection.color, 4);
		std::string label = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4);
		cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_DUPLEX, 3, 2, 0);
		cv::Rect textBox(detection.box.x, detection.box.y - 80, labelSize.width + 10, labelSize.height + 20);
		cv::rectangle(image, textBox, detection.color, cv::FILLED);
		cv::putText(image, label, cv::Point(detection.box.x + 5, detection.box.y - 10), cv::FONT_HERSHEY_DUPLEX, 3, cv::Scalar(0, 0, 0), 2, 0);
	}

	cv::rectangle(image, rect, cv::Scalar(0, 0, 255), 8);
}

int main(int argc, char* argv[])
{
	Log::level = Log::INFO;

	Config config;
	argp_parse(&argp, argc, argv, 0, 0, &config);

	if(config.outputDir.empty())
	{
		Log(Log::ERROR)<<"a output path \"-o\" is required";
		return 1;
	}

	if(config.imagePaths.empty())
	{
		Log(Log::ERROR)<<"at least one input image or directory is required";
		return 1;
	}

	std::vector<std::filesystem::path> imagePaths;

	for(const std::filesystem::path& path : config.imagePaths)
		getImageFiles(path, imagePaths);

	if(imagePaths.empty())
	{
		Log(Log::ERROR)<<"no image was found\n";
		return 1;
	}

	Yolo yolo(config.modelPath, {640, 480}, config.classesPath, false);
	InteligentRoi intRoi(yolo);

	if(!std::filesystem::exists(config.outputDir))
	{
		if(!std::filesystem::create_directory(config.outputDir))
		{
			Log(Log::ERROR)<<"could not create directory at "<<config.outputDir;
			return 1;
		}
	}

	std::filesystem::path debugOutputPath(config.outputDir/"debug");
	if(config.debug)
	{
		if(!std::filesystem::exists(debugOutputPath))
			std::filesystem::create_directory(debugOutputPath);
	}

	for(const std::filesystem::path& path : imagePaths)
	{
		cv::Mat image = cv::imread(path);
		if(!image.data)
		{
			Log(Log::WARN)<<"could not load image "<<path<<" skipping";
			continue;
		}

		if(std::max(image.cols, image.rows) > 1024)
		{
			if(image.cols > image.rows)
			{
				double ratio = 1024.0/image.cols;
				cv::resize(image, image, {1024, static_cast<int>(image.rows*ratio)}, 0, 0, cv::INTER_CUBIC);
			}
			else
			{
				double ratio = 1024.0/image.rows;
				cv::resize(image, image, {static_cast<int>(image.cols*ratio), 1024}, 0, 0, cv::INTER_CUBIC);
			}
		}

		std::vector<Yolo::Detection> detections = yolo.runInference(image);

		Log(Log::DEBUG)<<"Got "<<detections.size()<<" detections for "<<path;
		for(const Yolo::Detection& detection : detections)
			Log(Log::DEBUG)<<detection.class_id<<": "<<detection.className<<" at "<<detection.box<<" with prio "<<detection.priority;

		cv::Rect crop = intRoi.getCropRectangle(detections, image.size());

		cv::Mat debugImage = image.clone();
		drawDebugInfo(debugImage, crop, detections);
		bool ret = cv::imwrite(debugOutputPath/path.filename(), debugImage);
		if(!ret)
			Log(Log::WARN)<<"could not save debug image to "<<debugOutputPath/path.filename()<<" skipping";

		cv::Mat croppedImage = image(crop);
		cv::Mat resizedImage;
		cv::resize(croppedImage, resizedImage, {512, 512}, 0, 0, cv::INTER_CUBIC);
		ret = cv::imwrite(config.outputDir/path.filename(), resizedImage);
		if(!ret)
			Log(Log::WARN)<<"could not save image to "<<config.outputDir/path.filename()<<" skipping";
	}
	return 0;
}
70
options.h
@@ -1,70 +0,0 @@
#pragma once
#include <string>
#include <vector>
#include <argp.h>
#include <iostream>
#include <filesystem>
#include "log.h"

const char *argp_program_version = "AIImagePreprocesses";
const char *argp_program_bug_address = "<carl@uvos.xyz>";
static char doc[] = "Application that trainsforms images into formats, sizes and aspect ratios required for ai training";
static char args_doc[] = "[IMAGES]";

static struct argp_option options[] =
{
	{"verbose", 'v', 0, 0, "Show debug messages" },
	{"quiet", 'q', 0, 0, "only output data" },
	{"model", 'm', "[FILENAME]", 0, "YoloV8 model to use for detection" },
	{"classes", 'c', "[FILENAME]", 0, "classes text file to use" },
	{"out", 'o', "[DIRECTORY]", 0, "directory whre images are to be saved" },
	{"debug", 'd', 0, 0, "output debug images" },
	{"seam-carving", 's', 0, 0, "model to train: "}
};

struct Config
{
	std::vector<std::filesystem::path> imagePaths;
	std::filesystem::path modelPath;
	std::filesystem::path classesPath;
	std::filesystem::path outputDir;
	bool seamCarving = false;
	bool debug = false;
};

static error_t parse_opt (int key, char *arg, struct argp_state *state)
{
	Config *config = reinterpret_cast<Config*>(state->input);
	switch (key)
	{
	case 'q':
		Log::level = Log::ERROR;
		break;
	case 'v':
		Log::level = Log::DEBUG;
		break;
	case 'm':
		config->modelPath = arg;
		break;
	case 'c':
		config->classesPath = arg;
		break;
	case 'd':
		config->debug = true;
		break;
	case 'o':
		config->outputDir.assign(arg);
		break;
	case 's':
		config->seamCarving = true;
		break;
	case ARGP_KEY_ARG:
		config->imagePaths.push_back(arg);
		break;
	default:
		return ARGP_ERR_UNKNOWN;
	}
	return 0;
}

static struct argp argp = {options, parse_opt, args_doc, doc};
@@ -1,61 +0,0 @@
#ifndef __SEAM__CARVING_HPP__
#define __SEAM__CARVING_HPP__

#include <opencv2/core/core.hpp>
#define DEBUG 0

class SeamCarving {
public:
	void showImage();
	const cv::Mat& getFinalImage();
	virtual void computeNewFinalImage(int pos);
	void setBlockUpdate(bool bUpdate);
	bool getBlockUpdateStatus();
	virtual void showSeamsImg();

protected:
	SeamCarving(const cv::Mat &img, int seams, bool grow);
	void init();
	virtual cv::Mat drawSeam(const cv::Mat &frame, const std::vector<int> &seam) = 0;
	cv::Mat image;
	cv::Mat finalImage;
	int seams;
	bool grow;
	int sliderMax;
	int sliderPos;
	std::vector<std::vector<int>> vecSeams;

private:
	cv::Mat GetEnergyImg(const cv::Mat &img);
	cv::Mat computeGradientMagnitude(const cv::Mat &frame);
	float intensity(float currIndex, int start, int end);
	cv::Mat computePathIntensityMat(const cv::Mat &rawEnergyMap);
	std::vector<int> getLeastImportantPath(const cv::Mat &importanceMap);
	cv::Mat removeLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam);
	void removePixel(const cv::Mat &original, cv::Mat &outputMap, int row, int minCol);
	cv::Mat addLeastImportantPath(const cv::Mat &original, const std::vector<int> &seam);
	void addPixel(const cv::Mat &original, cv::Mat &outputMat, int row, int minCol);
	bool blockUpdate = false;
};

class SeamCarvingHorizontal : public SeamCarving
{
public:
	SeamCarvingHorizontal(char* fileName, int seams=100, bool grow=false);
protected:
	virtual cv::Mat drawSeam(const cv::Mat &frame, const std::vector<int> &seam) override;
};

class SeamCarvingVertical : public SeamCarving {
public:
	SeamCarvingVertical(char* fileName, int seams=100, bool grow=false);
	virtual void computeNewFinalImage(int pos) override;
#if DEBUG
	virtual void showSeamsImg() override;
#endif
protected:
	virtual cv::Mat drawSeam(const cv::Mat &frame, const std::vector<int> &seam) override;
};

#endif // __SEAM__CARVING_HPP__
@@ -1,28 +0,0 @@
#include "seamcarving.h"
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <iostream>
#include <cfloat>

cv::Mat SeamCarvingHorizontal::drawSeam(const cv::Mat &frame, const std::vector<int> &seam)
{
	cv::Mat retMat = frame.clone();
	for(int row = 0; row < frame.rows; row++)
	{
		for(int col = 0; col < frame.cols; col++)
		{
			retMat.at<cv::Vec3b>(row, seam[row])[0] = 0;
			retMat.at<cv::Vec3b>(row, seam[row])[1] = 255;
			retMat.at<cv::Vec3b>(row, seam[row])[2] = 0;
		}
	}
	return retMat;
}

SeamCarvingHorizontal::SeamCarvingHorizontal(char* fileName, int seams, bool grow) :
	SeamCarving( cv::imread(fileName, cv::IMREAD_COLOR), seams, grow)
{
	sliderMax = image.cols;
	init();
}
@@ -1,51 +0,0 @@
#include "seamcarving.h"
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <iostream>
#include <cfloat>

SeamCarvingVertical::SeamCarvingVertical(char* fileName, int seams, bool grow) :
	SeamCarving( cv::imread(fileName, cv::IMREAD_COLOR), seams, grow)
{
	sliderMax = image.rows;
	cv::Mat oldImage = image;
	image = image.t();
	init();
	image = oldImage;
	finalImage = finalImage.t();
}

cv::Mat SeamCarvingVertical::drawSeam(const cv::Mat &frame, const std::vector<int> &seam)
{
	cv::Mat retMat = frame.clone();
	for(int col = 0; col < frame.cols; col++)
	{
		for(int row = 0; row < frame.rows; row++)
		{
			retMat.at<cv::Vec3b>(seam[col], col)[0] = 0;
			retMat.at<cv::Vec3b>(seam[col], col)[1] = 255;
			retMat.at<cv::Vec3b>(seam[col], col)[2] = 0;
		}
	}
	return retMat;
}

void SeamCarvingVertical::computeNewFinalImage(int pos)
{
	cv::Mat oldImage = image;
	image = image.t();
	SeamCarving::computeNewFinalImage(pos);
	image = oldImage;
	finalImage = finalImage.t();
}

#if DEBUG
void SeamCarvingVertical::showSeamsImg()
{
	cv::Mat oldImage = this->image;
	this->image = this->image.t();
	SeamCarving::showImage();
	this->image = oldImage;
}
#endif