diff --git a/ObjectDetection/README.md b/ObjectDetection/README.md
index a8a2c53..360589f 100644
--- a/ObjectDetection/README.md
+++ b/ObjectDetection/README.md
@@ -5,8 +5,10 @@
 ## Creating a YoloV4-format dataset for a given object
 Command to run:
- blenderproc run obj2Yolov4dataset.py [obj] [output_dir] [--imgs 1]
+```
+blenderproc run obj2Yolov4dataset.py [obj] [output_dir] [--imgs 1]
+```
 - obj: object description file (*.obj)
 - output_dir: output directory
 - --imgs 1: number of images to generate
 
@@ -14,7 +16,9 @@
 ## Creating a YoloV4-format dataset for a set of objects in a given scene
 Command to run:
- blenderproc run objs2Yolov4dataset.py [scene] [obj_path] [output_dir] [vhacd_path] [--imgs 1]
+```
+blenderproc run objs2Yolov4dataset.py [scene] [obj_path] [output_dir] [vhacd_path] [--imgs 1]
+```
 - scene: path to the scene description file (*.blend)
 - obj_path: path to the directory with the *.obj files of the objects to be detected
 - output_dir: output directory
@@ -25,13 +29,78 @@
 The [darknet](https://github.com/AlexeyAB/darknet) package must be built for the target software and hardware (CPU, GPU, ...)
 
+---
+
 ## Training the network and obtaining its weights file
 Command to run:
- darknet datector train [data] [cfg] [weight]
+```
+darknet detector train [data] [cfg] [weight]
+```
 - data: dataset description file (*.data)
 - cfg: network configuration file
 - weight: network weights file
 
 For training, download the file with pre-trained weights (162 MB): [yolov4.conv.137](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.conv.137)
 
 Each number of detectable object classes in the dataset requires its own [data](https://gitlab.com/robossembler/framework/-/blob/master/ObjectDetection/yolov4_objs2.data) and [cfg](https://gitlab.com/robossembler/framework/-/blob/master/ObjectDetection/yolov4_objs2.cfg) files.
+
+---
+
+## Commands for detecting objects with the trained weights
+* variant 1 (t.txt contains a list of images):
+```
+darknet detector test yolov4_objs2.data yolov4_test.cfg yolov4_objs2_final.weights -dont_show -ext_output < t.txt > res.txt
+```
+
+* variant 2 (000015.jpg is a test image):
+```
+darknet detector test yolov4_objs2.data yolov4_test.cfg yolov4_objs2_final.weights -dont_show -ext_output 000015.jpg > res.txt
+```
+
+* variant 3 (t.txt contains a list of images):
+```
+darknet detector test yolov4_objs2.data yolov4_test.cfg yolov4_objs2_final.weights -dont_show -ext_output -out res.json < t.txt
+```
+
+The res.txt file after running variant 2:
+
+> net.optimized_memory = 0
+> mini_batch = 1, batch = 1, time_steps = 1, train = 0
+> Create CUDA-stream - 0
+> Create cudnn-handle 0
+> nms_kind: greedynms (1), beta = 0.600000
+> nms_kind: greedynms (1), beta = 0.600000
+> nms_kind: greedynms (1), beta = 0.600000
+>
+> seen 64, trained: 768 K-images (12 Kilo-batches_64)
+> Detection layer: 139 - type = 28
+> Detection layer: 150 - type = 28
+> Detection layer: 161 - type = 28
+> 000015.jpg: Predicted in 620.357000 milli-seconds.
+> fork.001: 94% (left_x: 145 top_y: -0 width: 38 height: 18)
+> asm_element_edge.001: 28% (left_x: 195 top_y: 320 width: 40 height: 61)
+> start_link.001: 87% (left_x: 197 top_y: 313 width: 39 height: 68)
+> doking_link.001: 99% (left_x: 290 top_y: 220 width: 32 height: 21)
+> start_link.001: 90% (left_x: 342 top_y: 198 width: 33 height: 34)
+> doking_link.001: 80% (left_x: 342 top_y: 198 width: 32 height: 34)
+> assemb_link.001: 100% (left_x: 426 top_y: 410 width: 45 height: 61)
+
+The res.json file after running variant 3:
+>[
+{
+ "frame_id":1,
+ "filename":"img_test/000001.jpg",
+ "objects": [
+  {"class_id":5, "name":"asm_element_edge.001", "relative_coordinates":{"center_x":0.498933, "center_y":0.502946, "width":0.083075, "height":0.073736}, "confidence":0.999638},
+  {"class_id":4, "name":"grip-tool.001", "relative_coordinates":{"center_x":0.858856, "center_y":0.031339, "width":0.043919, "height":0.064563}, "confidence":0.996551}
+ ]
+},
+{
+ "frame_id":2,
+ "filename":"img_test/000002.jpg",
+ "objects": [
+  {"class_id":1, "name":"start_link.001", "relative_coordinates":{"center_x":0.926026, "center_y":0.728457, "width":0.104029, "height":0.132757}, "confidence":0.995811},
+  {"class_id":0, "name":"assemb_link.001", "relative_coordinates":{"center_x":0.280403, "center_y":0.129059, "width":0.029980, "height":0.025067}, "confidence":0.916782}
+ ]
+}
+]
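Since `-out res.json` produces a plain JSON array (see the example in the README hunk above), the detections are easy to post-process. Below is a minimal sketch of such post-processing, assuming the layout shown above; the script name, the `res.json` path and the 0.5 confidence cut-off are illustrative and not part of this patch:

```python
# parse_detections.py (hypothetical helper): read a darknet "-out res.json" report
import json

def load_detections(path="res.json", conf_threshold=0.5):
    """Return (filename, class name, center_x, center_y, width, height) tuples."""
    with open(path, "r") as fh:
        frames = json.load(fh)          # the report is one JSON array of frames
    result = []
    for frame in frames:
        for obj in frame.get("objects", []):
            if obj["confidence"] < conf_threshold:
                continue                # skip low-confidence detections
            rc = obj["relative_coordinates"]
            result.append((frame["filename"], obj["name"],
                           rc["center_x"], rc["center_y"],
                           rc["width"], rc["height"]))
    return result

if __name__ == "__main__":
    for det in load_detections():
        print(det)
```

Note that the returned coordinates are relative (0..1) box centers, matching the label format the dataset scripts below generate.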
diff --git a/ObjectDetection/obj2Yolov4dataset.py b/ObjectDetection/obj2Yolov4dataset.py
index d9960d3..3b0c6b5 100644
--- a/ObjectDetection/obj2Yolov4dataset.py
+++ b/ObjectDetection/obj2Yolov4dataset.py
@@ -6,6 +6,7 @@ import blenderproc as bproc
   Uses the blenderproc module
 
   24.01.2023 @shalenikol release 0.1
+  22.02.2023 @shalenikol release 0.2: fixed the x,y calculation in convert2relative
 """
 import numpy as np
 import argparse
@@ -19,6 +20,8 @@ def convert2relative(height, width, bbox):
     YOLO format uses relative coordinates for annotation
     """
     x, y, w, h = bbox
+    x += w/2 # the COCO bbox holds the top-left corner, YOLO expects the box center
+    y += h/2
     return x/width, y/height, w/width, h/height
 
 parser = argparse.ArgumentParser()
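The `convert2relative` fix above is the substance of release 0.2: BlenderProc's COCO annotations store a bounding box as `[x_min, y_min, width, height]`, i.e. the top-left corner, while a YOLO label stores the box center in relative coordinates, hence the added `w/2` and `h/2` shifts. A standalone sketch of the corrected conversion with a worked example (the image size and box values are made up):

```python
# Corrected COCO bbox -> YOLO relative label conversion (as in release 0.2)
def convert2relative(height, width, bbox):
    """COCO stores [x_min, y_min, w, h]; YOLO wants the relative box center."""
    x, y, w, h = bbox
    x += w/2   # shift from the top-left corner to the box center
    y += h/2
    return x/width, y/height, w/width, h/height

# A 100x80 box with its top-left corner at (300, 180) in a 640x480 image:
cx, cy, rw, rh = convert2relative(480, 640, [300, 180, 100, 80])
assert (cx, cy) == (350/640, 220/480)  # box center is at pixel (350, 220)
print(cx, cy, rw, rh)                  # 0.546875 0.458333... 0.15625 0.166666...
```

The same fix is applied to objs2Yolov4dataset.py below.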
diff --git a/ObjectDetection/objs2Yolov4dataset.py b/ObjectDetection/objs2Yolov4dataset.py
index d84e839..a7d0023 100644
--- a/ObjectDetection/objs2Yolov4dataset.py
+++ b/ObjectDetection/objs2Yolov4dataset.py
@@ -6,6 +6,7 @@ import blenderproc as bproc
   Uses the blenderproc module
 
   17.02.2023 @shalenikol release 0.1
+  22.02.2023 @shalenikol release 0.2: fixed the x,y calculation in convert2relative
 """
 import sys
 import numpy as np
@@ -20,6 +21,8 @@ def convert2relative(height, width, bbox):
     YOLO format uses relative coordinates for annotation
     """
     x, y, w, h = bbox
+    x += w/2 # the COCO bbox holds the top-left corner, YOLO expects the box center
+    y += h/2
     return x/width, y/height, w/width, h/height
 
 parser = argparse.ArgumentParser()
@@ -125,7 +128,8 @@ bproc.renderer.enable_segmentation_output(map_by=["category_id", "instance", "name"])
 res_dir = os.path.join(args.output_dir, 'coco_data')
 # Rendering loop
-n = 3 # number of samples for each camera location
+n_cam_location = 5 # number of random camera locations
+n_cam_poses = 3 # number of samples for each camera location
 # Do multiple times: Position the shapenet objects using the physics simulator and render X images with random camera poses
 for r in range(args.imgs):
     # Randomly set the color and energy
@@ -177,7 +181,7 @@ for r in range(args.imgs):
     # Sample up to X camera poses
     #an = np.random.uniform(0.78, 1.2) #1.
     #0.35
-    for i in range(5):
+    for i in range(n_cam_location):
         # Sample location
         location = bproc.sampler.shell(center=[0, 0, 0],
                                        radius_min=1.1,
@@ -187,10 +191,10 @@ for r in range(args.imgs):
         # the coordinate along which the camera position is sampled
         j = random.randint(0, 2)
         # one-off shift along the randomly chosen coordinate
-        d = (coord_max[j] - coord_min[j]) / n
+        d = (coord_max[j] - coord_min[j]) / n_cam_poses
         if location[j] < 0:
             d = -d
-        for k in range(n):
+        for k in range(n_cam_poses):
             # Compute rotation based on vector going from location towards poi
             rotation_matrix = bproc.camera.rotation_from_forward_vec(poi - location, inplane_rot=np.random.uniform(-0.7854, 0.7854))
             # Add homog cam pose based on location and rotation
@@ -217,16 +221,16 @@ with open(os.path.join(res_dir,"coco_annotations.json"), "r") as fh:
     y = json.load(fh)
 
 # list of object names
-j = 0
+n_obj = 0
 obj_list = []
 with open(os.path.join(res_dir,"obj.names"), "w") as fh:
     for cat in y["categories"]:
         if cat["id"] < 999:
             n = cat["name"]
             i = cat["id"]
-            obj_list.append([n,i,j])
+            obj_list.append([n,i,n_obj])
             fh.write(n+"\n")
-            j += 1
+            n_obj += 1
 
 # create or clear the data folder for the dataset
 res_data = os.path.join(res_dir, 'data')
@@ -237,14 +241,31 @@ else:
     os.mkdir(res_data)
 
 # list of image file names
+fn_image = os.path.join(res_dir,"images.txt")
 img_list = []
-with open(os.path.join(res_dir,"images.txt"), "w") as fh:
+with open(fn_image, "w") as fh:
     for i in y["images"]:
         filename = i["file_name"]
         shutil.copy(os.path.join(res_dir,filename),res_data)
         fh.write(filename.replace('images','data')+"\n")
         img_list.append([i["id"], (os.path.split(filename))[1]])
 
+# create 2 lists of file names, for train and valid (one image per series goes to valid)
+n_image_in_series = n_cam_location * n_cam_poses # number of images in a series
+i = 0
+fh = open(fn_image, "r")
+f1 = open(os.path.join(res_dir,"i_train.txt"), "w")
+f2 = open(os.path.join(res_dir,"i_val.txt"), "w")
+for line in fh:
+    i += 1
+    if i % n_image_in_series == 0:
+        f2.write(line)
+    else:
+        f1.write(line)
+fh.close()
+f1.close()
+f2.close()
+
 # fill in the bbox label files
 for i in y["annotations"]:
     cat_id = i["category_id"]
@@ -264,3 +285,12 @@ for i in y["annotations"]:
     j = next(k for k, (_, x, _) in enumerate(obj_list) if x == cat_id)
     # format: <class> <center_x> <center_y> <width> <height>
     fh.write(f"{obj_list[j][2]} {rel[0]} {rel[1]} {rel[2]} {rel[3]}\n")
+
+# create the dataset description file for darknet
+with open(os.path.join(res_dir,"yolov4_objs2.data"), "w") as fh:
+    fh.write(f"classes = {n_obj}\n")
+    fh.write("train = i_train.txt\n")
+    fh.write("valid = i_val.txt\n")
+    fh.write("names = obj.names\n")
+    fh.write("backup = backup\n")
+    fh.write("eval = coco\n")
\ No newline at end of file
diff --git a/ObjectDetection/yolov4_test.cfg b/ObjectDetection/yolov4_test.cfg
new file mode 100644
index 0000000..66926bd
--- /dev/null
+++ b/ObjectDetection/yolov4_test.cfg
@@ -0,0 +1,1159 @@
+[net]
+# Testing
+batch=1
+subdivisions=1
+# Training
+#batch=64
+#subdivisions=16
+width=416
+height=416
+channels=3
+momentum=0.949
+decay=0.0005
+angle=0
+saturation = 1.5
+exposure = 1.5
+hue=.1
+
+learning_rate=0.001
+burn_in=1000
+max_batches = 12000
+policy=steps
+steps=9600,10800
+scales=.1,.1
+
+#cutmix=1
+mosaic=1
+
+#:104x104 54:52x52 85:26x26 104:13x13 for 416
+
+[convolutional]
+batch_normalize=1
+filters=32
+size=3
+stride=1
+pad=1
+activation=mish
+
+# Downsample
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=2
+pad=1
+activation=mish
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=1
+stride=1
+pad=1
+activation=mish
+
+[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-7 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-10 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + 
+[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-28 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-28 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 
+stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-16 + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=mish +stopbackward=800 + +########################## + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = 85 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = 54 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +########################## + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=33 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=6 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +scale_x_y = 1.2 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 +max_delta=5 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=leaky + +[route] +layers = 
-1, -16 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=33 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=6 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +scale_x_y = 1.1 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 +max_delta=5 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=leaky + +[route] +layers = -1, -37 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=33 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=6 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=0 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 +max_delta=5
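A note on the new yolov4_test.cfg: it is written for `classes=6`, and the `filters=33` in each `[convolutional]` layer directly before a `[yolo]` block follows darknet's usual rule `filters = (classes + 5) * <masks per yolo layer>`, i.e. (6 + 5) * 3 = 33; likewise `steps=9600,10800` are 80% and 90% of `max_batches=12000`. A small sketch for re-deriving these values when the class count changes (the helper name and the 2000-batches-per-class rule of thumb come from AlexeyAB's darknet documentation, not from this patch):

```python
# yolo_cfg_calc.py (hypothetical helper): values to edit in a YOLOv4 cfg
def yolo_cfg_values(classes, masks_per_layer=3, batches_per_class=2000):
    """filters before each [yolo] layer, max_batches and its 80%/90% steps."""
    filters = (classes + 5) * masks_per_layer  # x, y, w, h, objectness + classes
    max_batches = max(6000, classes * batches_per_class)
    steps = (int(max_batches * 0.8), int(max_batches * 0.9))
    return filters, max_batches, steps

print(yolo_cfg_values(6))  # (33, 12000, (9600, 10800)) -- matches yolov4_test.cfg
```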
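Also for reference, the train/validation split that objs2Yolov4dataset.py now writes to i_train.txt and i_val.txt: one series is `n_cam_location * n_cam_poses` renders of the same simulated scene state, and exactly one image per series, i.e. every 15th image (about 7% of the data) with the defaults, goes to validation. A toy illustration of the rule with made-up file names:

```python
# Illustration of the series-based split rule from objs2Yolov4dataset.py
n_cam_location, n_cam_poses = 5, 3
n_image_in_series = n_cam_location * n_cam_poses      # 15 images per series

images = [f"data/{i:06d}.jpg" for i in range(1, 31)]  # two series of renders
train = [f for i, f in enumerate(images, 1) if i % n_image_in_series != 0]
val   = [f for i, f in enumerate(images, 1) if i % n_image_in_series == 0]
print(len(train), len(val))  # 28 2
print(val)                   # ['data/000015.jpg', 'data/000030.jpg']
```

Taking the last render of each series means the validation set samples every scene arrangement while none of its images are duplicated in the training list.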