dataset_generation and train_models as modules

This commit is contained in:
parent 69b8512d6b
commit 649a426c25

20 changed files with 412 additions and 4342 deletions
@ -1,15 +0,0 @@
## Dataset generation script

The script is used as part of the web service to generate datasets from a user-supplied configuration.

The [BlenderProc](https://github.com/DLR-RM/BlenderProc) package must be installed.

Invocation:

```bash
blenderproc run renderBOPdataset.py --cfg CFG

options:
  --cfg CFG   JSON string with the dataset configuration parameters, or a path to a JSON configuration file
```

[Example dataset configuration file.](dataset_cfg.json)
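For example, with the configuration saved to a file (the example linked above):

```bash
blenderproc run renderBOPdataset.py --cfg dataset_cfg.json
```

Per the option description, the same configuration can instead be passed inline as a single quoted JSON string.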
@ -1,41 +0,0 @@
{
  "dataSetObjects": ["fork"],
  "datasetType": "Object Detection - YOLOv8",
  "name": "123123e",
  "formBuilder": {
    "output": {
      "typedataset": "ObjectDetection",
      "dataset_path": "eqwfeadszxz",
      "models": [{"id": 1, "name": "fork"}],
      "models_randomization": { "loc_range_low": [-1, -1, 0.0], "loc_range_high": [1, 1, 2] },
      "scene": {
        "objects": [
          {"name": "floor", "collision_shape": "BOX", "loc_xyz": [0, 0, 0], "rot_euler": [0, 0, 0],
           "material_randomization": {"specular": [0, 1], "roughness": [0, 1], "metallic": [0, 1], "base_color": [[0, 0, 0, 1], [1, 1, 1, 1]]}
          }
        ],
        "lights": [
          {"id": 1, "type": "POINT", "loc_xyz": [5, 5, 5], "rot_euler": [-0.06, 0.61, -0.19],
           "color_range_low": [0.5, 0.5, 0.5], "color_range_high": [1, 1, 1],
           "energy_range": [400, 900]
          },
          {"id": 2, "type": "SUN", "loc_xyz": [0, 0, 0], "rot_euler": [-0.01, 0.01, -0.01],
           "color_range_low": [1, 1, 1], "color_range_high": [1, 1, 1],
           "energy_range": [2, 9]
          }
        ]
      },
      "camera_position": { "center_shell": [0, 0, 0], "radius_range": [0.4, 1.4], "elevation_range": [10, 90] },
      "generation": {
        "n_cam_pose": 3,
        "n_sample_on_pose": 1,
        "n_series": 3,
        "image_format": "JPEG",
        "image_size_wh": [640, 480]
      }
    }
  },
  "processStatus": "exec",
  "local_path": "/home/user/5f4e161b-82d1-41fa-a11c-15d485b01600",
  "projectId": "660aaddbf98957a186f9c546"
}
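A quick way to sanity-check a configuration before launching a render; a minimal sketch (not part of the repo) that reads the same fields renderBOPdataset.py consumes, assuming dataset_cfg.json sits in the current directory:

```python
import json

with open("dataset_cfg.json", "r") as f:
    cfg = json.load(f)

ds_cfg = cfg["formBuilder"]["output"]   # dataset config, as in the script below
generation = ds_cfg["generation"]

print("dataset name:", cfg["name"])
print("scene objects:", len(ds_cfg["scene"]["objects"]))
# upper bound on rendered frames per the generation parameters
print("frames:", generation["n_series"] * generation["n_cam_pose"] * generation["n_sample_on_pose"])
```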
@ -1,361 +0,0 @@
import blenderproc as bproc
"""
renderBOPdataset
Overall task: common pipeline
Implemented function: creation of a BOP-format dataset with the given randomization parameters
Uses the blenderproc module

26.04.2024 @shalenikol release 0.1
"""
import numpy as np
import argparse
import random
import os
import shutil
import json

VHACD_PATH = "blenderproc_resources/vhacd"
DIR_MODELS = "models"
FILE_LOG_SCENE = "res.txt"
FILE_RBS_INFO = "rbs_info.json"
FILE_GT_COCO = "scene_gt_coco.json"

Not_Categories_Name = True # the category name is missing from the COCO annotation

def _get_path_model(name_model: str) -> str:
    # TODO: from name_model, find the path to the mesh (model.fbx)
    # local_path/assets/mesh/
    return os.path.join(rnd_par.output_dir, "assets/mesh/" + name_model + ".fbx")

def _get_path_object(name_obj: str) -> str:
    # TODO: from name_obj, find the path to the scene object (object.fbx)
    return os.path.join(rnd_par.output_dir, "assets/mesh/" + name_obj + ".fbx")

def convert2relative(height, width, bbox):
    """
    The YOLO format uses relative coordinates for annotation
    """
    x, y, w, h = bbox
    x += w/2
    y += h/2
    return x/width, y/height, w/width, h/height

def render() -> int:
    for obj in all_meshs:
        # Make the object actively participate in the physics simulation
        obj.enable_rigidbody(active=True, collision_shape="COMPOUND")
        # Also use convex decomposition as collision shapes
        obj.build_convex_decomposition_collision_shape(VHACD_PATH)

    objs = all_meshs + rnd_par.scene.objs

    log_txt = os.path.join(rnd_par.output_dir, FILE_LOG_SCENE)
    with open(log_txt, "w") as fh:
        for i, o in enumerate(objs):
            loc = o.get_location()
            euler = o.get_rotation_euler()
            fh.write(f"{i} : {o.get_name()} {loc} {euler} category_id = {o.get_cp('category_id')}\n")

    # define the lights and set their locations and energy levels
    ls = []
    for l in rnd_par.scene.light_data:
        light = bproc.types.Light(name=f"l{l['id']}")
        light.set_type(l["type"])
        light.set_location(l["loc_xyz"]) # e.g. [5, -5, 5]
        light.set_rotation_euler(l["rot_euler"]) # e.g. [-0.063, 0.6177, -0.1985]
        ls += [light]

    # define the camera intrinsics
    bproc.camera.set_intrinsics_from_blender_params(1,
                                                    rnd_par.image_size_wh[0],
                                                    rnd_par.image_size_wh[1],
                                                    lens_unit="FOV")

    # add segmentation masks (per class and per instance)
    bproc.renderer.enable_segmentation_output(map_by=["category_id", "instance", "name"])

    # activate depth rendering
    bproc.renderer.enable_depth_output(activate_antialiasing=False)

    res_dir = os.path.join(rnd_par.output_dir, rnd_par.ds_name)
    if os.path.isdir(res_dir):
        shutil.rmtree(res_dir)
    # Rendering loop
    # Do multiple times: position the objects using the physics simulator and render X images with random camera poses
    for r in range(rnd_par.n_series):
        # either one random object per frame, or all given objects
        random_obj = random.choice(range(rnd_par.scene.n_obj))
        meshs = []
        for i, o in enumerate(all_meshs):
            if rnd_par.single_object and i != random_obj:
                continue
            meshs += [o]
            rnd_mat = rnd_par.scene.obj_data[i]["material_randomization"]
            mats = o.get_materials()
            for mat in mats:
                val = rnd_mat["specular"]
                mat.set_principled_shader_value("Specular", random.uniform(val[0], val[1]))
                val = rnd_mat["roughness"]
                mat.set_principled_shader_value("Roughness", random.uniform(val[0], val[1]))
                val = rnd_mat["base_color"]
                mat.set_principled_shader_value("Base Color", np.random.uniform(val[0], val[1]))
                val = rnd_mat["metallic"]
                mat.set_principled_shader_value("Metallic", random.uniform(val[0], val[1]))

        # Randomly set the color and energy
        for i, l in enumerate(ls):
            current = rnd_par.scene.light_data[i]
            l.set_color(np.random.uniform(current["color_range_low"], current["color_range_high"]))
            energy = current["energy_range"]
            l.set_energy(random.uniform(energy[0], energy[1]))

        # Clear all key frames from the previous run
        bproc.utility.reset_keyframes()

        # Define a function that samples 6-DoF poses
        def sample_pose(obj: bproc.types.MeshObject):
            obj.set_location(np.random.uniform(rnd_par.loc_range_low, rnd_par.loc_range_high)) # e.g. [-1, -1, 0], [1, 1, 2]
            obj.set_rotation_euler(bproc.sampler.uniformSO3())

        # Sample the poses of all objects above the ground without any collisions in-between
        bproc.object.sample_poses(meshs,
                                  objects_to_check_collisions = meshs + rnd_par.scene.collision_objects,
                                  sample_pose_func = sample_pose)

        # Run the simulation and fix the poses of the objects at the end
        bproc.object.simulate_physics_and_fix_final_poses(min_simulation_time=4, max_simulation_time=20, check_object_interval=1)

        # Find the point of interest; all camera poses should look towards it
        poi = bproc.object.compute_poi(meshs)

        coord_max = [0.1, 0.1, 0.1]
        coord_min = [0., 0., 0.]

        with open(log_txt, "a") as fh:
            fh.write("*****************\n")
            fh.write(f"{r}) poi = {poi}\n")
            i = 0
            for o in meshs:
                i += 1
                loc = o.get_location()
                euler = o.get_rotation_euler()
                fh.write(f" {i} : {o.get_name()} {loc} {euler}\n")
                for j in range(3):
                    if loc[j] < coord_min[j]:
                        coord_min[j] = loc[j]
                    if loc[j] > coord_max[j]:
                        coord_max[j] = loc[j]

        # Sample up to X camera poses
        #an = np.random.uniform(0.78, 1.2) #1. #0.35
        for i in range(rnd_par.n_cam_pose):
            # Sample location
            location = bproc.sampler.shell(center=rnd_par.center_shell,
                                           radius_min=rnd_par.radius_range[0],
                                           radius_max=rnd_par.radius_range[1],
                                           elevation_min=rnd_par.elevation_range[0],
                                           elevation_max=rnd_par.elevation_range[1])
            # the coordinate along which the camera position will be sampled
            j = random.randint(0, 2)
            # one-off shift along the random coordinate
            d = (coord_max[j] - coord_min[j]) / rnd_par.n_sample_on_pose
            if location[j] < 0:
                d = -d
            for _ in range(rnd_par.n_sample_on_pose):
                # Compute rotation based on the vector going from the location towards the poi
                rotation_matrix = bproc.camera.rotation_from_forward_vec(poi - location, inplane_rot=np.random.uniform(-0.7854, 0.7854))
                # Add a homogeneous camera pose based on location and rotation
                cam2world_matrix = bproc.math.build_transformation_mat(location, rotation_matrix)
                bproc.camera.add_camera_pose(cam2world_matrix)
                location[j] -= d
        # render the whole pipeline
        data = bproc.renderer.render()
        # Write data in BOP format
        bproc.writer.write_bop(res_dir,
                               target_objects = all_meshs, # Optional[List[MeshObject]] = None
                               depths = data["depth"],
                               depth_scale = 1.0,
                               colors = data["colors"],
                               color_file_format=rnd_par.image_format,
                               append_to_existing_output = (r > 0),
                               save_world2cam = False) # world coords are arbitrary in most real BOP datasets
                               # dataset="robo_ds",

    models_dir = os.path.join(res_dir, DIR_MODELS)
    os.mkdir(models_dir)

    data = []
    for i, objn in enumerate(rnd_par.models.names):
        rec = {}
        rec["id"] = i + 1
        rec["name"] = objn
        rec["model"] = os.path.join(DIR_MODELS, os.path.split(rnd_par.models.filenames[i])[1]) # relative path
        t = [obj.get_bound_box(local_coords=True).tolist() for obj in all_meshs if obj.get_name() == objn]
        rec["cuboid"] = t[0]
        data.append(rec)
        shutil.copy2(rnd_par.models.filenames[i], models_dir)
        f = (os.path.splitext(rnd_par.models.filenames[i]))[0] + ".mtl" # material file
        if os.path.isfile(f):
            shutil.copy2(f, models_dir)

    with open(os.path.join(res_dir, FILE_RBS_INFO), "w") as fh:
        json.dump(data, fh, indent=2)

    """
    !!! categories -> name is taken from category_id !!!
    see below:
    blenderproc.python.writer : BopWriterUtility.py
        class _BopWriterUtility
            def calc_gt_coco
            ...
            CATEGORIES = [{'id': obj.get_cp('category_id'), 'name': str(obj.get_cp('category_id')), 'supercategory':
                dataset_name} for obj in dataset_objects]

    therefore we replace the category names in the annotation
    """
    def change_categories_name(dir: str):
        coco_file = os.path.join(dir, FILE_GT_COCO)
        with open(coco_file, "r") as fh:
            data = json.load(fh)
        cats = data["categories"]

        for i, cat in enumerate(cats):
            cat["name"] = rnd_par.models.names[i]

        with open(coco_file, "w") as fh:
            json.dump(data, fh, indent=0)

    def explore(path: str):
        if not os.path.isdir(path):
            return
        folders = [
            os.path.join(path, o)
            for o in os.listdir(path)
            if os.path.isdir(os.path.join(path, o))
        ]
        for path_entry in folders:
            print(path_entry)
            if os.path.isfile(os.path.join(path_entry, FILE_GT_COCO)):
                change_categories_name(path_entry)
            else:
                explore(path_entry)

    if Not_Categories_Name:
        explore(res_dir)
    return 0 # success

def _get_models(par, data) -> int:
    global all_meshs

    par.models = lambda: None
    par.models.n_item = len(data)
    if par.models.n_item == 0:
        return 0 # no models

    # load the objects
    par.models.names = []
    par.models.filenames = []
    i = 1
    for f in data:
        nam = f
        par.models.names.append(nam)
        ff = _get_path_model(nam)
        par.models.filenames.append(ff)
        if not os.path.isfile(ff):
            print(f"Error: no such file '{ff}'")
            return -1
        obj = bproc.loader.load_obj(ff)
        all_meshs += obj
        obj[0].set_cp("category_id", i) # starting from 1
        i += 1
    return par.models.n_item

def _get_scene(par, data) -> int:
    # load scene
    par.scene = lambda: None
    objs = data["objects"]
    par.scene.n_obj = len(objs)
    if par.scene.n_obj == 0:
        return 0 # empty scene
    lights = data["lights"]
    par.scene.n_light = len(lights)
    if par.scene.n_light == 0:
        return 0 # no lighting

    par.scene.objs = []
    par.scene.collision_objects = []
    for f in objs:
        ff = _get_path_object(f["name"])
        if not os.path.isfile(ff):
            print(f"Error: no such file '{ff}'")
            return -1
        obj = bproc.loader.load_obj(ff)
        obj[0].set_cp("category_id", 999)
        coll = f["collision_shape"]
        if len(coll) > 0:
            obj[0].enable_rigidbody(False, collision_shape=coll)
            par.scene.collision_objects += obj
        par.scene.objs += obj

    if not par.scene.collision_objects:
        print("Collision objects not found in the scene")
        return 0
    par.scene.obj_data = objs
    par.scene.light_data = lights
    return par.scene.n_obj

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--cfg", required=True, help="JSON string with dataset parameters")
    args = parser.parse_args()

    if args.cfg[-5:] == ".json":
        if not os.path.isfile(args.cfg):
            print(f"Error: no such file '{args.cfg}'")
            exit(-1)
        with open(args.cfg, "r") as f:
            j_data = f.read()
    else:
        j_data = args.cfg
    try:
        cfg = json.loads(j_data)
    except json.JSONDecodeError as e:
        print(f"JSON error: {e}")
        exit(-2)

    ds_cfg = cfg["formBuilder"]["output"] # dataset config
    generation = ds_cfg["generation"]
    cam_pos = ds_cfg["camera_position"]
    models_randomization = ds_cfg["models_randomization"]

    rnd_par = lambda: None
    rnd_par.single_object = True
    rnd_par.ds_name = cfg["name"]
    rnd_par.output_dir = cfg["local_path"]
    rnd_par.dataset_objs = cfg["dataSetObjects"]
    rnd_par.n_cam_pose = generation["n_cam_pose"]
    rnd_par.n_sample_on_pose = generation["n_sample_on_pose"]
    rnd_par.n_series = generation["n_series"]
    rnd_par.image_format = generation["image_format"]
    rnd_par.image_size_wh = generation["image_size_wh"]
    rnd_par.center_shell = cam_pos["center_shell"]
    rnd_par.radius_range = cam_pos["radius_range"]
    rnd_par.elevation_range = cam_pos["elevation_range"]
    rnd_par.loc_range_low = models_randomization["loc_range_low"]
    rnd_par.loc_range_high = models_randomization["loc_range_high"]

    if not os.path.isdir(rnd_par.output_dir):
        print(f"Error: invalid path '{rnd_par.output_dir}'")
        exit(-3)

    bproc.init()

    all_meshs = []
    ret = _get_models(rnd_par, rnd_par.dataset_objs)
    if ret <= 0:
        print("Error: no models in config")
        exit(-4)
    if _get_scene(rnd_par, ds_cfg["scene"]) == 0:
        print("Error: empty scene in config")
        exit(-5)
    exit(render())
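To inspect the generated metadata afterwards, a minimal sketch (not part of the repo): render() writes rbs_info.json into `<local_path>/<name>`; the path below is a hypothetical example of that layout.

```python
import json
import numpy as np

with open("/home/user/5f4e161b-82d1-41fa-a11c-15d485b01600/123123e/rbs_info.json", "r") as fh:
    info = json.load(fh)

for rec in info:
    cuboid = np.array(rec["cuboid"])           # 8 corner points, local coordinates
    size = cuboid.max(axis=0) - cuboid.min(axis=0)
    print(rec["id"], rec["name"], rec["model"], "extent:", size)
```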
@ -1,105 +0,0 @@
# Launch instructions

The [BlenderProc](https://github.com/DLR-RM/BlenderProc) package must be installed.

## Creating a YoloV4-format dataset for a given object

Launch command:

```
blenderproc run obj2Yolov4dataset.py [obj] [output_dir] [--imgs 1]
```
- obj: object description file *.obj
- output_dir: output directory
- --imgs 1: number of output images

## Creating a YoloV4-format dataset for a series of given objects in a given scene

Launch command:
```
blenderproc run objs2Yolov4dataset.py [scene] [obj_path] [output_dir] [vhacd_path] [--imgs 1]
```
- scene: path to the scene description file (*.blend)
- obj_path: path to the directory with the *.obj description files of the objects to detect
- output_dir: output directory
- vhacd_path: directory in which vhacd should be installed or is already installed (default: blenderproc_resources/vhacd)
- --imgs 1: number of rendering series (15 images each) to output (e.g. imgs=100 yields 1500 images)

The scene description file must contain a plane named 'floor' onto which the objects to detect will be sampled.

The [darknet](https://github.com/AlexeyAB/darknet) package must be built for the target software and hardware (CPU, GPU, ...).

---

## Training the neural network and obtaining its weights file

Launch command:
```
darknet detector train [data] [cfg] [weight]
```
- data: dataset description file (*.data)
- cfg: network description file
- weight: network weights file

For training, download the pretrained weights file (162 MB): [yolov4.conv.137](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.conv.137)
Different numbers of detectable objects in the dataset require their own [data](https://gitlab.com/robossembler/framework/-/blob/master/ObjectDetection/yolov4_objs2.data) and [cfg](https://gitlab.com/robossembler/framework/-/blob/master/ObjectDetection/yolov4_objs2.cfg) files.

---

## Commands for object detection with the trained weights
* variant 1 (the file t.txt contains a list of images):
```
darknet detector test yolov4_objs2.data yolov4_test.cfg yolov4_objs2_final.weights -dont_show -ext_output < t.txt > res.txt
```

* variant 2 (the file 000015.jpg is a test image):
```
darknet detector test yolov4_objs2.data yolov4_test.cfg yolov4_objs2_final.weights -dont_show -ext_output 000015.jpg > res.txt
```
* variant 3 (the file t.txt contains a list of images):
```
darknet detector test yolov4_objs2.data yolov4_test.cfg yolov4_objs2_final.weights -dont_show -ext_output -out res.json < t.txt
```

The file res.txt after running variant 2:

> net.optimized_memory = 0
> mini_batch = 1, batch = 1, time_steps = 1, train = 0
> Create CUDA-stream - 0
> Create cudnn-handle 0
> nms_kind: greedynms (1), beta = 0.600000
> nms_kind: greedynms (1), beta = 0.600000
> nms_kind: greedynms (1), beta = 0.600000
>
> seen 64, trained: 768 K-images (12 Kilo-batches_64)
> Detection layer: 139 - type = 28
> Detection layer: 150 - type = 28
> Detection layer: 161 - type = 28
> 000015.jpg: Predicted in 620.357000 milli-seconds.
> fork.001: 94% (left_x: 145 top_y: -0 width: 38 height: 18)
> asm_element_edge.001: 28% (left_x: 195 top_y: 320 width: 40 height: 61)
> start_link.001: 87% (left_x: 197 top_y: 313 width: 39 height: 68)
> doking_link.001: 99% (left_x: 290 top_y: 220 width: 32 height: 21)
> start_link.001: 90% (left_x: 342 top_y: 198 width: 33 height: 34)
> doking_link.001: 80% (left_x: 342 top_y: 198 width: 32 height: 34)
> assemb_link.001: 100% (left_x: 426 top_y: 410 width: 45 height: 61)

The file res.json after running variant 3:
```
[
 {
  "frame_id":1,
  "filename":"img_test/000001.jpg",
  "objects": [
   {"class_id":5, "name":"asm_element_edge.001", "relative_coordinates":{"center_x":0.498933, "center_y":0.502946, "width":0.083075, "height":0.073736}, "confidence":0.999638},
   {"class_id":4, "name":"grip-tool.001", "relative_coordinates":{"center_x":0.858856, "center_y":0.031339, "width":0.043919, "height":0.064563}, "confidence":0.996551}
  ]
 },
 {
  "frame_id":2,
  "filename":"img_test/000002.jpg",
  "objects": [
   {"class_id":1, "name":"start_link.001", "relative_coordinates":{"center_x":0.926026, "center_y":0.728457, "width":0.104029, "height":0.132757}, "confidence":0.995811},
   {"class_id":0, "name":"assemb_link.001", "relative_coordinates":{"center_x":0.280403, "center_y":0.129059, "width":0.029980, "height":0.025067}, "confidence":0.916782}
  ]
 }
]
```
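Darknet reports relative (center-based) coordinates in res.json; a minimal sketch (not part of the repo) converting them back to pixel-space boxes, assuming the 640x480 resolution used throughout these scripts:

```python
import json

W, H = 640, 480  # render resolution assumed above

with open("res.json", "r") as fh:
    frames = json.load(fh)

for frame in frames:
    for det in frame["objects"]:
        rc = det["relative_coordinates"]
        w, h = rc["width"] * W, rc["height"] * H
        left_x = rc["center_x"] * W - w / 2   # invert the center-based encoding
        top_y = rc["center_y"] * H - h / 2
        print(frame["filename"], det["name"], f"{det['confidence']:.2f}",
              int(left_x), int(top_y), int(w), int(h))
```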
@ -1,144 +0,0 @@
import blenderproc as bproc
"""
obj2Yolov4dataset
Overall task: object detection
Implemented function: creation of a YoloV4-format dataset for a given object (*.obj)
Uses the blenderproc module

24.01.2023 @shalenikol release 0.1
22.02.2023 @shalenikol release 0.2 fixed the x,y calculation in convert2relative
"""
import numpy as np
import argparse
import random
import os
import shutil
import json

def convert2relative(height, width, bbox):
    """
    The YOLO format uses relative coordinates for annotation
    """
    x, y, w, h = bbox
    x += w/2
    y += h/2
    return x/width, y/height, w/width, h/height

parser = argparse.ArgumentParser()
parser.add_argument('scene', nargs='?', default="resources/robossembler-asset.obj", help="Path to the object file.")
parser.add_argument('output_dir', nargs='?', default="output", help="Path to where the final files will be saved")
parser.add_argument('--imgs', default=1, type=int, help="The number of times the objects should be rendered.")
args = parser.parse_args()

if not os.path.isdir(args.output_dir):
    os.mkdir(args.output_dir)

bproc.init()

# load the objects into the scene
obj = bproc.loader.load_obj(args.scene)[0]
obj.set_cp("category_id", 1)

# Randomly perturb the material of the object
mat = obj.get_materials()[0]
mat.set_principled_shader_value("Specular", random.uniform(0, 1))
mat.set_principled_shader_value("Roughness", random.uniform(0, 1))
mat.set_principled_shader_value("Base Color", np.random.uniform([0, 0, 0, 1], [1, 1, 1, 1]))
mat.set_principled_shader_value("Metallic", random.uniform(0, 1))

# Create a new light
light = bproc.types.Light()
light.set_type("POINT")
# Sample its location around the object
light.set_location(bproc.sampler.shell(
    center=obj.get_location(),
    radius_min=1,
    radius_max=5,
    elevation_min=1,
    elevation_max=89
))
# Randomly set the color and energy
light.set_color(np.random.uniform([0.5, 0.5, 0.5], [1, 1, 1]))
light.set_energy(random.uniform(100, 1000))

bproc.camera.set_resolution(640, 480)

# Sample up to args.imgs camera poses
poses = 0
tries = 0
while tries < 10000 and poses < args.imgs:
    # Sample a random camera location around the object
    location = bproc.sampler.shell(
        center=obj.get_location(),
        radius_min=1,
        radius_max=4,
        elevation_min=1,
        elevation_max=89
    )
    # Compute rotation based on a lookat point which is placed randomly around the object
    lookat_point = obj.get_location() + np.random.uniform([-0.5, -0.5, -0.5], [0.5, 0.5, 0.5])
    rotation_matrix = bproc.camera.rotation_from_forward_vec(lookat_point - location, inplane_rot=np.random.uniform(-0.7854, 0.7854))
    # Add a homogeneous camera pose based on location and rotation
    cam2world_matrix = bproc.math.build_transformation_mat(location, rotation_matrix)

    # Only add the camera pose if the object is still visible
    if obj in bproc.camera.visible_objects(cam2world_matrix):
        bproc.camera.add_camera_pose(cam2world_matrix)
        poses += 1
    tries += 1

# Enable transparency so the background becomes transparent
bproc.renderer.set_output_format(enable_transparency=True)
# add segmentation masks (per class and per instance)
bproc.renderer.enable_segmentation_output(map_by=["category_id", "instance", "name"])

# Render RGB images
data = bproc.renderer.render()

# Write data to a COCO file
res_dir = os.path.join(args.output_dir, 'coco_data')
bproc.writer.write_coco_annotations(res_dir,
                                    instance_segmaps=data["instance_segmaps"],
                                    instance_attribute_maps=data["instance_attribute_maps"],
                                    color_file_format='JPEG',
                                    colors=data["colors"],
                                    append_to_existing_output=True)

# load the annotation
with open(os.path.join(res_dir, "coco_annotations.json"), "r") as fh:
    y = json.load(fh)

# list of object names
with open(os.path.join(res_dir, "obj.names"), "w") as fh:
    for cat in y["categories"]:
        fh.write(cat["name"] + "\n")

# create or clear the data folder for the dataset
res_data = os.path.join(res_dir, 'data')
if os.path.isdir(res_data):
    for f in os.listdir(res_data):
        os.remove(os.path.join(res_data, f))
else:
    os.mkdir(res_data)

# list of image file names
s = []
with open(os.path.join(res_dir, "images.txt"), "w") as fh:
    for i in y["images"]:
        filename = i["file_name"]
        shutil.copy(os.path.join(res_dir, filename), res_data)
        fh.write(filename.replace('images', 'data') + "\n")
        s.append((os.path.split(filename))[1])

# it is assumed that "images" and "annotations" follow in the same order
c = 0
for i in y["annotations"]:
    bbox = i["bbox"]
    im_h = i["height"]
    im_w = i["width"]
    rel = convert2relative(im_h, im_w, bbox)
    fn = (os.path.splitext(s[c]))[0] # file name only
    with open(os.path.join(res_data, fn + ".txt"), "w") as fh:
        # format: <target> <x-center> <y-center> <width> <height>
        fh.write("0 " + '{:-f} {:-f} {:-f} {:-f}'.format(rel[0], rel[1], rel[2], rel[3]) + "\n")
    c += 1
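A worked example of the convert2relative helper used in all of these scripts (the bbox numbers are made up): COCO stores (x, y, w, h) with a top-left corner, so the corner is first shifted to the box center and then normalized by the image size.

```python
# convert2relative copied from the script above, applied to a hypothetical bbox
def convert2relative(height, width, bbox):
    x, y, w, h = bbox
    x += w / 2   # shift the COCO top-left corner to the box center
    y += h / 2
    return x / width, y / height, w / width, h / height

# 640x480 image, bbox (x, y, w, h) = (100, 50, 64, 48)
print(convert2relative(480, 640, (100, 50, 64, 48)))
# -> (0.20625, 0.15416666..., 0.1, 0.1)
```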
@ -1,296 +0,0 @@
import blenderproc as bproc
"""
objs2Yolov4dataset
Overall task: object detection
Implemented function: creation of a YoloV4-format dataset for a series of given objects (*.obj) in a given scene (*.blend)
Uses the blenderproc module

17.02.2023 @shalenikol release 0.1
22.02.2023 @shalenikol release 0.2 fixed the x,y calculation in convert2relative
"""
import sys
import numpy as np
import argparse
import random
import os
import shutil
import json

def convert2relative(height, width, bbox):
    """
    The YOLO format uses relative coordinates for annotation
    """
    x, y, w, h = bbox
    x += w/2
    y += h/2
    return x/width, y/height, w/width, h/height

parser = argparse.ArgumentParser()
parser.add_argument('scene', nargs='?', default="resources/sklad.blend", help="Path to the scene object.")
parser.add_argument('obj_path', nargs='?', default="resources/in_obj", help="Path to the object files.")
parser.add_argument('output_dir', nargs='?', default="output", help="Path to where the final files will be saved")
parser.add_argument('vhacd_path', nargs='?', default="blenderproc_resources/vhacd", help="The directory in which vhacd should be installed or is already installed.")
parser.add_argument('--imgs', default=2, type=int, help="The number of times the objects should be rendered.")
args = parser.parse_args()

if not os.path.isdir(args.obj_path):
    print(f"{args.obj_path} : no object directory")
    sys.exit()

if not os.path.isdir(args.output_dir):
    os.mkdir(args.output_dir)

bproc.init()

# ? load the lights from the scene
#cam = bproc.loader.load_blend(args.scene, data_blocks=["cameras"])
#lights = bproc.loader.load_blend(args.scene, data_blocks=["lights"])

# load the objects
list_files = os.listdir(args.obj_path)
meshs = []
i = 0
for f in list_files:
    if (os.path.splitext(f))[1] == ".obj":
        f = os.path.join(args.obj_path, f) # path to the object file
        if os.path.isfile(f):
            meshs += bproc.loader.load_obj(f)
            i += 1

if i == 0:
    print("Objects not found")
    sys.exit()

for i, o in enumerate(meshs):
    o.set_cp("category_id", i + 1)

# load the scene
scene = bproc.loader.load_blend(args.scene, data_blocks=["objects"])
#scene = bproc.loader.load_obj(args.scene)

# find the floor
floor = None
for o in scene:
    o.set_cp("category_id", 999)
    s = o.get_name()
    if s.find("floor") >= 0:
        floor = o
if floor is None:
    print("Floor not found in the scene")
    sys.exit()

floor.enable_rigidbody(False, collision_shape='BOX')

objs = meshs + scene

for obj in meshs:
    # Make the object actively participate in the physics simulation
    obj.enable_rigidbody(active=True, collision_shape="COMPOUND")
    # Also use convex decomposition as collision shapes
    obj.build_convex_decomposition_collision_shape(args.vhacd_path)

with open(os.path.join(args.output_dir, "res.txt"), "w") as fh:
    # fh.write(str(type(scene[0]))+"\n")
    i = 0
    for o in objs:
        i += 1
        loc = o.get_location()
        euler = o.get_rotation_euler()
        fh.write(f"{i} : {o.get_name()} {loc} {euler}\n")

# define a light and set its location and energy level
light = bproc.types.Light()
light.set_type("POINT")
light.set_location([5, -5, 5])
#light.set_energy(900)
#light.set_color([0.7, 0.7, 0.7])

light1 = bproc.types.Light(name="light1")
light1.set_type("SUN")
light1.set_location([0, 0, 0])
light1.set_rotation_euler([-0.063, 0.6177, -0.1985])
#light1.set_energy(7)
light1.set_color([1, 1, 1])
"""
# Sample its location around the object
light.set_location(bproc.sampler.shell(
    center=obj.get_location(),
    radius_min=2.5,
    radius_max=5,
    elevation_min=1,
    elevation_max=89
))
"""

# define the camera intrinsics
bproc.camera.set_intrinsics_from_blender_params(1, 640, 480, lens_unit="FOV")
bproc.renderer.enable_segmentation_output(map_by=["category_id", "instance", "name"])

res_dir = os.path.join(args.output_dir, 'coco_data')
# Rendering loop
n_cam_location = 5 # number of random camera locations
n_cam_poses = 3 # number of samples per camera location
# Do multiple times: position the objects using the physics simulator and render X images with random camera poses
for r in range(args.imgs):
    # Randomly set the color and energy
    light.set_color(np.random.uniform([0.5, 0.5, 0.5], [1, 1, 1]))
    light.set_energy(random.uniform(500, 1000))
    light1.set_energy(random.uniform(3, 11))

    for i, o in enumerate(objs):
        mat = o.get_materials()[0]
        mat.set_principled_shader_value("Specular", random.uniform(0, 1))
        mat.set_principled_shader_value("Roughness", random.uniform(0, 1))
        mat.set_principled_shader_value("Base Color", np.random.uniform([0, 0, 0, 1], [1, 1, 1, 1]))
        mat.set_principled_shader_value("Metallic", random.uniform(0, 1))

    # Clear all key frames from the previous run
    bproc.utility.reset_keyframes()

    # Define a function that samples 6-DoF poses
    def sample_pose(obj: bproc.types.MeshObject):
        obj.set_location(np.random.uniform([-1, -1.5, 0.2], [1, 2, 1.2]))
        obj.set_rotation_euler(bproc.sampler.uniformSO3())

    # Sample the poses of all objects above the ground without any collisions in-between
    bproc.object.sample_poses(meshs, objects_to_check_collisions = meshs + [floor], sample_pose_func = sample_pose)

    # Run the simulation and fix the poses of the objects at the end
    bproc.object.simulate_physics_and_fix_final_poses(min_simulation_time=4, max_simulation_time=20, check_object_interval=1)

    # Find the point of interest; all camera poses should look towards it
    poi = bproc.object.compute_poi(meshs)

    coord_max = [0.1, 0.1, 0.1]
    coord_min = [0., 0., 0.]

    with open(os.path.join(args.output_dir, "res.txt"), "a") as fh:
        fh.write("*****************\n")
        fh.write(f"{r}) poi = {poi}\n")
        i = 0
        for o in meshs:
            i += 1
            loc = o.get_location()
            euler = o.get_rotation_euler()
            fh.write(f" {i} : {o.get_name()} {loc} {euler}\n")
            for j in range(3):
                if loc[j] < coord_min[j]:
                    coord_min[j] = loc[j]
                if loc[j] > coord_max[j]:
                    coord_max[j] = loc[j]

    # Sample up to X camera poses
    #an = np.random.uniform(0.78, 1.2) #1. #0.35
    for i in range(n_cam_location):
        # Sample location
        location = bproc.sampler.shell(center=[0, 0, 0],
                                       radius_min=1.1,
                                       radius_max=3.3,
                                       elevation_min=5,
                                       elevation_max=89)
        # the coordinate along which the camera position will be sampled
        j = random.randint(0, 2)
        # one-off shift along the random coordinate
        d = (coord_max[j] - coord_min[j]) / n_cam_poses
        if location[j] < 0:
            d = -d
        for k in range(n_cam_poses):
            # Compute rotation based on the vector going from the location towards the poi
            rotation_matrix = bproc.camera.rotation_from_forward_vec(poi - location, inplane_rot=np.random.uniform(-0.7854, 0.7854))
            # Add a homogeneous camera pose based on location and rotation
            cam2world_matrix = bproc.math.build_transformation_mat(location, rotation_matrix)
            bproc.camera.add_camera_pose(cam2world_matrix)
            location[j] -= d
    #world_matrix = bproc.math.build_transformation_mat([2.3, -0.4, 0.66], [1.396, 0., an])
    #bproc.camera.add_camera_pose(world_matrix)
    #an += 0.2

    # render the whole pipeline
    data = bproc.renderer.render()

    # Write data to a COCO file
    bproc.writer.write_coco_annotations(res_dir,
                                        instance_segmaps=data["instance_segmaps"],
                                        instance_attribute_maps=data["instance_attribute_maps"],
                                        color_file_format='JPEG',
                                        colors=data["colors"],
                                        append_to_existing_output=True)

# load the annotation
with open(os.path.join(res_dir, "coco_annotations.json"), "r") as fh:
    y = json.load(fh)

# list of object names
n_obj = 0
obj_list = []
with open(os.path.join(res_dir, "obj.names"), "w") as fh:
    for cat in y["categories"]:
        if cat["id"] < 999:
            n = cat["name"]
            i = cat["id"]
            obj_list.append([n, i, n_obj])
            fh.write(n + "\n")
            n_obj += 1

# create or clear the data folder for the dataset
res_data = os.path.join(res_dir, 'data')
if os.path.isdir(res_data):
    for f in os.listdir(res_data):
        os.remove(os.path.join(res_data, f))
else:
    os.mkdir(res_data)

# list of image file names
fn_image = os.path.join(res_dir, "images.txt")
img_list = []
with open(fn_image, "w") as fh:
    for i in y["images"]:
        filename = i["file_name"]
        shutil.copy(os.path.join(res_dir, filename), res_data)
        fh.write(filename.replace('images', 'data') + "\n")
        img_list.append([i["id"], (os.path.split(filename))[1]])

# create 2 lists of file names, for train and valid
n_image_in_series = n_cam_location * n_cam_poses # number of images in a series
i = 0
fh = open(fn_image, "r")
f1 = open(os.path.join(res_dir, "i_train.txt"), "w")
f2 = open(os.path.join(res_dir, "i_val.txt"), "w")
for line in fh:
    i += 1
    if i % n_image_in_series == 0:
        f2.write(line)
    else:
        f1.write(line)
fh.close()
f1.close()
f2.close()

# fill in the bbox label files
for i in y["annotations"]:
    cat_id = i["category_id"]
    if cat_id < 999:
        im_id = i["image_id"]
        bbox = i["bbox"]
        im_h = i["height"]
        im_w = i["width"]
        rel = convert2relative(im_h, im_w, bbox)

        # find the list index of the required image
        j = next(k for k, (x, _) in enumerate(img_list) if x == im_id)
        filename = img_list[j][1]
        fn = (os.path.splitext(filename))[0] # file name only
        with open(os.path.join(res_data, fn + ".txt"), "a") as fh:
            # find the list index of the required object
            j = next(k for k, (_, x, _) in enumerate(obj_list) if x == cat_id)
            # format: <target> <x-center> <y-center> <width> <height>
            fh.write(f"{obj_list[j][2]} {rel[0]} {rel[1]} {rel[2]} {rel[3]}\n")

# create the dataset description file for darknet
with open(os.path.join(res_dir, "yolov4_objs2.data"), "w") as fh:
    fh.write(f"classes = {n_obj}\n")
    fh.write("train = i_train.txt\n")
    fh.write("valid = i_val.txt\n")
    fh.write("names = obj.names\n")
    fh.write("backup = backup\n")
    fh.write("eval = coco\n")
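The train/validation split above sends every n-th line of images.txt to i_val.txt, where n = n_cam_location * n_cam_poses = 15 images per rendering series. A minimal sketch of the same arithmetic on made-up file names:

```python
# With 5 camera locations x 3 poses, images 15, 30, 45, ... go to validation.
n_cam_location, n_cam_poses = 5, 3
n_image_in_series = n_cam_location * n_cam_poses  # 15

lines = [f"data/{i:06d}.jpg" for i in range(1, 31)]  # hypothetical images.txt content
train = [ln for i, ln in enumerate(lines, start=1) if i % n_image_in_series != 0]
val = [ln for i, ln in enumerate(lines, start=1) if i % n_image_in_series == 0]
print(len(train), len(val))  # 28 2
```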
File diff suppressed because it is too large
@ -1,7 +0,0 @@
classes = 1
train = i_train.txt
valid = i_val.txt
names = obj.names
backup = backup
eval = coco
File diff suppressed because it is too large
@ -1,7 +0,0 @@
classes = 6
train = i_train.txt
valid = i_val.txt
names = obj.names
backup = backup
eval = coco
File diff suppressed because it is too large
@ -1,44 +0,0 @@
---
id: BOP_dataset
title: script for creating a BOP dataset
---

## Input data structure:
```
<example_dir>/
  input_obj/asm_element_edge.mtl # material file
  input_obj/asm_element_edge.obj # mesh object
  input_obj/fork.mtl
  input_obj/fork.obj
  input_obj/...
  resources/sklad.blend # scene file
  objs2BOPdataset.py # this script
```

## Example launch command:
```
cd <example_dir>/
blenderproc run objs2BOPdataset.py resources/sklad.blend input_obj output --imgs 333
```
- resources/sklad.blend : scene file
- input_obj : directory with the mesh files
- output : output directory
- imgs : number of batches, 9 frames each (333 * 9 = 2997 in this example)

## Output BOP dataset structure:
```
output/
  bop_data/
    train_pbr/
      000000/
        depth/... # depth files
        mask/... # mask files
        mask_visib/... # visibility mask files
        rgb/... # RGB image files
        scene_camera.json
        scene_gt.json
        scene_gt_coco.json
        scene_gt_info.json
    camera.json # camera intrinsics (for the whole dataset)
  res.txt # log of dataset batch creation
```
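A minimal sketch (not part of the repo) that walks the output layout above to verify a finished run, assuming the default `output` directory:

```python
import json
import os

root = "output/bop_data"

# shared camera intrinsics, written once for the whole dataset
with open(os.path.join(root, "camera.json"), "r") as fh:
    print("intrinsics:", json.load(fh))

# count the rendered RGB frames of the first (and only) scene chunk
rgb_dir = os.path.join(root, "train_pbr", "000000", "rgb")
print("frames:", len(os.listdir(rgb_dir)))
```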
@ -1,261 +0,0 @@
import blenderproc as bproc
"""
objs2BOPdataset
Overall task: 6D pose estimation
Implemented function: creation of a BOP-format dataset for a series of given objects (*.obj) in a given scene (*.blend)
Uses the blenderproc module

29.08.2023 @shalenikol release 0.1
12.10.2023 @shalenikol release 0.2
"""
import sys
import numpy as np
import argparse
import random
import os
import shutil
import json

Not_Categories_Name = True # the category name is missing from the annotation

def convert2relative(height, width, bbox):
    """
    The YOLO format uses relative coordinates for annotation
    """
    x, y, w, h = bbox
    x += w/2
    y += h/2
    return x/width, y/height, w/width, h/height

parser = argparse.ArgumentParser()
parser.add_argument('scene', nargs='?', default="resources/sklad.blend", help="Path to the scene object.")
parser.add_argument('obj_path', nargs='?', default="resources/in_obj", help="Path to the object files.")
parser.add_argument('output_dir', nargs='?', default="output", help="Path to where the final files will be saved")
parser.add_argument('vhacd_path', nargs='?', default="blenderproc_resources/vhacd", help="The directory in which vhacd should be installed or is already installed.")
parser.add_argument('-single_object', nargs='?', type=bool, default=True, help="One object per frame.")
parser.add_argument('--imgs', default=2, type=int, help="The number of times the objects should be rendered.")
args = parser.parse_args()

if not os.path.isdir(args.obj_path):
    print(f"{args.obj_path} : no object directory")
    sys.exit()

if not os.path.isdir(args.output_dir):
    os.mkdir(args.output_dir)

single_object = args.single_object

bproc.init()

# ? load the lights from the scene
#cam = bproc.loader.load_blend(args.scene, data_blocks=["cameras"])
#lights = bproc.loader.load_blend(args.scene, data_blocks=["lights"])

# load the objects
list_files = os.listdir(args.obj_path)
obj_names = []
obj_filenames = []
all_meshs = []
nObj = 0
for f in list_files:
    if (os.path.splitext(f))[1] == ".obj":
        f = os.path.join(args.obj_path, f) # path to the object file
        if os.path.isfile(f):
            obj = bproc.loader.load_obj(f)
            all_meshs += obj
            obj_names += [obj[0].get_name()]
            obj_filenames += [f]
            nObj += 1

if nObj == 0:
    print("Objects not found")
    sys.exit()

for i, obj in enumerate(all_meshs):
    #print(f"{i} *** {obj}")
    obj.set_cp("category_id", i + 1)

# load the scene
scene = bproc.loader.load_blend(args.scene, data_blocks=["objects"])

# find the collision objects (floor etc.)
obj_type = ["floor", "obj"]
collision_objects = []
#floor = None
for o in scene:
    o.set_cp("category_id", 999)
    s = o.get_name()
    for type in obj_type:
        if s.find(type) >= 0:
            collision_objects += [o]
            o.enable_rigidbody(False, collision_shape='BOX')
if not collision_objects:
    print("Collision objects not found in the scene")
    sys.exit()

#floor.enable_rigidbody(False, collision_shape='BOX')

for obj in all_meshs:
    # Make the object actively participate in the physics simulation
    obj.enable_rigidbody(active=True, collision_shape="COMPOUND")
    # Also use convex decomposition as collision shapes
    obj.build_convex_decomposition_collision_shape(args.vhacd_path)

objs = all_meshs + scene

with open(os.path.join(args.output_dir, "res.txt"), "w") as fh:
    # fh.write(str(type(scene[0]))+"\n")
    i = 0
    for o in objs:
        i += 1
        loc = o.get_location()
        euler = o.get_rotation_euler()
        fh.write(f"{i} : {o.get_name()} {loc} {euler} category_id = {o.get_cp('category_id')}\n")

# define a light and set its location and energy level
light = bproc.types.Light()
light.set_type("POINT")
light.set_location([5, -5, 5])
#light.set_energy(900)
#light.set_color([0.7, 0.7, 0.7])

light1 = bproc.types.Light(name="light1")
light1.set_type("SUN")
light1.set_location([0, 0, 0])
light1.set_rotation_euler([-0.063, 0.6177, -0.1985])
#light1.set_energy(7)
light1.set_color([1, 1, 1])

# define the camera intrinsics
bproc.camera.set_intrinsics_from_blender_params(1, 640, 480, lens_unit="FOV")

# add segmentation masks (per class and per instance)
bproc.renderer.enable_segmentation_output(map_by=["category_id", "instance", "name"])
#bproc.renderer.enable_segmentation_output(map_by=["category_id", "instance", "name", "bop_dataset_name"],
#                                          default_values={"category_id": 0, "bop_dataset_name": None})

# activate depth rendering
bproc.renderer.enable_depth_output(activate_antialiasing=False)

res_dir = os.path.join(args.output_dir, "bop_data")
if os.path.isdir(res_dir):
    shutil.rmtree(res_dir)
# Rendering loop
n_cam_location = 3 #5 # number of random camera locations
n_cam_poses = 3 #3 # number of samples per camera location
# Do multiple times: position the objects using the physics simulator and render X images with random camera poses
for r in range(args.imgs):
    # either one random object per frame, or all given objects
    meshs = [random.choice(all_meshs)] if single_object else all_meshs[:]

    # Randomly set the color and energy
    light.set_color(np.random.uniform([0.5, 0.5, 0.5], [1, 1, 1]))
    light.set_energy(random.uniform(500, 1000))
    light1.set_energy(random.uniform(3, 11))

    for i, o in enumerate(meshs):
        mat = o.get_materials()[0]
        mat.set_principled_shader_value("Specular", random.uniform(0, 1))
        mat.set_principled_shader_value("Roughness", random.uniform(0, 1))
        mat.set_principled_shader_value("Base Color", np.random.uniform([0, 0, 0, 1], [1, 1, 1, 1]))
        mat.set_principled_shader_value("Metallic", random.uniform(0, 1))

    # Clear all key frames from the previous run
    bproc.utility.reset_keyframes()

    # Define a function that samples 6-DoF poses
    def sample_pose(obj: bproc.types.MeshObject):
        obj.set_location(np.random.uniform([-1, -1.5, 0.2], [1, 2, 1.2]))
        obj.set_rotation_euler(bproc.sampler.uniformSO3())

    # Sample the poses of all objects above the ground without any collisions in-between
    bproc.object.sample_poses(meshs, objects_to_check_collisions = meshs + collision_objects, sample_pose_func = sample_pose)

    # Run the simulation and fix the poses of the objects at the end
    bproc.object.simulate_physics_and_fix_final_poses(min_simulation_time=4, max_simulation_time=20, check_object_interval=1)

    # Find the point of interest; all camera poses should look towards it
    poi = bproc.object.compute_poi(meshs)

    coord_max = [0.1, 0.1, 0.1]
    coord_min = [0., 0., 0.]

    with open(os.path.join(args.output_dir, "res.txt"), "a") as fh:
        fh.write("*****************\n")
        fh.write(f"{r}) poi = {poi}\n")
        i = 0
        for o in meshs:
            i += 1
            loc = o.get_location()
            euler = o.get_rotation_euler()
            fh.write(f" {i} : {o.get_name()} {loc} {euler}\n")
            for j in range(3):
                if loc[j] < coord_min[j]:
                    coord_min[j] = loc[j]
                if loc[j] > coord_max[j]:
                    coord_max[j] = loc[j]

    # Sample up to X camera poses
    #an = np.random.uniform(0.78, 1.2) #1. #0.35
    for i in range(n_cam_location):
        # Sample location
        location = bproc.sampler.shell(center=[0, 0, 0],
                                       radius_min=1.1,
                                       radius_max=2.2,
                                       elevation_min=5,
                                       elevation_max=89)
        # the coordinate along which the camera position will be sampled
        j = random.randint(0, 2)
        # one-off shift along the random coordinate
        d = (coord_max[j] - coord_min[j]) / n_cam_poses
        if location[j] < 0:
            d = -d
        for k in range(n_cam_poses):
            # Compute rotation based on the vector going from the location towards the poi
            rotation_matrix = bproc.camera.rotation_from_forward_vec(poi - location, inplane_rot=np.random.uniform(-0.7854, 0.7854))
            # Add a homogeneous camera pose based on location and rotation
            cam2world_matrix = bproc.math.build_transformation_mat(location, rotation_matrix)
            bproc.camera.add_camera_pose(cam2world_matrix)
            location[j] -= d
    #world_matrix = bproc.math.build_transformation_mat([2.3, -0.4, 0.66], [1.396, 0., an])
    #bproc.camera.add_camera_pose(world_matrix)
    #an += 0.2

    # render the whole pipeline
    data = bproc.renderer.render()
    # Write data in BOP format
    bproc.writer.write_bop(res_dir,
                           target_objects = all_meshs, # Optional[List[MeshObject]] = None
                           depths = data["depth"],
                           depth_scale = 1.0,
                           colors = data["colors"],
                           color_file_format='JPEG',
                           append_to_existing_output = (r > 0),
                           save_world2cam = False) # world coords are arbitrary in most real BOP datasets
                           # dataset="robo_ds",
"""
!!! categories -> name is taken from category_id !!!
see below:
blenderproc.python.writer : BopWriterUtility.py
    class _BopWriterUtility
        def calc_gt_coco
        ...
        CATEGORIES = [{'id': obj.get_cp('category_id'), 'name': str(obj.get_cp('category_id')), 'supercategory':
            dataset_name} for obj in dataset_objects]

therefore we replace the category names in the annotation
"""
if Not_Categories_Name:
    coco_file = os.path.join(res_dir, "train_pbr/000000/scene_gt_coco.json")
    with open(coco_file, "r") as fh:
        data = json.load(fh)
    cats = data["categories"]
    #print(f"type(cat) = {type(cat)} cat : {cat}")
    i = 0
    for cat in cats:
        cat["name"] = obj_names[i]
        i += 1
        #print(cat)
    with open(coco_file, "w") as fh:
        json.dump(data, fh, indent=0)
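The category-name patch at the end does the following; a minimal sketch on a made-up annotation fragment (the supercategory value here is an assumption, BopWriterUtility fills it from its dataset name):

```python
# Hypothetical fragment of scene_gt_coco.json before the patch: the writer
# stores the numeric category_id as the category name.
data = {"categories": [{"id": 1, "name": "1", "supercategory": "robo_ds"},
                       {"id": 2, "name": "2", "supercategory": "robo_ds"}]}
obj_names = ["fork", "asm_element_edge"]

for i, cat in enumerate(data["categories"]):
    cat["name"] = obj_names[i]   # "1" -> "fork", "2" -> "asm_element_edge"

print(data["categories"])
```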
@ -1,196 +0,0 @@
"""
NVIDIA from jtremblay@gmail.com
"""

# Networks
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.utils.data
import torchvision.models as models


class DopeNetwork(nn.Module):
    def __init__(
        self,
        pretrained=False,
        numBeliefMap=9,
        numAffinity=16,
        stop_at_stage=6,  # number of stages to process (if less than total number of stages)
    ):
        super(DopeNetwork, self).__init__()

        self.stop_at_stage = stop_at_stage

        vgg_full = models.vgg19(pretrained=False).features
        self.vgg = nn.Sequential()
        for i_layer in range(24):
            self.vgg.add_module(str(i_layer), vgg_full[i_layer])

        # Add some layers
        i_layer = 23
        self.vgg.add_module(
            str(i_layer), nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=1)
        )
        self.vgg.add_module(str(i_layer + 1), nn.ReLU(inplace=True))
        self.vgg.add_module(
            str(i_layer + 2), nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1)
        )
        self.vgg.add_module(str(i_layer + 3), nn.ReLU(inplace=True))

        # print('---Belief------------------------------------------------')
        # _2 are the belief map stages
        self.m1_2 = DopeNetwork.create_stage(128, numBeliefMap, True)
        self.m2_2 = DopeNetwork.create_stage(
            128 + numBeliefMap + numAffinity, numBeliefMap, False
        )
        self.m3_2 = DopeNetwork.create_stage(
            128 + numBeliefMap + numAffinity, numBeliefMap, False
        )
        self.m4_2 = DopeNetwork.create_stage(
            128 + numBeliefMap + numAffinity, numBeliefMap, False
        )
        self.m5_2 = DopeNetwork.create_stage(
            128 + numBeliefMap + numAffinity, numBeliefMap, False
        )
        self.m6_2 = DopeNetwork.create_stage(
            128 + numBeliefMap + numAffinity, numBeliefMap, False
        )

        # print('---Affinity----------------------------------------------')
        # _1 are the affinity map stages
        self.m1_1 = DopeNetwork.create_stage(128, numAffinity, True)
        self.m2_1 = DopeNetwork.create_stage(
            128 + numBeliefMap + numAffinity, numAffinity, False
        )
        self.m3_1 = DopeNetwork.create_stage(
            128 + numBeliefMap + numAffinity, numAffinity, False
        )
        self.m4_1 = DopeNetwork.create_stage(
            128 + numBeliefMap + numAffinity, numAffinity, False
        )
        self.m5_1 = DopeNetwork.create_stage(
            128 + numBeliefMap + numAffinity, numAffinity, False
        )
        self.m6_1 = DopeNetwork.create_stage(
            128 + numBeliefMap + numAffinity, numAffinity, False
        )

    def forward(self, x):
        """Runs inference on the neural network"""

        out1 = self.vgg(x)

        out1_2 = self.m1_2(out1)
        out1_1 = self.m1_1(out1)

        if self.stop_at_stage == 1:
            return [out1_2], [out1_1]

        out2 = torch.cat([out1_2, out1_1, out1], 1)
        out2_2 = self.m2_2(out2)
        out2_1 = self.m2_1(out2)

        if self.stop_at_stage == 2:
            return [out1_2, out2_2], [out1_1, out2_1]

        out3 = torch.cat([out2_2, out2_1, out1], 1)
        out3_2 = self.m3_2(out3)
        out3_1 = self.m3_1(out3)

        if self.stop_at_stage == 3:
            return [out1_2, out2_2, out3_2], [out1_1, out2_1, out3_1]

        out4 = torch.cat([out3_2, out3_1, out1], 1)
        out4_2 = self.m4_2(out4)
        out4_1 = self.m4_1(out4)

        if self.stop_at_stage == 4:
            return [out1_2, out2_2, out3_2, out4_2], [out1_1, out2_1, out3_1, out4_1]

        out5 = torch.cat([out4_2, out4_1, out1], 1)
        out5_2 = self.m5_2(out5)
        out5_1 = self.m5_1(out5)

        if self.stop_at_stage == 5:
            return [out1_2, out2_2, out3_2, out4_2, out5_2], [
                out1_1,
                out2_1,
                out3_1,
                out4_1,
                out5_1,
            ]

        out6 = torch.cat([out5_2, out5_1, out1], 1)
        out6_2 = self.m6_2(out6)
        out6_1 = self.m6_1(out6)

        return [out1_2, out2_2, out3_2, out4_2, out5_2, out6_2], [
            out1_1,
            out2_1,
            out3_1,
            out4_1,
            out5_1,
            out6_1,
        ]

    @staticmethod
    def create_stage(in_channels, out_channels, first=False):
        """Create the neural network layers for a single stage."""

        model = nn.Sequential()
        mid_channels = 128
        if first:
            padding = 1
            kernel = 3
            count = 6
            final_channels = 512
        else:
            padding = 3
            kernel = 7
            count = 10
            final_channels = mid_channels

        # First convolution
        model.add_module(
            "0",
            nn.Conv2d(
                in_channels, mid_channels, kernel_size=kernel, stride=1, padding=padding
            ),
        )

        # Middle convolutions
        i = 1
        while i < count - 1:
            model.add_module(str(i), nn.ReLU(inplace=True))
            i += 1
            model.add_module(
                str(i),
                nn.Conv2d(
                    mid_channels,
                    mid_channels,
                    kernel_size=kernel,
                    stride=1,
                    padding=padding,
                ),
            )
            i += 1

        # Penultimate convolution
        model.add_module(str(i), nn.ReLU(inplace=True))
        i += 1
        model.add_module(
            str(i), nn.Conv2d(mid_channels, final_channels, kernel_size=1, stride=1)
        )
        i += 1

        # Last convolution
        model.add_module(str(i), nn.ReLU(inplace=True))
        i += 1
        model.add_module(
            str(i), nn.Conv2d(final_channels, out_channels, kernel_size=1, stride=1)
        )
        i += 1

        return model
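A quick shape check of the network; a minimal sketch (the module name `model` is a hypothetical file name for the code above, the 400x400 resolution is an assumption, and a torchvision version that still accepts the `pretrained` flag used above is assumed):

```python
import torch

from model import DopeNetwork  # hypothetical module name for the file above

net = DopeNetwork()  # defaults: 9 belief maps, 16 affinity channels, 6 stages
net.eval()

with torch.no_grad():
    beliefs, affinities = net(torch.randn(1, 3, 400, 400))

# One output per stage; the VGG trunk downsamples by 8 (three max-pool layers).
print(len(beliefs), len(affinities))   # 6 6
print(beliefs[-1].shape)               # torch.Size([1, 9, 50, 50])
print(affinities[-1].shape)            # torch.Size([1, 16, 50, 50])
```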
@ -1,29 +0,0 @@
"""
rbs_train
Overall task: web-service pipeline
Implemented function: training of a neural network model on a given BOP dataset

python3 $PYTHON_EDUCATION --path /Users/idontsudo/webservice/server/build/public/7065d6b6-c8a3-48c5-9679-bb8f3a690296 \
    --name test1234 --datasetName 32123213

27.04.2024 @shalenikol release 0.1
"""
import argparse
from train_Yolo import train_YoloV8
from train_Dope import train_Dope_i

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--path", required=True, help="Path for dataset")
    parser.add_argument("--name", required=True, help="String with result weights name")
    parser.add_argument("--datasetName", required=True, help="String with dataset name")
    parser.add_argument("--outpath", default="weights", help="Output path for weights")
    parser.add_argument("--type", default="ObjectDetection", help="Type of implementation")
    parser.add_argument("--epoch", default=3, type=int, help="How many training epochs")
    parser.add_argument('--pretrain', action="store_true", help="Use pretraining")
    args = parser.parse_args()

    if args.type == "ObjectDetection":
        train_YoloV8(args.path, args.name, args.datasetName, args.outpath, args.epoch, args.pretrain)
    else:
        train_Dope_i(args.path, args.name, args.datasetName, args.outpath, args.epoch, args.pretrain)
@ -1,542 +0,0 @@
"""
train_Dope
Overall task: object pose estimation
Implemented function: training of the DOPE neural network model on a given BOP dataset

python3 $PYTHON_EDUCATION --path /Users/user/webservice/server/build/public/7065d6b6-c8a3-48c5-9679-bb8f3a690296 \
    --name test1234 --datasetName 32123213

08.05.2024 @shalenikol release 0.1
"""
import os
import json
import shutil
import numpy as np
import transforms3d as t3d

FILE_RBS_INFO = "rbs_info.json"
FILE_CAMERA = "camera.json"
FILE_GT = "scene_gt.json"
FILE_GT_COCO = "scene_gt_coco.json"
FILE_GT_INFO = "scene_gt_info.json"

FILE_MODEL = "epoch"
EXT_MODEL = ".pth"
EXT_RGB = "jpg"
DIR_ROOT_DS = "dataset_dope"
DIR_TRAIN_OUT = "out_weights"

MODEL_SCALE = 1000  # the source model is in metres; convert to mm (for DOPE)

# Own_Numbering_Files = True  # image file naming: use our own numbering
nn_image = 0
K_intrinsic = []
model_info = []
camera_data = {}
im_width = 0

nb_update_network = 0
# Cuboid corner ordering (plus centre point):
# [
#   [min(x), min(y), min(z)],
#   [min(x), max(y), min(z)],
#   [min(x), max(y), max(z)],
#   [min(x), min(y), max(z)],
#   [max(x), min(y), min(z)],
#   [max(x), max(y), min(z)],
#   [max(x), max(y), max(z)],
#   [max(x), min(y), max(z)],
#   [xc, yc, zc]  # min + (max - min) / 2
# ]

def trans_3Dto2D_point_in_camera(xyz, K_m, R_m2c, t_m2c):
    """
    xyz   : 3D coordinates of the point
    K_m   : 3x3 camera intrinsic matrix
    R_m2c : 3x3 rotation matrix
    t_m2c : 3x1 translation vector
    return [u, v]
    """
    K = np.array(K_m)
    r = np.array(R_m2c)
    r.shape = (3, 3)
    t = np.array(t_m2c)
    t.shape = (3, 1)
    T = np.concatenate((r, t), axis=1)  # 3x4 extrinsic matrix [R|t]

    P_m = np.array(xyz)
    P_m.resize(4)
    P_m[-1] = 1.0
    P_m.shape = (4, 1)

    # Project (X, Y, Z, 1) into the camera coordinate system
    P_c = T @ P_m  # 3x1
    # Apply camera intrinsics to map (Xc, Yc, Zc) to p = (x, y, z)
    p = K @ P_c
    # Normalize by z to get (u, v, 1)
    uv = (p / p[2][0])[:-1]
    return uv.flatten().tolist()

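A quick way to validate the projection: with an identity rotation and the camera looking straight at the model origin, the origin must land exactly on the principal point. The intrinsics below are made-up illustrative values:

```python
# Sanity check: the optical axis projects to the principal point (cx, cy).
K = [[500.0, 0.0, 320.0], [0.0, 500.0, 240.0], [0.0, 0.0, 1.0]]
R = [1, 0, 0, 0, 1, 0, 0, 0, 1]   # flattened 3x3, as stored in scene_gt.json
t = [0.0, 0.0, 1000.0]            # 1000 mm in front of the camera
print(trans_3Dto2D_point_in_camera([0.0, 0.0, 0.0], K, R, t))  # [320.0, 240.0]
```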
def gt_parse(path: str, out_dir: str):
    global nn_image
    with open(os.path.join(path, FILE_GT_COCO), "r") as fh:
        coco_data = json.load(fh)
    with open(os.path.join(path, FILE_GT), "r") as fh:
        gt_data = json.load(fh)
    with open(os.path.join(path, FILE_GT_INFO), "r") as fh:
        gt_info = json.load(fh)

    for img in coco_data["images"]:
        rgb_file = os.path.join(path, img["file_name"])
        if os.path.isfile(rgb_file):
            # if Own_Numbering_Files:
            ext = os.path.splitext(rgb_file)[1]  # only ext
            f = f"{nn_image:06}"
            out_img = os.path.join(out_dir, f + ext)
            # else:
            #     f = os.path.split(rgb_file)[1]  # filename with extension
            #     f = os.path.splitext(f)[0]  # only filename
            #     out_img = out_dir
            shutil.copy2(rgb_file, out_img)
            out_file = os.path.join(out_dir, f + ".json")
            nn_image += 1

            # full annotation of the one image
            all_data = camera_data.copy()
            cat_names = {obj["id"]: obj["name"] for obj in coco_data["categories"]}
            id_img = img["id"]  # 0, 1, 2 ...
            sid_img = str(id_img)  # "0", "1", "2" ...
            img_info = gt_info[sid_img]
            img_gt = gt_data[sid_img]
            img_idx = 0  # object index on the image
            objs = []
            for ann in coco_data["annotations"]:
                if ann["image_id"] == id_img:
                    item = ann["category_id"]
                    obj_data = {}
                    obj_data["class"] = cat_names[item]
                    x, y, width, height = ann["bbox"]
                    obj_data["bounding_box"] = {"top_left": [x, y], "bottom_right": [x + width, y + height]}

                    # visibility from FILE_GT_INFO
                    item_info = img_info[img_idx]
                    obj_data["visibility"] = item_info["visib_fract"]

                    # location from FILE_GT
                    item_gt = img_gt[img_idx]
                    obj_id = item_gt["obj_id"] - 1  # index with 0
                    cam_R_m2c = item_gt["cam_R_m2c"]
                    cam_t_m2c = item_gt["cam_t_m2c"]
                    obj_data["location"] = cam_t_m2c
                    q = t3d.quaternions.mat2quat(np.array(cam_R_m2c))
                    obj_data["quaternion_xyzw"] = [q[1], q[2], q[3], q[0]]

                    cuboid_xyz = model_info[obj_id]
                    obj_data["projected_cuboid"] = [
                        trans_3Dto2D_point_in_camera(cub, K_intrinsic, cam_R_m2c, cam_t_m2c)
                        for cub in cuboid_xyz
                    ]

                    objs.append(obj_data)
                    img_idx += 1

            all_data["objects"] = objs
            with open(out_file, "w") as fh:
                json.dump(all_data, fh, indent=2)

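For orientation, each copied image ends up next to a JSON annotation of roughly the following shape; the values below are illustrative, not taken from a real dataset:

```python
# Sketch of one gt_parse output record (illustrative values only).
annotation = {
    "camera_data": {
        "intrinsic": {"cx": 320.0, "cy": 240.0, "fx": 500.0, "fy": 500.0},
        "height": 480,
        "width": 640,
    },
    "objects": [
        {
            "class": "fork",
            "bounding_box": {"top_left": [12, 34], "bottom_right": [112, 134]},
            "visibility": 0.97,                # visib_fract from scene_gt_info.json
            "location": [10.0, -20.0, 700.0],  # cam_t_m2c
            "quaternion_xyzw": [0.0, 0.0, 0.0, 1.0],
            "projected_cuboid": [],            # 9 [u, v] pairs: 8 corners + centre
        }
    ],
}
```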
def explore(path: str, res_dir: str):
    if not os.path.isdir(path):
        return
    folders = [
        os.path.join(path, o)
        for o in os.listdir(path)
        if os.path.isdir(os.path.join(path, o))
    ]
    for path_entry in folders:
        if os.path.isfile(os.path.join(path_entry, FILE_GT_COCO)) and \
           os.path.isfile(os.path.join(path_entry, FILE_GT_INFO)) and \
           os.path.isfile(os.path.join(path_entry, FILE_GT)):
            gt_parse(path_entry, res_dir)
        else:
            explore(path_entry, res_dir)


def BOP2DOPE_dataset(dpath: str, out_dir: str) -> str:
    """Convert a BOP dataset to DOPE format for training."""
    res_dir = os.path.join(out_dir, DIR_ROOT_DS)
    if os.path.isdir(res_dir):
        shutil.rmtree(res_dir)
    os.mkdir(res_dir)

    explore(dpath, res_dir)

    return out_dir

def train(dopepath: str, wname: str, epochs: int, pretrain: bool, lname: list):
    import random
    import configparser
    import torch
    # import torch.nn.parallel
    import torch.optim as optim
    import torch.utils.data
    import torchvision.transforms as transforms
    from torch.autograd import Variable
    import datetime
    from tensorboardX import SummaryWriter

    from models_dope import DopeNetwork
    from utils_dope import CleanVisiiDopeLoader  # , VisualizeBeliefMap, save_image

    import warnings
    warnings.filterwarnings("ignore")

    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3,4,5,6,7"

    torch.autograd.set_detect_anomaly(False)
    torch.autograd.profiler.profile(False)
    torch.autograd.gradcheck = False
    torch.backends.cudnn.benchmark = True

    start_time = datetime.datetime.now()
    print("start:", start_time.strftime("%m/%d/%Y, %H:%M:%S"))

    res_model = os.path.join(dopepath, wname + EXT_MODEL)

    local_rank = 0
    opt = lambda: None  # simple attribute container for the training options
    opt.use_s3 = False
    opt.train_buckets = []
    opt.endpoint = None
    opt.lr = 0.0001
    opt.loginterval = 100
    opt.sigma = 0.5  # 4
    opt.nbupdates = None
    # opt.save = False
    # opt.option = "default"
    # opt.gpuids = [0]

    opt.namefile = FILE_MODEL
    opt.workers = 8
    opt.batchsize = 16

    opt.data = [os.path.join(dopepath, DIR_ROOT_DS)]
    opt.outf = os.path.join(dopepath, DIR_TRAIN_OUT)
    opt.object = lname  # ["fork"]
    opt.exts = [EXT_RGB]
    # opt.imagesize = im_width
    opt.epochs = epochs
    opt.pretrained = pretrain
    opt.net_path = res_model if pretrain else None
    opt.manualseed = random.randint(1, 10000)

    # # Validate Arguments
    # if opt.use_s3 and (opt.train_buckets is None or opt.endpoint is None):
    #     raise ValueError(
    #         "--train_buckets and --endpoint must be specified if training with data from s3 bucket."
    #     )
    # if not opt.use_s3 and opt.data is None:
    #     raise ValueError("--data field must be specified.")

    os.makedirs(opt.outf, exist_ok=True)

    # if local_rank == 0:
    #     writer = SummaryWriter(opt.outf + "/runs/")
    random.seed(opt.manualseed)
    torch.cuda.set_device(local_rank)
    # torch.distributed.init_process_group(backend="nccl", init_method="env://")
    torch.manual_seed(opt.manualseed)
    torch.cuda.manual_seed_all(opt.manualseed)

    # # Data Augmentation
    # if not opt.save:
    #     contrast = 0.2
    #     brightness = 0.2
    #     noise = 0.1
    #     normal_imgs = [0.59, 0.25]
    #     transform = transforms.Compose(
    #         [
    #             AddRandomContrast(0.2),
    #             AddRandomBrightness(0.2),
    #             transforms.Resize(opt.imagesize),
    #         ]
    #     )
    # else:
    #     contrast = 0.00001
    #     brightness = 0.00001
    #     noise = 0.00001
    #     normal_imgs = None
    #     transform = transforms.Compose(
    #         [transforms.Resize(opt.imagesize), transforms.ToTensor()]
    #     )

    # Load Model
    net = DopeNetwork()
    output_size = 50
    # opt.sigma = 0.5

    train_dataset = CleanVisiiDopeLoader(
        opt.data,
        sigma=opt.sigma,
        output_size=output_size,
        extensions=opt.exts,
        objects=opt.object,
        use_s3=opt.use_s3,
        buckets=opt.train_buckets,
        endpoint_url=opt.endpoint,
    )
    trainingdata = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=opt.batchsize,
        shuffle=True,
        num_workers=opt.workers,
        pin_memory=True,
    )
    if trainingdata is not None:
        print(f"training data: {len(trainingdata)} batches")

    print("Loading Model...")
    net = net.cuda()
    # net = torch.nn.parallel.DistributedDataParallel(
    #     net.cuda(), device_ids=[local_rank], output_device=local_rank
    # )
    if opt.pretrained:
        if opt.net_path is not None:
            net.load_state_dict(torch.load(opt.net_path))
        else:
            print("Error: Did not specify path to pretrained weights.")
            quit()

    parameters = filter(lambda p: p.requires_grad, net.parameters())
    optimizer = optim.Adam(parameters, lr=opt.lr)

    print("ready to train!")

    global nb_update_network
    nb_update_network = 0
    # best_results = {"epoch": None, "passed": None, "add_mean": None, "add_std": None}

    scaler = torch.cuda.amp.GradScaler()

    def _runnetwork(epoch, train_loader):  # , syn=False
        global nb_update_network
        net.train()

        loss_avg_to_log = {}
        loss_avg_to_log["loss"] = []
        loss_avg_to_log["loss_affinities"] = []
        loss_avg_to_log["loss_belief"] = []
        loss_avg_to_log["loss_class"] = []
        for batch_idx, targets in enumerate(train_loader):
            optimizer.zero_grad()

            data = Variable(targets["img"].cuda())
            target_belief = Variable(targets["beliefs"].cuda())
            target_affinities = Variable(targets["affinities"].cuda())

            output_belief, output_aff = net(data)

            loss = None

            loss_belief = torch.tensor(0).float().cuda()
            loss_affinities = torch.tensor(0).float().cuda()
            loss_class = torch.tensor(0).float().cuda()  # stays at zero; logged for format compatibility

            for stage in range(len(output_aff)):  # output, each belief map layers.
                loss_affinities += (
                    (output_aff[stage] - target_affinities)
                    * (output_aff[stage] - target_affinities)
                ).mean()

                loss_belief += (
                    (output_belief[stage] - target_belief)
                    * (output_belief[stage] - target_belief)
                ).mean()

            loss = loss_affinities + loss_belief

            # if batch_idx == 0:
            #     post = "train"
            #     if local_rank == 0:
            #         for i_output in range(1):
            #             # input images
            #             writer.add_image(
            #                 f"{post}_input_{i_output}",
            #                 targets["img_original"][i_output],
            #                 epoch,
            #                 dataformats="CWH",
            #             )
            #             # belief maps gt
            #             imgs = VisualizeBeliefMap(target_belief[i_output])
            #             img, grid = save_image(
            #                 imgs, "some_img.png", mean=0, std=1, nrow=3, save=False
            #             )
            #             writer.add_image(
            #                 f"{post}_belief_ground_truth_{i_output}",
            #                 grid,
            #                 epoch,
            #                 dataformats="CWH",
            #             )
            #             # belief maps guess
            #             imgs = VisualizeBeliefMap(output_belief[-1][i_output])
            #             img, grid = save_image(
            #                 imgs, "some_img.png", mean=0, std=1, nrow=3, save=False
            #             )
            #             writer.add_image(
            #                 f"{post}_belief_guess_{i_output}",
            #                 grid,
            #                 epoch,
            #                 dataformats="CWH",
            #             )

            loss.backward()

            optimizer.step()

            nb_update_network += 1

            # log the loss
            loss_avg_to_log["loss"].append(loss.item())
            loss_avg_to_log["loss_class"].append(loss_class.item())
            loss_avg_to_log["loss_affinities"].append(loss_affinities.item())
            loss_avg_to_log["loss_belief"].append(loss_belief.item())

            if batch_idx % opt.loginterval == 0:
                print(
                    "Train Epoch: {} [{}/{} ({:.0f}%)] \tLoss: {:.15f} \tLocal Rank: {}".format(
                        epoch,
                        batch_idx * len(data),
                        len(train_loader.dataset),
                        100.0 * batch_idx / len(train_loader),
                        loss.item(),
                        local_rank,
                    )
                )
        # # log the loss values
        # if local_rank == 0:
        #     writer.add_scalar("loss/train_loss", np.mean(loss_avg_to_log["loss"]), epoch)
        #     writer.add_scalar("loss/train_cls", np.mean(loss_avg_to_log["loss_class"]), epoch)
        #     writer.add_scalar("loss/train_aff", np.mean(loss_avg_to_log["loss_affinities"]), epoch)
        #     writer.add_scalar("loss/train_bel", np.mean(loss_avg_to_log["loss_belief"]), epoch)

    for epoch in range(1, opt.epochs + 1):

        _runnetwork(epoch, trainingdata)

        try:
            if local_rank == 0:
                torch.save(
                    net.state_dict(),
                    f"{opt.outf}/{opt.namefile}_{str(epoch).zfill(3)}.pth",
                )
        except Exception as e:
            print(f"Encountered Exception: {e}")

        if opt.nbupdates is not None and nb_update_network > int(opt.nbupdates):
            break

    # if local_rank == 0:
    # save result model
    torch.save(net.state_dict(), res_model)  # os.path.join(dopepath, wname + EXT_MODEL)
    # else:
    #     torch.save(
    #         net.state_dict(),
    #         f"{opt.outf}/{opt.namefile}_{str(epoch).zfill(3)}_rank_{local_rank}.pth",
    #     )

    print("end:", datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S"))
    print("Total time taken: ", str(datetime.datetime.now() - start_time).split(".")[0])

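In `_runnetwork` the objective is a plain sum of per-stage mean-squared errors over both heads; writing $B_s, A_s$ for the stage-$s$ belief and affinity outputs and $B^*, A^*$ for the targets from the loader:

$$L \;=\; \sum_{s=1}^{S}\Big(\overline{(B_s - B^{*})^{2}} \;+\; \overline{(A_s - A^{*})^{2}}\Big),$$

where the bar denotes the mean over all tensor elements, so every intermediate stage is supervised against the same ground truth.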
def train_Dope_i(path: str, wname: str, dname: str, outpath: str, epochs: int, pretrain: bool):
    """Main procedure for training a DOPE model."""
    global K_intrinsic, model_info, camera_data, im_width

    if not os.path.isdir(outpath):
        print(f"Invalid output path '{outpath}'")
        exit(-1)
    out_dir = os.path.join(outpath, wname)
    ds_path = os.path.join(path, dname)

    if not os.path.isdir(ds_path):
        print(f"{ds_path} : no BOP directory")
        return ""

    camera_json = os.path.join(ds_path, FILE_CAMERA)
    if not os.path.isfile(camera_json):
        print(f"{camera_json} : no intrinsic camera file")
        return ""

    rbs_info = os.path.join(ds_path, FILE_RBS_INFO)
    if not os.path.isfile(rbs_info):
        print(f"{rbs_info} : no dataset info file")
        return ""

    camera_data = {}
    with open(camera_json, "r") as fh:
        data = json.load(fh)
    keys = ["cx", "cy", "fx", "fy"]
    intrinsic = {k: data[k] for k in keys}
    im_height = data["height"]
    im_width = data["width"]
    camera_data["camera_data"] = dict(intrinsic=intrinsic, height=im_height, width=im_width)
    K_intrinsic = [
        [data["fx"], 0.0, data["cx"]],
        [0.0, data["fy"], data["cy"]],
        [0.0, 0.0, 1.0]
    ]
    # calculate cuboid + centre
    with open(rbs_info, "r") as fh:
        info = json.load(fh)
    # list of object names
    list_name = list(map(lambda x: x["name"], info))
    # in FILE_RBS_INFO the models are numbered from smallest to largest
    model_info = []
    for m_info in info:
        cub = np.array(m_info["cuboid"]) * MODEL_SCALE
        xyz_min = cub.min(axis=0)
        xyz_max = cub.max(axis=0)
        # [xc, yc, zc]  # min + (max - min) / 2
        center = []
        for i in range(3):
            center.append(xyz_min[i] + (xyz_max[i] - xyz_min[i]) / 2)
        c = np.array(center, ndmin=2)
        model_info.append(np.append(cub, c, axis=0))

    if pretrain:
        # continue training
        if not os.path.isdir(out_dir):
            print(f"No dir '{out_dir}'")
            exit(-2)
        dpath = out_dir
        # model_path = os.path.join(dpath, wname + ".pt")
    else:
        # train from scratch
        if not os.path.isdir(out_dir):
            os.mkdir(out_dir)

        dpath = BOP2DOPE_dataset(ds_path, out_dir)
        if len(dpath) == 0:
            print(f"Error converting dataset '{ds_path}' to '{outpath}'")
            exit(-4)
        # model_path = os.path.join(dpath, FILE_BASEMODEL)

    # results = f"python train.py --local_rank 0 --data {dpath} --object fork" \
    #     + f" -e {epochs} --batchsize 16 --exts jpg --imagesize 640 --pretrained" \
    #     + " --net_path /home/shalenikol/fork_work/dope_training/output/weights_2996/net_epoch_47.pth"
    # print(results)
    train(dpath, wname, epochs, pretrain, list_name)


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--path", required=True, help="Path for dataset")
    parser.add_argument("--name", required=True, help="String with result weights name")
    parser.add_argument("--datasetName", required=True, help="String with dataset name")
    parser.add_argument("--outpath", default="weights", help="Output path for weights")
    parser.add_argument("--epoch", default=3, type=int, help="How many training epochs")
    parser.add_argument("--pretrain", action="store_true", help="Use pretraining")
    args = parser.parse_args()

    train_Dope_i(args.path, args.name, args.datasetName, args.outpath, args.epoch, args.pretrain)

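Both trainers consume `rbs_info.json`; from the accesses above (`x["name"]`, `m_info["cuboid"]`) it is a list of per-model records along these lines, with all values here purely illustrative:

```python
# Sketch of rbs_info.json as read above (illustrative values).
rbs_info = [
    {
        "name": "fork",       # object/category name, also used by train_Yolo
        "cuboid": [           # 8 bounding-box corners in metres;
            [-0.05, -0.01, 0.0],
            # ... 7 more corners; train_Dope scales them by MODEL_SCALE
        ],                    # and appends the computed centre point
    },
]
```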
@ -1,181 +0,0 @@
"""
train_Yolo
Overall task: object detection
Implemented function: training of a YOLOv8 neural network model on a given BOP dataset

python3 $PYTHON_TRAIN --path /Users/idontsudo/webservice/server/build/public/7065d6b6-c8a3-48c5-9679-bb8f3a690296/datasets \
    --name test123 --datasetName ds213 --outpath /Users/idontsudo/webservice/server/build/public/7065d6b6-c8a3-48c5-9679-bb8f3a690296/weights

27.04.2024 @shalenikol release 0.1
"""
import os
import shutil
import json
import yaml

from ultralytics import YOLO
# from ultralytics.utils.metrics import DetMetrics

FILE_BASEMODEL = "yolov8n.pt"
FILE_RBS_INFO = "rbs_info.json"
FILE_RBS_TRAIN = "rbs_train.yaml"
FILE_GT_COCO = "scene_gt_coco.json"
FILE_L_TRAIN = "i_train.txt"
FILE_L_VAL = "i_val.txt"
FILE_TRAIN_RES = "weights/last.pt"
DIR_ROOT_DS = "datasets"
DIR_COCO_DS = "rbs_coco"
DIR_RGB_DS = "images"
DIR_LABELS_DS = "labels"

SZ_SERIES = 15  # number of training images per validation image

nn_image = 0
f1 = f2 = None


def convert2relative(height, width, bbox):
    """YOLO format uses relative coordinates for annotation."""
    x, y, w, h = bbox
    x += w / 2
    y += h / 2
    return x / width, y / height, w / width, h / height

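A worked example of the conversion: a COCO box is `[x, y, w, h]` anchored at the top-left corner, while YOLO wants a centre point and size, all relative to the image:

```python
# COCO bbox [10, 20, 100, 50] in a 640x480 image:
rel = convert2relative(480, 640, [10, 20, 100, 50])
print(rel)  # (0.09375, 0.09375, 0.15625, 0.10416...)
# centre x = (10 + 100/2) / 640, centre y = (20 + 50/2) / 480
```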
def gt_parse(path: str, out_dir: str):
    global nn_image, f1, f2
    with open(os.path.join(path, FILE_GT_COCO), "r") as fh:
        coco_data = json.load(fh)

    for img in coco_data["images"]:
        rgb_file = os.path.join(path, img["file_name"])
        if os.path.isfile(rgb_file):
            ext = os.path.splitext(rgb_file)[1]  # only ext
            f = f"{nn_image:06}"
            out_img = os.path.join(out_dir, DIR_RGB_DS, f + ext)
            shutil.copy2(rgb_file, out_img)

            # fill the bbox label files
            img_id = img["id"]
            with open(os.path.join(out_dir, DIR_LABELS_DS, f + ".txt"), "w") as fh:
                for i in coco_data["annotations"]:
                    if i["image_id"] == img_id:
                        cat_id = i["category_id"]
                        if cat_id < 999:
                            bbox = i["bbox"]
                            im_h = i["height"]
                            im_w = i["width"]
                            rel = convert2relative(im_h, im_w, bbox)
                            # format: <target> <x-center> <y-center> <width> <height>
                            fh.write(f"{cat_id-1} {rel[0]} {rel[1]} {rel[2]} {rel[3]}\n")  # category from 0

            nn_image += 1
            line = os.path.join("./", DIR_RGB_DS, f + ext) + "\n"
            if nn_image % SZ_SERIES == 0:
                f2.write(line)
            else:
                f1.write(line)


def explore(path: str, res_dir: str):
    if not os.path.isdir(path):
        return
    folders = [
        os.path.join(path, o)
        for o in os.listdir(path)
        if os.path.isdir(os.path.join(path, o))
    ]
    for path_entry in folders:
        if os.path.isfile(os.path.join(path_entry, FILE_GT_COCO)):
            gt_parse(path_entry, res_dir)
        else:
            explore(path_entry, res_dir)


def BOP2Yolo_dataset(dpath: str, out_dir: str, lname: list) -> str:
    """Convert a BOP dataset to YOLO format for training."""
    cfg_yaml = os.path.join(out_dir, FILE_RBS_TRAIN)
    p = os.path.join(out_dir, DIR_ROOT_DS, DIR_COCO_DS)
    cfg_data = {"path": p, "train": FILE_L_TRAIN, "val": FILE_L_VAL}
    cfg_data["names"] = {i: x for i, x in enumerate(lname)}
    with open(cfg_yaml, "w") as fh:
        yaml.dump(cfg_data, fh)

    res_dir = os.path.join(out_dir, DIR_ROOT_DS)
    if not os.path.isdir(res_dir):
        os.mkdir(res_dir)

    res_dir = os.path.join(res_dir, DIR_COCO_DS)
    if not os.path.isdir(res_dir):
        os.mkdir(res_dir)

    p = os.path.join(res_dir, DIR_RGB_DS)
    if not os.path.isdir(p):
        os.mkdir(p)
    p = os.path.join(res_dir, DIR_LABELS_DS)
    if not os.path.isdir(p):
        os.mkdir(p)

    global f1, f2
    f1 = open(os.path.join(res_dir, FILE_L_TRAIN), "w")
    f2 = open(os.path.join(res_dir, FILE_L_VAL), "w")
    explore(dpath, res_dir)
    f1.close()
    f2.close()

    return out_dir

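`BOP2Yolo_dataset` emits a standard Ultralytics data file; with a single hypothetical class it comes out roughly like this (keys are sorted by `yaml.dump`):

```yaml
# rbs_train.yaml as written above (illustrative path and class name)
names:
  0: fork
path: /path/to/out_dir/datasets/rbs_coco
train: i_train.txt
val: i_val.txt
```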
def train_YoloV8(path: str, wname: str, dname: str, outpath: str, epochs: int, pretrain: bool):
    """Main procedure for training a YOLOv8 model."""
    if not os.path.isdir(outpath):
        print(f"Invalid output path '{outpath}'")
        exit(-1)
    out_dir = os.path.join(outpath, wname)

    if pretrain:
        # continue training
        if not os.path.isdir(out_dir):
            print(f"No dir '{out_dir}'")
            exit(-2)
        dpath = out_dir
        model_path = os.path.join(dpath, wname + ".pt")
    else:
        # train from scratch
        if not os.path.isdir(out_dir):
            os.mkdir(out_dir)

        ds_path = os.path.join(path, dname)
        rbs_info = os.path.join(ds_path, FILE_RBS_INFO)
        if not os.path.isfile(rbs_info):
            print(f"{rbs_info} : no dataset description file")
            exit(-3)

        with open(rbs_info, "r") as fh:
            y = json.load(fh)
        # list of object names
        list_name = list(map(lambda x: x["name"], y))

        dpath = BOP2Yolo_dataset(ds_path, out_dir, list_name)
        if len(dpath) == 0:
            print(f"Error converting dataset '{ds_path}' to '{outpath}'")
            exit(-4)
        model_path = os.path.join(dpath, FILE_BASEMODEL)

    model = YOLO(model_path)
    results = model.train(data=os.path.join(dpath, FILE_RBS_TRAIN), epochs=epochs, project=out_dir)
    wf = os.path.join(results.save_dir, FILE_TRAIN_RES)
    if not os.path.isfile(wf):
        print(f"Error in train: no result file '{wf}'")
        exit(-5)

    shutil.copy2(wf, os.path.join(dpath, wname + ".pt"))
    shutil.rmtree(results.save_dir)


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--path", required=True, help="Path for dataset")
    parser.add_argument("--name", required=True, help="String with result weights name")
    parser.add_argument("--datasetName", required=True, help="String with dataset name")
    parser.add_argument("--outpath", default="weights", help="Output path for weights")
    parser.add_argument("--epoch", default=3, type=int, help="How many training epochs")
    parser.add_argument("--pretrain", action="store_true", help="Use pretraining")
    args = parser.parse_args()

    train_YoloV8(args.path, args.name, args.datasetName, args.outpath, args.epoch, args.pretrain)

@ -1,967 +0,0 @@
"""
NVIDIA from jtremblay@gmail.com
"""
import colorsys
import io
import json
import math
import os
from math import acos, pi, sqrt
from os.path import exists

import albumentations as A
import boto3
import numpy as np
import torch
import torch.utils.data as data
import torchvision.transforms as transforms
from PIL import Image, ImageDraw, ImageEnhance, ImageFont


def default_loader(path):
    return Image.open(path).convert("RGB")


def length(v):
    return sqrt(v[0] ** 2 + v[1] ** 2)


def dot_product(v, w):
    return v[0] * w[0] + v[1] * w[1]


def normalize(v):
    norm = np.linalg.norm(v, ord=1)
    if norm == 0:
        norm = np.finfo(v.dtype).eps
    return v / norm


def determinant(v, w):
    return v[0] * w[1] - v[1] * w[0]


def inner_angle(v, w):
    cosx = dot_product(v, w) / (length(v) * length(w))
    rad = acos(cosx)  # in radians
    return rad * 180 / pi  # returns degrees


def py_ang(A, B=(1, 0)):
    inner = inner_angle(A, B)
    det = determinant(A, B)
    if det < 0:
        # this is a property of the det: if det < 0 then B is clockwise of A
        return inner
    else:
        # if det > 0 then A is immediately clockwise of B
        return 360 - inner

def append_dot(extensions):
    res = []

    for ext in extensions:
        if not ext.startswith("."):
            res.append(f".{ext}")
        else:
            res.append(ext)

    return res


def loadimages(root, extensions=["png"]):
    imgs = []
    extensions = append_dot(extensions)

    def add_json_files(path):
        for ext in extensions:
            for file in os.listdir(path):
                imgpath = os.path.join(path, file)
                if (
                    imgpath.endswith(ext)
                    and exists(imgpath)
                    and exists(imgpath.replace(ext, ".json"))
                ):
                    imgs.append(
                        (
                            imgpath,
                            imgpath.replace(path, "").replace("/", ""),
                            imgpath.replace(ext, ".json"),
                        )
                    )

    def explore(path):
        if not os.path.isdir(path):
            return
        folders = [
            os.path.join(path, o)
            for o in os.listdir(path)
            if os.path.isdir(os.path.join(path, o))
        ]

        for path_entry in folders:
            explore(path_entry)

        add_json_files(path)

    explore(root)

    return imgs


def loadweights(root):
    if root.endswith(".pth") and os.path.isfile(root):
        return [root]
    else:
        weights = [
            os.path.join(root, f)
            for f in os.listdir(root)
            if os.path.isfile(os.path.join(root, f)) and f.endswith(".pth")
        ]

        weights.sort()
        return weights


def loadimages_inference(root, extensions):
    imgs, imgsname = [], []
    extensions = append_dot(extensions)

    def add_imgs(path):
        for ext in extensions:
            for file in os.listdir(path):
                imgpath = os.path.join(path, file)
                if imgpath.endswith(ext) and exists(imgpath):
                    imgs.append(imgpath)
                    imgsname.append(imgpath.replace(root, ""))

    def explore(path):
        if not os.path.isdir(path):
            return
        folders = [
            os.path.join(path, o)
            for o in os.listdir(path)
            if os.path.isdir(os.path.join(path, o))
        ]

        for path_entry in folders:
            explore(path_entry)

        add_imgs(path)

    explore(root)

    return imgs, imgsname

class CleanVisiiDopeLoader(data.Dataset):
    def __init__(
        self,
        path_dataset,
        objects=None,
        sigma=1,
        output_size=400,
        extensions=["png"],
        debug=False,
        use_s3=False,
        buckets=[],
        endpoint_url=None,
    ):
        ###################
        self.path_dataset = path_dataset
        self.objects_interest = objects
        self.sigma = sigma
        self.output_size = output_size
        self.extensions = append_dot(extensions)
        self.debug = debug
        ###################

        self.imgs = []
        self.s3_buckets = {}
        self.use_s3 = use_s3

        if self.use_s3:
            self.session = boto3.Session()
            self.s3 = self.session.resource(
                service_name="s3", endpoint_url=endpoint_url
            )

            for bucket_name in buckets:
                try:
                    self.s3_buckets[bucket_name] = self.s3.Bucket(bucket_name)
                except Exception as e:
                    print(
                        f"Error trying to load bucket {bucket_name} for training data:",
                        e,
                    )

            for bucket in self.s3_buckets:
                bucket_objects = [
                    str(obj.key) for obj in self.s3_buckets[bucket].objects.all()
                ]

                jsons = set([json for json in bucket_objects if json.endswith(".json")])
                imgs = [
                    img
                    for img in bucket_objects
                    if img.endswith(tuple(self.extensions))
                ]

                for ext in self.extensions:
                    for img in imgs:
                        # Only add images that have a ground truth file
                        if img.endswith(ext) and img.replace(ext, ".json") in jsons:
                            # (img key, bucket name, json key)
                            self.imgs.append((img, bucket, img.replace(ext, ".json")))

        else:
            for path_look in path_dataset:
                self.imgs += loadimages(path_look, extensions=self.extensions)

        # np.random.shuffle(self.imgs)
        print("Number of Training Images:", len(self.imgs))
        print(self.imgs)

        if debug:
            print("Debugging output will be saved in debug/")
            if os.path.isdir("debug"):
                print('folder "debug"/ exists')
            else:
                os.mkdir("debug")
                print('created folder "debug"/')

    def __len__(self):
        return len(self.imgs)

    def __getitem__(self, index):

        # load the data
        if self.use_s3:
            img_key, bucket, json_key = self.imgs[index]
            mem_img = io.BytesIO()

            object_img = self.s3_buckets[bucket].Object(img_key)
            object_img.download_fileobj(mem_img)

            img = np.array(Image.open(mem_img).convert("RGB"))

            object_json = self.s3_buckets[bucket].Object(json_key)
            data_json = json.load(object_json.get()["Body"])

            img_name = img_key[:-3]

        else:
            path_img, img_name, path_json = self.imgs[index]

            # load the image
            img = np.array(Image.open(path_img).convert("RGB"))

            # load the json file
            with open(path_json) as f:
                data_json = json.load(f)

        all_projected_cuboid_keypoints = []

        # load the projected cuboid keypoints
        for obj in data_json["objects"]:
            if (
                self.objects_interest is not None
                and not obj["class"] in self.objects_interest
            ):
                continue
            # load the projected_cuboid_keypoints
            # 06.02.2024 @shalenikol
            # if obj["visibility_image"] > 0:
            if obj["visibility"] > 0:
                projected_cuboid_keypoints = obj["projected_cuboid"]
                # FAT dataset only has 8 corners for 'projected_cuboid'
                if len(projected_cuboid_keypoints) == 8:
                    projected_cuboid_keypoints.append(obj["projected_cuboid_centroid"])
            else:
                projected_cuboid_keypoints = [
                    [-100, -100],
                    [-100, -100],
                    [-100, -100],
                    [-100, -100],
                    [-100, -100],
                    [-100, -100],
                    [-100, -100],
                    [-100, -100],
                    [-100, -100],
                ]
            all_projected_cuboid_keypoints.append(projected_cuboid_keypoints)

        if len(all_projected_cuboid_keypoints) == 0:
            all_projected_cuboid_keypoints = [
                [
                    [-100, -100],
                    [-100, -100],
                    [-100, -100],
                    [-100, -100],
                    [-100, -100],
                    [-100, -100],
                    [-100, -100],
                    [-100, -100],
                    [-100, -100],
                ]
            ]

        # flatten the keypoints
        flatten_projected_cuboid = []
        for obj in all_projected_cuboid_keypoints:
            for p in obj:
                flatten_projected_cuboid.append(p)

        #######
        if self.debug:
            img_to_save = Image.fromarray(img)
            draw = ImageDraw.Draw(img_to_save)

            for ip, p in enumerate(flatten_projected_cuboid):
                draw.ellipse(
                    (int(p[0]) - 2, int(p[1]) - 2, int(p[0]) + 2, int(p[1]) + 2),
                    fill="green",
                )

            img_to_save.save(f"debug/{img_name.replace('.png','_original.png')}")
        #######

        # data augmentation
        transform = A.Compose(
            [
                A.RandomCrop(width=400, height=400),
                A.Rotate(limit=180),
                A.RandomBrightnessContrast(
                    brightness_limit=0.2, contrast_limit=0.15, p=1
                ),
                A.GaussNoise(p=1),
            ],
            keypoint_params=A.KeypointParams(format="xy", remove_invisible=False),
        )
        transformed = transform(image=img, keypoints=flatten_projected_cuboid)
        img_transformed = transformed["image"]
        flatten_projected_cuboid_transformed = transformed["keypoints"]

        #######

        # transform to the final output
        if not self.output_size == 400:
            transform = A.Compose(
                [
                    A.Resize(width=self.output_size, height=self.output_size),
                ],
                keypoint_params=A.KeypointParams(format="xy", remove_invisible=False),
            )
            transformed = transform(
                image=img_transformed, keypoints=flatten_projected_cuboid_transformed
            )
            img_transformed_output_size = transformed["image"]
            flatten_projected_cuboid_transformed_output_size = transformed["keypoints"]

        else:
            img_transformed_output_size = img_transformed
            flatten_projected_cuboid_transformed_output_size = (
                flatten_projected_cuboid_transformed
            )

        #######
        if self.debug:
            img_transformed_saving = Image.fromarray(img_transformed)

            draw = ImageDraw.Draw(img_transformed_saving)

            for ip, p in enumerate(flatten_projected_cuboid_transformed):
                draw.ellipse(
                    (int(p[0]) - 2, int(p[1]) - 2, int(p[0]) + 2, int(p[1]) + 2),
                    fill="green",
                )

            img_transformed_saving.save(
                f"debug/{img_name.replace('.png','_transformed.png')}"
            )
        #######

        # update the keypoints list
        # obj x keypoint_id x (x, y)
        i_all = 0
        for i_obj, obj in enumerate(all_projected_cuboid_keypoints):
            for i_p, point in enumerate(obj):
                all_projected_cuboid_keypoints[i_obj][
                    i_p
                ] = flatten_projected_cuboid_transformed_output_size[i_all]
                i_all += 1

        # generate the belief maps
        beliefs = CreateBeliefMap(
            size=int(self.output_size),
            pointsBelief=all_projected_cuboid_keypoints,
            sigma=self.sigma,
            nbpoints=9,
            save=False,
        )
        beliefs = torch.from_numpy(np.array(beliefs))
        # generate affinity fields with centroid.
        affinities = GenerateMapAffinity(
            size=int(self.output_size),
            nb_vertex=8,
            pointsInterest=all_projected_cuboid_keypoints,
            objects_centroid=np.array(all_projected_cuboid_keypoints)[:, -1].tolist(),
            scale=1,
        )

        # prepare the image tensors
        normalize_tensor = transforms.Compose(
            [
                transforms.ToTensor(),
                transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
            ]
        )
        to_tensor = transforms.Compose(
            [
                transforms.ToTensor(),
            ]
        )
        img_tensor = normalize_tensor(Image.fromarray(img_transformed))
        img_original = to_tensor(img_transformed)

        ########
        if self.debug:
            imgs = VisualizeBeliefMap(beliefs)
            img, grid = save_image(
                imgs,
                f"debug/{img_name.replace('.png','_beliefs.png')}",
                mean=0,
                std=1,
                nrow=3,
                save=True,
            )
            imgs = VisualizeAffinityMap(affinities)
            save_image(
                imgs,
                f"debug/{img_name.replace('.png','_affinities.png')}",
                mean=0,
                std=1,
                nrow=3,
                save=True,
            )
        ########
        img_tensor[torch.isnan(img_tensor)] = 0
        affinities[torch.isnan(affinities)] = 0
        beliefs[torch.isnan(beliefs)] = 0

        img_tensor[torch.isinf(img_tensor)] = 0
        affinities[torch.isinf(affinities)] = 0
        beliefs[torch.isinf(beliefs)] = 0

        return {
            "img": img_tensor,
            "affinities": torch.clamp(affinities, -1, 1),
            "beliefs": torch.clamp(beliefs, 0, 1),
            "file_name": img_name,
            "img_original": img_original,
        }

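The loader is consumed through a standard `DataLoader`, as `train_Dope.py` does; a minimal local (non-S3) sketch with the training settings above, where the dataset path and class list are hypothetical:

```python
# Minimal local usage of CleanVisiiDopeLoader (mirrors train_Dope.train()).
dataset = CleanVisiiDopeLoader(
    ["/path/to/dataset_dope"],    # hypothetical dir with paired .jpg/.json files
    sigma=0.5,
    output_size=50,
    extensions=["jpg"],
    objects=["fork"],             # hypothetical class list
)
sample = dataset[0]
print(sample["img"].shape)         # 3 x 400 x 400 normalized image tensor
print(sample["beliefs"].shape)     # 9 x 50 x 50 belief maps
print(sample["affinities"].shape)  # 16 x 50 x 50 affinity fields
```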
def VisualizeAffinityMap(
    tensor,
    # tensor of (len(keypoints)*2) x w x h
    threshold_norm_vector=0.4,
    # how long the vector has to be to be drawn
    points=None,
    # list of points to draw in white on top of the image
    factor=1.0,
    # by how much the image was reduced, scale factor
    translation=(0, 0)
    # by how much the points were moved
    # returns len(keypoints) x 3 x w x h  # stack of images
):
    images = torch.zeros(tensor.shape[0] // 2, 3, tensor.shape[1], tensor.shape[2])
    for i_image in range(0, tensor.shape[0], 2):  # could be read as i_keypoint

        indices = (
            torch.abs(tensor[i_image, :, :]) + torch.abs(tensor[i_image + 1, :, :])
            > threshold_norm_vector
        ).nonzero()

        for indice in indices:

            i, j = indice

            angle_vector = np.array([tensor[i_image, i, j], tensor[i_image + 1, i, j]])
            if length(angle_vector) > threshold_norm_vector:
                angle = py_ang(angle_vector)
                c = colorsys.hsv_to_rgb(angle / 360, 1, 1)
            else:
                c = [0, 0, 0]
            for i_c in range(3):
                images[i_image // 2, i_c, i, j] = c[i_c]
        if points is not None:
            point = points[i_image // 2]

            print(
                int(point[1] * factor + translation[1]),
                int(point[0] * factor + translation[0]),
            )
            images[
                i_image // 2,
                :,
                int(point[1] * factor + translation[1])
                - 1 : int(point[1] * factor + translation[1])
                + 1,
                int(point[0] * factor + translation[0])
                - 1 : int(point[0] * factor + translation[0])
                + 1,
            ] = 1

    return images


def VisualizeBeliefMap(
    tensor,
    # tensor of len(keypoints) x w x h
    points=None,
    # list of points to draw on top of the image
    factor=1.0,
    # by how much the image was reduced, scale factor
    translation=(0, 0)
    # by how much the points were moved
    # returns len(keypoints) x 3 x w x h  # stack of images in a torch tensor
):
    images = torch.zeros(tensor.shape[0], 3, tensor.shape[1], tensor.shape[2])
    for i_image in range(0, tensor.shape[0]):  # could be read as i_keypoint

        belief = tensor[i_image].clone()
        belief -= float(torch.min(belief).item())
        belief /= float(torch.max(belief).item())

        belief = torch.clamp(belief, 0, 1)
        belief = torch.cat(
            [belief.unsqueeze(0), belief.unsqueeze(0), belief.unsqueeze(0)]
        ).unsqueeze(0)

        images[i_image] = belief

    return images

def GenerateMapAffinity(
    size, nb_vertex, pointsInterest, objects_centroid, scale, save=False
):
    # Apply the downscale right now, so the vectors are correct.

    img_affinity = Image.new("RGB", (int(size / scale), int(size / scale)), "black")
    # create the empty tensors
    totensor = transforms.Compose([transforms.ToTensor()])

    affinities = []
    for i_points in range(nb_vertex):
        affinities.append(torch.zeros(2, int(size / scale), int(size / scale)))

    for i_pointsImage in range(len(pointsInterest)):
        pointsImage = pointsInterest[i_pointsImage]
        center = objects_centroid[i_pointsImage]
        for i_points in range(nb_vertex):
            point = pointsImage[i_points]

            affinity_pair, img_affinity = getAfinityCenter(
                int(size / scale),
                int(size / scale),
                tuple((np.array(pointsImage[i_points]) / scale).tolist()),
                tuple((np.array(center) / scale).tolist()),
                img_affinity=img_affinity,
                radius=1,
            )

            affinities[i_points] = (affinities[i_points] + affinity_pair) / 2

            # Normalizing
            v = affinities[i_points].numpy()

            xvec = v[0]
            yvec = v[1]

            norms = np.sqrt(xvec * xvec + yvec * yvec)
            nonzero = norms > 0

            xvec[nonzero] /= norms[nonzero]
            yvec[nonzero] /= norms[nonzero]

            affinities[i_points] = torch.from_numpy(np.concatenate([[xvec], [yvec]]))
    affinities = torch.cat(affinities, 0)

    return affinities


def getAfinityCenter(
    width, height, point, center, radius=7, tensor=None, img_affinity=None
):
    """
    Create the affinity map
    """
    if tensor is None:
        tensor = torch.zeros(2, height, width).float()

    # create the canvas for the affinity output
    imgAffinity = Image.new("RGB", (width, height), "black")
    totensor = transforms.Compose([transforms.ToTensor()])
    draw = ImageDraw.Draw(imgAffinity)
    r1 = radius
    p = point
    draw.ellipse((p[0] - r1, p[1] - r1, p[0] + r1, p[1] + r1), (255, 255, 255))

    del draw

    # compute the array to add the affinity
    array = (np.array(imgAffinity) / 255)[:, :, 0]

    angle_vector = np.array(center) - np.array(point)
    angle_vector = normalize(angle_vector)
    affinity = np.concatenate([[array * angle_vector[0]], [array * angle_vector[1]]])

    if img_affinity is not None:
        # find the angle vector
        if length(angle_vector) > 0:
            angle = py_ang(angle_vector)
        else:
            angle = 0
        c = np.array(colorsys.hsv_to_rgb(angle / 360, 1, 1)) * 255
        draw = ImageDraw.Draw(img_affinity)
        draw.ellipse(
            (p[0] - r1, p[1] - r1, p[0] + r1, p[1] + r1),
            fill=(int(c[0]), int(c[1]), int(c[2])),
        )
        del draw
    re = torch.from_numpy(affinity).float() + tensor
    return re, img_affinity

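Each pair of affinity channels stores, at pixels stamped near a cuboid corner, a vector pointing toward the object centroid; the normalization loop above makes those vectors unit length,

$$\hat v(i,j) = \frac{\big(v_x(i,j),\, v_y(i,j)\big)}{\sqrt{v_x(i,j)^2 + v_y(i,j)^2}} \qquad \text{wherever } \lVert v(i,j)\rVert > 0,$$

so that at inference time the direction field can be followed from any detected corner to vote for its object's centre.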
def CreateBeliefMap(size, pointsBelief, nbpoints, sigma=16, save=False):
    # Create the belief maps at the given points
    beliefsImg = []
    for numb_point in range(nbpoints):
        array = np.zeros([size, size])
        out = np.zeros([size, size])

        for point in pointsBelief:
            p = [point[numb_point][1], point[numb_point][0]]
            w = int(sigma * 2)
            if p[0] - w >= 0 and p[0] + w < size and p[1] - w >= 0 and p[1] + w < size:
                for i in range(int(p[0]) - w, int(p[0]) + w + 1):
                    for j in range(int(p[1]) - w, int(p[1]) + w + 1):

                        # if there is already a point there.
                        array[i, j] = max(
                            np.exp(
                                -(
                                    ((i - p[0]) ** 2 + (j - p[1]) ** 2)
                                    / (2 * (sigma**2))
                                )
                            ),
                            array[i, j],
                        )

        beliefsImg.append(array.copy())

        if save:
            stack = np.stack([array, array, array], axis=0).transpose(2, 1, 0)
            imgBelief = Image.fromarray((stack * 255).astype("uint8"))
            imgBelief.save("debug/{}.png".format(numb_point))
    return beliefsImg

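`CreateBeliefMap` stamps an unnormalized Gaussian around every keypoint, keeping the pixelwise maximum when points overlap:

$$b(i,j) \;=\; \max\!\Big(b(i,j),\; \exp\big(-\tfrac{(i-p_y)^2 + (j-p_x)^2}{2\sigma^2}\big)\Big),$$

evaluated on a window of half-width $2\sigma$ around each point $p$, as the `w = int(sigma * 2)` bound in the code enforces.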
def crop(img, i, j, h, w):
    """Crop the given PIL.Image.

    Args:
        img (PIL.Image): Image to be cropped.
        i: Upper pixel coordinate.
        j: Left pixel coordinate.
        h: Height of the cropped image.
        w: Width of the cropped image.

    Returns:
        PIL.Image: Cropped image.
    """
    return img.crop((j, i, j + w, i + h))

class AddRandomContrast(object):
    """
    Apply a random contrast change using PIL's ImageEnhance
    """

    def __init__(self, sigma=0.1):
        self.sigma = sigma

    def __call__(self, im):

        contrast = ImageEnhance.Contrast(im)

        im = contrast.enhance(np.random.normal(1, self.sigma))

        return im


class AddRandomBrightness(object):
    """
    Apply a random brightness change using PIL's ImageEnhance
    """

    def __init__(self, sigma=0.1):
        self.sigma = sigma

    def __call__(self, im):

        brightness = ImageEnhance.Brightness(im)
        im = brightness.enhance(np.random.normal(1, self.sigma))
        return im


class AddNoise(object):
    """Add zero-mean Gaussian noise with the given std to each channel
    of the torch.*Tensor, then clamp the result to [-1, 1].
    """

    def __init__(self, std=0.1):
        self.std = std

    def __call__(self, tensor):
        # TODO: make efficient
        t = torch.FloatTensor(tensor.size()).normal_(0, self.std)

        t = tensor.add(t)
        t = torch.clamp(t, -1, 1)  # this is expensive
        return t


irange = range  # keep a reference: make_grid below has a parameter named `range`

def make_grid(
    tensor,
    nrow=8,
    padding=2,
    normalize=False,
    range=None,
    scale_each=False,
    pad_value=0,
):
    """Make a grid of images.

    Args:
        tensor (Tensor or list): 4D mini-batch Tensor of shape (B x C x H x W)
            or a list of images all of the same size.
        nrow (int, optional): Number of images displayed in each row of the grid.
            The final grid size is (B / nrow, nrow). Default is 8.
        padding (int, optional): amount of padding. Default is 2.
        normalize (bool, optional): If True, shift the image to the range (0, 1),
            by subtracting the minimum and dividing by the maximum pixel value.
        range (tuple, optional): tuple (min, max) where min and max are numbers,
            then these numbers are used to normalize the image. By default, min and max
            are computed from the tensor.
        scale_each (bool, optional): If True, scale each image in the batch of
            images separately rather than the (min, max) over all images.
        pad_value (float, optional): Value for the padded pixels.
    Example:
        See this notebook `here <https://gist.github.com/anonymous/bf16430f7750c023141c562f3e9f2a91>`_
    """
    if not (
        torch.is_tensor(tensor)
        or (isinstance(tensor, list) and all(torch.is_tensor(t) for t in tensor))
    ):
        raise TypeError(
            "tensor or list of tensors expected, got {}".format(type(tensor))
        )

    # if list of tensors, convert to a 4D mini-batch Tensor
    if isinstance(tensor, list):
        tensor = torch.stack(tensor, dim=0)

    if tensor.dim() == 2:  # single image H x W
        tensor = tensor.view(1, tensor.size(0), tensor.size(1))
    if tensor.dim() == 3:  # single image
        if tensor.size(0) == 1:  # if single-channel, convert to 3-channel
            tensor = torch.cat((tensor, tensor, tensor), 0)
        tensor = tensor.view(1, tensor.size(0), tensor.size(1), tensor.size(2))

    if tensor.dim() == 4 and tensor.size(1) == 1:  # single-channel images
        tensor = torch.cat((tensor, tensor, tensor), 1)

    if normalize is True:
        tensor = tensor.clone()  # avoid modifying tensor in-place
        if range is not None:
            assert isinstance(
                range, tuple
            ), "range has to be a tuple (min, max) if specified. min and max are numbers"

        def norm_ip(img, min, max):
            img.clamp_(min=min, max=max)
            img.add_(-min).div_(max - min + 1e-5)

        def norm_range(t, range):
            if range is not None:
                norm_ip(t, range[0], range[1])
            else:
                norm_ip(t, float(t.min()), float(t.max()))

        if scale_each is True:
            for t in tensor:  # loop over mini-batch dimension
                norm_range(t, range)
        else:
            norm_range(tensor, range)

    if tensor.size(0) == 1:
        return tensor.squeeze()

    # make the mini-batch of images into a grid
    nmaps = tensor.size(0)
    xmaps = min(nrow, nmaps)
    ymaps = int(math.ceil(float(nmaps) / xmaps))
    height, width = int(tensor.size(2) + padding), int(tensor.size(3) + padding)
    grid = tensor.new(3, height * ymaps + padding, width * xmaps + padding).fill_(
        pad_value
    )
    k = 0
    for y in irange(ymaps):
        for x in irange(xmaps):
            if k >= nmaps:
                break
            grid.narrow(1, y * height + padding, height - padding).narrow(
                2, x * width + padding, width - padding
            ).copy_(tensor[k])
            k = k + 1
    return grid


def save_image(tensor, filename, nrow=4, padding=2, mean=None, std=None, save=True):
    """
    Saves a given Tensor into an image file.
    If given a mini-batch tensor, will save the tensor as a grid of images.
    """
    tensor = tensor.cpu()
    grid = make_grid(tensor, nrow=nrow, padding=10, pad_value=1)
    if mean is not None:
        # ndarr = grid.mul(std).add(mean).mul(255).byte().transpose(0,2).transpose(0,1).numpy()
        ndarr = (
            grid.mul(std)
            .add(mean)
            .mul(255)
            .byte()
            .transpose(0, 2)
            .transpose(0, 1)
            .numpy()
        )
    else:
        ndarr = (
            grid.mul(0.5)
            .add(0.5)
            .mul(255)
            .byte()
            .transpose(0, 2)
            .transpose(0, 1)
            .numpy()
        )
    im = Image.fromarray(ndarr)
    if save is True:
        im.save(filename)
    return im, grid

class Draw(object):
    """Drawing helper class to visualize the neural network output"""

    def __init__(self, im):
        """
        :param im: The image to draw in.
        """
        self.draw = ImageDraw.Draw(im)
        self.width = im.size[0]

    def draw_line(self, point1, point2, line_color, line_width=2):
        """Draws a line on the image"""
        if point1 is not None and point2 is not None:
            self.draw.line([point1, point2], fill=line_color, width=line_width)

    def draw_dot(self, point, point_color, point_radius):
        """Draws a dot (filled circle) on the image"""
        if point is not None:
            xy = [
                point[0] - point_radius,
                point[1] - point_radius,
                point[0] + point_radius,
                point[1] + point_radius,
            ]
            self.draw.ellipse(xy, fill=point_color, outline=point_color)

    def draw_text(self, point, text, text_color):
        """Draws text on the image"""
        if point is not None:
            self.draw.text(point, text, fill=text_color, font=ImageFont.truetype("misc/arial.ttf", self.width // 50))

    def draw_cube(self, points, color=(0, 255, 0)):
        """
        Draws a cube with a thick solid line across
        the front top edge and an X on the top face.
        """
        # draw front
        self.draw_line(points[0], points[1], color)
        self.draw_line(points[1], points[2], color)
        self.draw_line(points[3], points[2], color)
        self.draw_line(points[3], points[0], color)

        # draw back
        self.draw_line(points[4], points[5], color)
        self.draw_line(points[6], points[5], color)
        self.draw_line(points[6], points[7], color)
        self.draw_line(points[4], points[7], color)

        # draw sides
        self.draw_line(points[0], points[4], color)
        self.draw_line(points[7], points[3], color)
        self.draw_line(points[5], points[1], color)
        self.draw_line(points[2], points[6], color)

        # draw dots
        self.draw_dot(points[0], point_color=color, point_radius=4)
        self.draw_dot(points[1], point_color=color, point_radius=4)

        # draw x on the top
        self.draw_line(points[0], points[5], color)
        self.draw_line(points[1], points[4], color)

        # draw centre
        self.draw_dot(points[8], point_color=color, point_radius=6)

        for i in range(9):
            self.draw_text(points[i], str(i), (255, 0, 0))
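A hedged sketch of how `Draw` is meant to be used with the nine projected cuboid points (eight corners plus centre) produced by the pose annotations; the points below are made up:

```python
# Overlay a cuboid on an image with the Draw helper (illustrative points).
from PIL import Image

im = Image.new("RGB", (640, 480), "black")
points = [
    (200, 150), (400, 150), (400, 300), (200, 300),  # front face
    (250, 120), (450, 120), (450, 270), (250, 270),  # back face
    (325, 210),                                      # centre point
]
Draw(im).draw_cube(points, color=(0, 255, 0))        # draw_text needs misc/arial.ttf
im.save("cube_overlay.png")                          # relative to the working dir
```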