"""
|
||
train_Dope
|
||
Общая задача: оценка позиции объекта (Pose estimation)
|
||
Реализуемая функция: обучение нейросетевой модели DOPE по заданному BOP-датасету
|
||
|
||
python3 $PYTHON_EDUCATION --path /Users/user/webservice/server/build/public/7065d6b6-c8a3-48c5-9679-bb8f3a690296 \
|
||
--name test1234 --datasetName 32123213
|
||
|
||
08.05.2024 @shalenikol release 0.1
|
||
"""
|
||
import os
import json
import shutil
import numpy as np
import transforms3d as t3d

FILE_RBS_INFO = "rbs_info.json"
FILE_CAMERA = "camera.json"
FILE_GT = "scene_gt.json"
FILE_GT_COCO = "scene_gt_coco.json"
FILE_GT_INFO = "scene_gt_info.json"

FILE_MODEL = "epoch"
EXT_MODEL = ".pth"
EXT_RGB = "jpg"
DIR_ROOT_DS = "dataset_dope"
DIR_TRAIN_OUT = "out_weights"

MODEL_SCALE = 1000  # the source model is in metres; convert to mm (as DOPE expects)

# Own_Numbering_Files = True  # image file naming: our own sequential numbering
nn_image = 0
K_intrinsic = []
model_info = []
camera_data = {}
im_width = 0

nb_update_network = 0
# Order of the 9 keypoints stored per object in model_info
# (the 8 cuboid corners followed by the centroid):
# [
#   [min(x), min(y), min(z)],
#   [min(x), max(y), min(z)],
#   [min(x), max(y), max(z)],
#   [min(x), min(y), max(z)],
#   [max(x), min(y), min(z)],
#   [max(x), max(y), min(z)],
#   [max(x), max(y), max(z)],
#   [max(x), min(y), max(z)],
#   [xc, yc, zc]  # min + (max - min) / 2
# ]

def trans_3Dto2D_point_in_camera(xyz, K_m, R_m2c, t_m2c):
    """
    xyz   : 3D coordinates of the point
    K_m   : 3x3 camera intrinsic matrix
    R_m2c : 3x3 rotation matrix
    t_m2c : 3x1 translation vector
    return [u, v]
    """
    K = np.array(K_m)
    r = np.array(R_m2c)
    r.shape = (3, 3)
    t = np.array(t_m2c)
    t.shape = (3, 1)
    T = np.concatenate((r, t), axis=1)  # 3x4 extrinsic matrix [R|t]

    P_m = np.array(xyz)
    P_m.resize(4)
    P_m[-1] = 1.0
    P_m.shape = (4, 1)

    # Transform (X, Y, Z, 1) into the camera coordinate system
    P_c = T @ P_m  # 3x1
    # Apply camera intrinsics to map (Xc, Yc, Zc) to p = (x, y, z)
    p = K @ P_c
    # Normalize by z to get (u, v, 1)
    uv = (p / p[2][0])[:-1]
    return uv.flatten().tolist()
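
# Quick sanity check (hypothetical numbers, not taken from any dataset): a point
# at the model origin, translated 1000 mm along the camera z-axis, with
# fx = fy = 500, cx = 320, cy = 240 and identity rotation, projects to the
# principal point, since u = fx*Xc/Zc + cx and v = fy*Yc/Zc + cy:
#   trans_3Dto2D_point_in_camera(
#       [0.0, 0.0, 0.0],
#       [[500.0, 0.0, 320.0], [0.0, 500.0, 240.0], [0.0, 0.0, 1.0]],
#       [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0],
#       [0.0, 0.0, 1000.0],
#   )  # -> [320.0, 240.0]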

def gt_parse(path: str, out_dir: str):
    global nn_image
    with open(os.path.join(path, FILE_GT_COCO), "r") as fh:
        coco_data = json.load(fh)
    with open(os.path.join(path, FILE_GT), "r") as fh:
        gt_data = json.load(fh)
    with open(os.path.join(path, FILE_GT_INFO), "r") as fh:
        gt_info = json.load(fh)

    cat_names = {obj["id"]: obj["name"] for obj in coco_data["categories"]}
    for img in coco_data["images"]:
        rgb_file = os.path.join(path, img["file_name"])
        if not os.path.isfile(rgb_file):
            continue
        # if Own_Numbering_Files:
        ext = os.path.splitext(rgb_file)[1]  # extension only
        f = f"{nn_image:06}"
        out_img = os.path.join(out_dir, f + ext)
        # else:
        #     f = os.path.split(rgb_file)[1]  # filename with extension
        #     f = os.path.splitext(f)[0]  # filename only
        #     out_img = out_dir
        shutil.copy2(rgb_file, out_img)
        out_file = os.path.join(out_dir, f + ".json")
        nn_image += 1

        # full annotation of one image
        all_data = camera_data.copy()
        id_img = img["id"]  # 0, 1, 2 ...
        sid_img = str(id_img)  # "0", "1", "2" ...
        img_info = gt_info[sid_img]
        img_gt = gt_data[sid_img]
        img_idx = 0  # object index within the image
        objs = []
        for ann in coco_data["annotations"]:
            if ann["image_id"] == id_img:
                item = ann["category_id"]
                obj_data = {}
                obj_data["class"] = cat_names[item]
                x, y, width, height = ann["bbox"]
                obj_data["bounding_box"] = {"top_left": [x, y], "bottom_right": [x + width, y + height]}

                # visibility from FILE_GT_INFO
                item_info = img_info[img_idx]
                obj_data["visibility"] = item_info["visib_fract"]

                # pose from FILE_GT
                item_gt = img_gt[img_idx]
                obj_id = item_gt["obj_id"] - 1  # zero-based index
                cam_R_m2c = item_gt["cam_R_m2c"]
                cam_t_m2c = item_gt["cam_t_m2c"]
                obj_data["location"] = cam_t_m2c
                q = t3d.quaternions.mat2quat(np.array(cam_R_m2c))
                obj_data["quaternion_xyzw"] = [q[1], q[2], q[3], q[0]]

                cuboid_xyz = model_info[obj_id]
                obj_data["projected_cuboid"] = [
                    trans_3Dto2D_point_in_camera(cub, K_intrinsic, cam_R_m2c, cam_t_m2c)
                    for cub in cuboid_xyz
                ]

                objs.append(obj_data)
                img_idx += 1

        all_data["objects"] = objs
        with open(out_file, "w") as fh:
            json.dump(all_data, fh, indent=2)
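
# Each converted image lands in out_dir as NNNNNN.jpg plus NNNNNN.json.
# The JSON written above has roughly this shape (field names from the code,
# values illustrative):
# {
#   "camera_data": {"intrinsic": {"cx": ..., "cy": ..., "fx": ..., "fy": ...},
#                   "height": ..., "width": ...},
#   "objects": [
#     {"class": "...",
#      "bounding_box": {"top_left": [x, y], "bottom_right": [x, y]},
#      "visibility": 0.97,
#      "location": [tx, ty, tz],
#      "quaternion_xyzw": [qx, qy, qz, qw],
#      "projected_cuboid": [[u, v], ...]}   # 9 points
#   ]
# }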

def explore(path: str, res_dir: str):
    if not os.path.isdir(path):
        return
    folders = [
        os.path.join(path, o)
        for o in os.listdir(path)
        if os.path.isdir(os.path.join(path, o))
    ]
    for path_entry in folders:
        if os.path.isfile(os.path.join(path_entry, FILE_GT_COCO)) and \
           os.path.isfile(os.path.join(path_entry, FILE_GT_INFO)) and \
           os.path.isfile(os.path.join(path_entry, FILE_GT)):
            gt_parse(path_entry, res_dir)
        else:
            explore(path_entry, res_dir)

def BOP2DOPE_dataset(dpath: str, out_dir: str) -> str:
    """ Convert a BOP dataset to DOPE training format """
    res_dir = os.path.join(out_dir, DIR_ROOT_DS)
    if os.path.isdir(res_dir):
        shutil.rmtree(res_dir)
    os.mkdir(res_dir)

    explore(dpath, res_dir)

    return out_dir
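
# Resulting layout: out_dir/dataset_dope/000000.jpg, 000000.json, 000001.jpg, ...
# Note that the function deliberately returns out_dir rather than res_dir:
# train() re-appends DIR_ROOT_DS when it builds opt.data.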

def train(dopepath: str, wname: str, epochs: int, pretrain: bool, lname: list):
    import random
    # try:
    import configparser as configparser
    # except ImportError:
    #     import ConfigParser as configparser
    import torch
    # import torch.nn.parallel
    import torch.optim as optim
    import torch.utils.data
    import torchvision.transforms as transforms
    from torch.autograd import Variable
    import datetime
    from tensorboardX import SummaryWriter

    from models_dope import DopeNetwork
    from utils_dope import CleanVisiiDopeLoader  # , VisualizeBeliefMap, save_image

    import warnings
    warnings.filterwarnings("ignore")

    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3,4,5,6,7"

    # disable autograd debugging aids and enable cudnn autotuning for speed
    torch.autograd.set_detect_anomaly(False)
    torch.autograd.profiler.profile(False)
    torch.autograd.gradcheck = False
    torch.backends.cudnn.benchmark = True

    start_time = datetime.datetime.now()
    print("start:", start_time.strftime("%m/%d/%Y, %H:%M:%S"))

    res_model = os.path.join(dopepath, wname + EXT_MODEL)

    local_rank = 0
    opt = lambda: None  # lightweight attribute container for the training options
    opt.use_s3 = False
    opt.train_buckets = []
    opt.endpoint = None
    opt.lr = 0.0001
    opt.loginterval = 100
    opt.sigma = 0.5  # 4
    opt.nbupdates = None
    # opt.save = False
    # opt.option = "default"
    # opt.gpuids = [0]

    opt.namefile = FILE_MODEL
    opt.workers = 8
    opt.batchsize = 16

    opt.data = [os.path.join(dopepath, DIR_ROOT_DS)]
    opt.outf = os.path.join(dopepath, DIR_TRAIN_OUT)
    opt.object = lname  # ["fork"]
    opt.exts = [EXT_RGB]
    # opt.imagesize = im_width
    opt.epochs = epochs
    opt.pretrained = pretrain
    opt.net_path = res_model if pretrain else None
    opt.manualseed = random.randint(1, 10000)

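    # These fields mirror the command-line options of the DOPE train.py script
    # referenced in the commented example near the end of this file, from which
    # this routine appears to be adapted; here they are hard-coded instead of parsed.
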
    # # Validate Arguments
    # if opt.use_s3 and (opt.train_buckets is None or opt.endpoint is None):
    #     raise ValueError(
    #         "--train_buckets and --endpoint must be specified if training with data from s3 bucket."
    #     )
    # if not opt.use_s3 and opt.data is None:
    #     raise ValueError("--data field must be specified.")

    os.makedirs(opt.outf, exist_ok=True)

    # if local_rank == 0:
    #     writer = SummaryWriter(opt.outf + "/runs/")
    random.seed(opt.manualseed)
    torch.cuda.set_device(local_rank)
    # torch.distributed.init_process_group(backend="nccl", init_method="env://")
    torch.manual_seed(opt.manualseed)
    torch.cuda.manual_seed_all(opt.manualseed)

    # # Data Augmentation
    # if not opt.save:
    #     contrast = 0.2
    #     brightness = 0.2
    #     noise = 0.1
    #     normal_imgs = [0.59, 0.25]
    #     transform = transforms.Compose(
    #         [
    #             AddRandomContrast(0.2),
    #             AddRandomBrightness(0.2),
    #             transforms.Resize(opt.imagesize),
    #         ]
    #     )
    # else:
    #     contrast = 0.00001
    #     brightness = 0.00001
    #     noise = 0.00001
    #     normal_imgs = None
    #     transform = transforms.Compose(
    #         [transforms.Resize(opt.imagesize), transforms.ToTensor()]
    #     )

    # Load Model
    net = DopeNetwork()
    output_size = 50
    # opt.sigma = 0.5

    train_dataset = CleanVisiiDopeLoader(
        opt.data,
        sigma=opt.sigma,
        output_size=output_size,
        extensions=opt.exts,
        objects=opt.object,
        use_s3=opt.use_s3,
        buckets=opt.train_buckets,
        endpoint_url=opt.endpoint,
    )
    trainingdata = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=opt.batchsize,
        shuffle=True,
        num_workers=opt.workers,
        pin_memory=True,
    )
    if trainingdata is not None:
        print(f"training data: {len(trainingdata)} batches")

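    # Each batch produced by CleanVisiiDopeLoader is a dict carrying (at least)
    # "img", "beliefs" and "affinities" tensors; see their use in _runnetwork()
    # below. The None check above is defensive: the DataLoader constructor
    # never returns None.
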
print("Loading Model...")
|
||
net = net.cuda()
|
||
# net = torch.nn.parallel.DistributedDataParallel(
|
||
# net.cuda(), device_ids=[local_rank], output_device=local_rank
|
||
# )
|
||
if opt.pretrained:
|
||
if opt.net_path is not None:
|
||
net.load_state_dict(torch.load(opt.net_path))
|
||
else:
|
||
print("Error: Did not specify path to pretrained weights.")
|
||
quit()
|
||
|
||
parameters = filter(lambda p: p.requires_grad, net.parameters())
|
||
optimizer = optim.Adam(parameters, lr=opt.lr)
|
||
|
||
print("ready to train!")
|
||
|
||
global nb_update_network
|
||
nb_update_network = 0
|
||
# best_results = {"epoch": None, "passed": None, "add_mean": None, "add_std": None}
|
||
|
||
scaler = torch.cuda.amp.GradScaler()
|
||
|
||
    def _runnetwork(epoch, train_loader):  # , syn=False
        global nb_update_network
        net.train()

        loss_avg_to_log = {}
        loss_avg_to_log["loss"] = []
        loss_avg_to_log["loss_affinities"] = []
        loss_avg_to_log["loss_belief"] = []
        loss_avg_to_log["loss_class"] = []
        for batch_idx, targets in enumerate(train_loader):
            optimizer.zero_grad()

            data = Variable(targets["img"].cuda())
            target_belief = Variable(targets["beliefs"].cuda())
            target_affinities = Variable(targets["affinities"].cuda())

            output_belief, output_aff = net(data)

            loss = None

            loss_belief = torch.tensor(0).float().cuda()
            loss_affinities = torch.tensor(0).float().cuda()
            loss_class = torch.tensor(0).float().cuda()

            for stage in range(len(output_aff)):  # one belief/affinity map per network stage
                loss_affinities += (
                    (output_aff[stage] - target_affinities)
                    * (output_aff[stage] - target_affinities)
                ).mean()

                loss_belief += (
                    (output_belief[stage] - target_belief)
                    * (output_belief[stage] - target_belief)
                ).mean()

            loss = loss_affinities + loss_belief

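            # i.e. MSE of the belief maps plus MSE of the affinity fields, summed
            # over all stages; loss_class stays at zero and is logged only to
            # keep the log structure uniform.
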
            # if batch_idx == 0:
            #     post = "train"
            #     if local_rank == 0:
            #         for i_output in range(1):
            #             # input images
            #             writer.add_image(
            #                 f"{post}_input_{i_output}",
            #                 targets["img_original"][i_output],
            #                 epoch,
            #                 dataformats="CWH",
            #             )
            #             # belief maps gt
            #             imgs = VisualizeBeliefMap(target_belief[i_output])
            #             img, grid = save_image(
            #                 imgs, "some_img.png", mean=0, std=1, nrow=3, save=False
            #             )
            #             writer.add_image(
            #                 f"{post}_belief_ground_truth_{i_output}",
            #                 grid,
            #                 epoch,
            #                 dataformats="CWH",
            #             )
            #             # belief maps guess
            #             imgs = VisualizeBeliefMap(output_belief[-1][i_output])
            #             img, grid = save_image(
            #                 imgs, "some_img.png", mean=0, std=1, nrow=3, save=False
            #             )
            #             writer.add_image(
            #                 f"{post}_belief_guess_{i_output}",
            #                 grid,
            #                 epoch,
            #                 dataformats="CWH",
            #             )

            loss.backward()

            optimizer.step()

            nb_update_network += 1

            # log the loss
            loss_avg_to_log["loss"].append(loss.item())
            loss_avg_to_log["loss_class"].append(loss_class.item())
            loss_avg_to_log["loss_affinities"].append(loss_affinities.item())
            loss_avg_to_log["loss_belief"].append(loss_belief.item())

            if batch_idx % opt.loginterval == 0:
                print(
                    "Train Epoch: {} [{}/{} ({:.0f}%)] \tLoss: {:.15f} \tLocal Rank: {}".format(
                        epoch,
                        batch_idx * len(data),
                        len(train_loader.dataset),
                        100.0 * batch_idx / len(train_loader),
                        loss.item(),
                        local_rank,
                    )
                )
        # # log the loss values
        # if local_rank == 0:
        #     writer.add_scalar("loss/train_loss", np.mean(loss_avg_to_log["loss"]), epoch)
        #     writer.add_scalar("loss/train_cls", np.mean(loss_avg_to_log["loss_class"]), epoch)
        #     writer.add_scalar("loss/train_aff", np.mean(loss_avg_to_log["loss_affinities"]), epoch)
        #     writer.add_scalar("loss/train_bel", np.mean(loss_avg_to_log["loss_belief"]), epoch)

    for epoch in range(1, opt.epochs + 1):

        _runnetwork(epoch, trainingdata)

        try:
            if local_rank == 0:
                torch.save(
                    net.state_dict(),
                    f"{opt.outf}/{opt.namefile}_{str(epoch).zfill(3)}.pth",
                )
        except Exception as e:
            print(f"Encountered Exception: {e}")

        if opt.nbupdates is not None and nb_update_network > int(opt.nbupdates):
            break

    # if local_rank == 0:
    # save the resulting model
    torch.save(net.state_dict(), res_model)  # os.path.join(dopepath, wname + EXT_MODEL)
    # else:
    #     torch.save(
    #         net.state_dict(),
    #         f"{opt.outf}/{opt.namefile}_{str(epoch).zfill(3)}_rank_{local_rank}.pth",
    #     )

    print("end:", datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S"))
    print("Total time taken: ", str(datetime.datetime.now() - start_time).split(".")[0])

def train_Dope_i(path: str, wname: str, dname: str, outpath: str, epochs: int, pretrain: bool):
    """ Main procedure for training a DOPE model """
    global K_intrinsic, model_info, camera_data, im_width

    if not os.path.isdir(outpath):
        print(f"Invalid output path '{outpath}'")
        exit(-1)
    out_dir = os.path.join(outpath, wname)
    ds_path = os.path.join(path, dname)

    if not os.path.isdir(ds_path):
        print(f"{ds_path} : no BOP directory")
        return ""

    camera_json = os.path.join(ds_path, FILE_CAMERA)
    if not os.path.isfile(camera_json):
        print(f"{camera_json} : no intrinsic camera file")
        return ""

    rbs_info = os.path.join(ds_path, FILE_RBS_INFO)
    if not os.path.isfile(rbs_info):
        print(f"{rbs_info} : no dataset info file")
        return ""

    camera_data = {}
    with open(camera_json, "r") as fh:
        data = json.load(fh)
    keys = ["cx", "cy", "fx", "fy"]
    intrinsic = {k: data[k] for k in keys}
    im_height = data["height"]
    im_width = data["width"]
    camera_data["camera_data"] = dict(intrinsic=intrinsic, height=im_height, width=im_width)
    K_intrinsic = [
        [data["fx"], 0.0, data["cx"]],
        [0.0, data["fy"], data["cy"]],
        [0.0, 0.0, 1.0],
    ]
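    # camera.json is expected to provide at least these keys (values illustrative):
    #   {"cx": 320.0, "cy": 240.0, "fx": 618.0, "fy": 618.0, "height": 480, "width": 640}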
    # compute cuboid + center
    with open(rbs_info, "r") as fh:
        info = json.load(fh)
    # list of object names
    list_name = list(map(lambda x: x["name"], info))
    # in FILE_RBS_INFO the models are numbered from smallest id to largest
    model_info = []
    for m_info in info:
        cub = np.array(m_info["cuboid"]) * MODEL_SCALE
        xyz_min = cub.min(axis=0)
        xyz_max = cub.max(axis=0)
        # [xc, yc, zc] # min + (max - min) / 2
        center = []
        for i in range(3):
            center.append(xyz_min[i] + (xyz_max[i] - xyz_min[i]) / 2)
        c = np.array(center, ndmin=2)
        model_info.append(np.append(cub, c, axis=0))

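    # Worked example (hypothetical cuboid in metres): corners spanning
    # [-0.01, 0.02] on each axis scale to [-10, 20] mm, so xyz_min = [-10, -10, -10],
    # xyz_max = [20, 20, 20], and the appended center row is [5.0, 5.0, 5.0],
    # yielding the 9-point layout documented near the top of this file.
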
    if pretrain:
        # continue training
        if not os.path.isdir(out_dir):
            print(f"No dir '{out_dir}'")
            exit(-2)
        dpath = out_dir
        # model_path = os.path.join(dpath, wname + ".pt")
    else:
        # train from scratch
        if not os.path.isdir(out_dir):
            os.mkdir(out_dir)

        dpath = BOP2DOPE_dataset(ds_path, out_dir)
        if len(dpath) == 0:
            print(f"Error converting dataset '{ds_path}' to '{outpath}'")
            exit(-4)
        # model_path = os.path.join(dpath, FILE_BASEMODEL)

    # results = f"python train.py --local_rank 0 --data {dpath} --object fork" \
    #     + f" -e {epochs} --batchsize 16 --exts jpg --imagesize 640 --pretrained" \
    #     + " --net_path /home/shalenikol/fork_work/dope_training/output/weights_2996/net_epoch_47.pth"
    # print(results)
    train(dpath, wname, epochs, pretrain, list_name)

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--path", required=True, help="Path for dataset")
    parser.add_argument("--name", required=True, help="String with result weights name")
    parser.add_argument("--datasetName", required=True, help="String with dataset name")
    parser.add_argument("--outpath", default="weights", help="Output path for weights")
    parser.add_argument("--epoch", type=int, default=3, help="How many training epochs")
    parser.add_argument("--pretrain", action="store_true", help="Use pretraining")
    args = parser.parse_args()

    train_Dope_i(args.path, args.name, args.datasetName, args.outpath, args.epoch, args.pretrain)