""" train_Dope Общая задача: оценка позиции объекта (Pose estimation) Реализуемая функция: обучение нейросетевой модели DOPE по заданному BOP-датасету python3 $PYTHON_EDUCATION --path /Users/user/webservice/server/build/public/7065d6b6-c8a3-48c5-9679-bb8f3a690296 \ --name test1234 --datasetName 32123213 08.05.2024 @shalenikol release 0.1 """ import os import json import shutil import numpy as np import transforms3d as t3d FILE_RBS_INFO = "rbs_info.json" FILE_CAMERA = "camera.json" FILE_GT = "scene_gt.json" FILE_GT_COCO = "scene_gt_coco.json" FILE_GT_INFO = "scene_gt_info.json" FILE_MODEL = "epoch" EXT_MODEL = ".pth" EXT_RGB = "jpg" DIR_ROOT_DS = "dataset_dope" DIR_TRAIN_OUT = "out_weights" MODEL_SCALE = 1000 # исходная модель в метрах, преобразуем в мм (для DOPE) # Own_Numbering_Files = True # наименование image-файлов: собственная нумерация nn_image = 0 K_intrinsic = [] model_info = [] camera_data = {} im_width = 0 nb_update_network = 0 # [ # [min(x), min(y), min(z)], # [min(x), max(y), min(z)], # [min(x), max(y), max(z)], # [min(x), min(y), max(z)], # [max(x), min(y), max(z)], # [max(x), max(y), min(z)], # [max(x), max(y), max(z)], # [max(x), min(y), max(z)], # [xc, yc, zc] # min + (max - min) / 2 # ] def trans_3Dto2D_point_in_camera(xyz, K_m, R_m2c, t_m2c): """ xyz : 3D-координаты точки K_m : внутренняя матрица камеры 3х3 R_m2c : матрица поворота 3х3 t_m2c : вектор перемещения 3х1 return [u,v] """ K = np.array(K_m) r = np.array(R_m2c) r.shape = (3, 3) t = np.array(t_m2c) t.shape = (3, 1) T = np.concatenate((r, t), axis=1) P_m = np.array(xyz) P_m.resize(4) P_m[-1] = 1.0 P_m.shape = (4, 1) # Project (X, Y, Z, 1) into cameras coordinate system P_c = T @ P_m # 4x1 # Apply camera intrinsics to map (Xc, Yc, Zc) to p=(x, y, z) p = K @ P_c # Normalize by z to get (u,v,1) uv = (p / p[2][0])[:-1] return uv.flatten().tolist() def gt_parse(path: str, out_dir: str): global nn_image with open(os.path.join(path, FILE_GT_COCO), "r") as fh: coco_data = json.load(fh) with open(os.path.join(path, FILE_GT), "r") as fh: gt_data = json.load(fh) with open(os.path.join(path, FILE_GT_INFO), "r") as fh: gt_info = json.load(fh) for img in coco_data["images"]: rgb_file = os.path.join(path, img["file_name"]) if os.path.isfile(rgb_file): # if Own_Numbering_Files: ext = os.path.splitext(rgb_file)[1] # only ext f = f"{nn_image:06}" out_img = os.path.join(out_dir, f + ext) # else: # f = os.path.split(rgb_file)[1] # filename with extension # f = os.path.splitext(f)[0] # only filename # out_img = out_dir shutil.copy2(rgb_file, out_img) out_file = os.path.join(out_dir,f+".json") nn_image += 1 # full annotation of the one image all_data = camera_data.copy() cat_names = {obj["id"]: obj["name"] for obj in coco_data["categories"]} id_img = img["id"] # 0, 1, 2 ... sid_img = str(id_img) # "0", "1", "2" ... img_info = gt_info[sid_img] img_gt = gt_data[sid_img] img_idx = 0 # object index on the image objs = [] for ann in coco_data["annotations"]: if ann["image_id"] == id_img: item = ann["category_id"] obj_data = {} obj_data["class"] = cat_names[item] x, y, width, height = ann["bbox"] obj_data["bounding_box"] = {"top_left":[x,y], "bottom_right":[x+width,y+height]} # visibility from FILE_GT_INFO item_info = img_info[img_idx] obj_data["visibility"] = item_info["visib_fract"] # location from FILE_GT item_gt = img_gt[img_idx] obj_id = item_gt["obj_id"] - 1 # index with 0 cam_R_m2c = item_gt["cam_R_m2c"] cam_t_m2c = item_gt["cam_t_m2c"] obj_data["location"] = cam_t_m2c q = t3d.quaternions.mat2quat(np.array(cam_R_m2c)) obj_data["quaternion_xyzw"] = [q[1], q[2], q[3], q[0]] cuboid_xyz = model_info[obj_id] obj_data["projected_cuboid"] = [ trans_3Dto2D_point_in_camera(cub, K_intrinsic, cam_R_m2c, cam_t_m2c) for cub in cuboid_xyz ] objs.append(obj_data) img_idx += 1 all_data["objects"] = objs with open(out_file, "w") as fh: json.dump(all_data, fh, indent=2) def explore(path: str, res_dir: str): if not os.path.isdir(path): return folders = [ os.path.join(path, o) for o in os.listdir(path) if os.path.isdir(os.path.join(path, o)) ] for path_entry in folders: if os.path.isfile(os.path.join(path_entry,FILE_GT_COCO)) and \ os.path.isfile(os.path.join(path_entry,FILE_GT_INFO)) and \ os.path.isfile(os.path.join(path_entry,FILE_GT)): gt_parse(path_entry, res_dir) else: explore(path_entry, res_dir) def BOP2DOPE_dataset(dpath: str, out_dir: str) -> str: """ Convert BOP-dataset to YOLO format for train """ res_dir = os.path.join(out_dir, DIR_ROOT_DS) if os.path.isdir(res_dir): shutil.rmtree(res_dir) os.mkdir(res_dir) explore(dpath, res_dir) return out_dir def train(dopepath:str, wname:str, epochs:int, pretrain: bool, lname: list): import random # try: import configparser as configparser # except ImportError: # import ConfigParser as configparser import torch # import torch.nn.parallel import torch.optim as optim import torch.utils.data import torchvision.transforms as transforms from torch.autograd import Variable import datetime from tensorboardX import SummaryWriter from models_dope import DopeNetwork from utils_dope import CleanVisiiDopeLoader #, VisualizeBeliefMap, save_image import warnings warnings.filterwarnings("ignore") os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3,4,5,6,7" torch.autograd.set_detect_anomaly(False) torch.autograd.profiler.profile(False) torch.autograd.gradcheck = False torch.backends.cudnn.benchmark = True start_time = datetime.datetime.now() print("start:", start_time.strftime("%m/%d/%Y, %H:%M:%S")) res_model = os.path.join(dopepath, wname + EXT_MODEL) local_rank = 0 opt = lambda: None opt.use_s3 = False opt.train_buckets = [] opt.endpoint = None opt.lr=0.0001 opt.loginterval=100 opt.sigma=0.5 # 4 opt.nbupdates=None # opt.save=False # opt.option="default" # opt.gpuids=[0] opt.namefile=FILE_MODEL opt.workers=8 opt.batchsize=16 opt.data = [os.path.join(dopepath, DIR_ROOT_DS)] opt.outf = os.path.join(dopepath, DIR_TRAIN_OUT) opt.object = lname #["fork"] opt.exts = [EXT_RGB] # opt.imagesize = im_width opt.epochs = epochs opt.pretrained = pretrain opt.net_path = res_model if pretrain else None opt.manualseed = random.randint(1, 10000) # # Validate Arguments # if opt.use_s3 and (opt.train_buckets is None or opt.endpoint is None): # raise ValueError( # "--train_buckets and --endpoint must be specified if training with data from s3 bucket." # ) # if not opt.use_s3 and opt.data is None: # raise ValueError("--data field must be specified.") os.makedirs(opt.outf, exist_ok=True) # if local_rank == 0: # writer = SummaryWriter(opt.outf + "/runs/") random.seed(opt.manualseed) torch.cuda.set_device(local_rank) # torch.distributed.init_process_group(backend="nccl", init_method="env://") torch.manual_seed(opt.manualseed) torch.cuda.manual_seed_all(opt.manualseed) # # Data Augmentation # if not opt.save: # contrast = 0.2 # brightness = 0.2 # noise = 0.1 # normal_imgs = [0.59, 0.25] # transform = transforms.Compose( # [ # AddRandomContrast(0.2), # AddRandomBrightness(0.2), # transforms.Resize(opt.imagesize), # ] # ) # else: # contrast = 0.00001 # brightness = 0.00001 # noise = 0.00001 # normal_imgs = None # transform = transforms.Compose( # [transforms.Resize(opt.imagesize), transforms.ToTensor()] # ) # Load Model net = DopeNetwork() output_size = 50 # opt.sigma = 0.5 train_dataset = CleanVisiiDopeLoader( opt.data, sigma=opt.sigma, output_size=output_size, extensions=opt.exts, objects=opt.object, use_s3=opt.use_s3, buckets=opt.train_buckets, endpoint_url=opt.endpoint, ) trainingdata = torch.utils.data.DataLoader( train_dataset, batch_size=opt.batchsize, shuffle=True, num_workers=opt.workers, pin_memory=True, ) if not trainingdata is None: print(f"training data: {len(trainingdata)} batches") print("Loading Model...") net = net.cuda() # net = torch.nn.parallel.DistributedDataParallel( # net.cuda(), device_ids=[local_rank], output_device=local_rank # ) if opt.pretrained: if opt.net_path is not None: net.load_state_dict(torch.load(opt.net_path)) else: print("Error: Did not specify path to pretrained weights.") quit() parameters = filter(lambda p: p.requires_grad, net.parameters()) optimizer = optim.Adam(parameters, lr=opt.lr) print("ready to train!") global nb_update_network nb_update_network = 0 # best_results = {"epoch": None, "passed": None, "add_mean": None, "add_std": None} scaler = torch.cuda.amp.GradScaler() def _runnetwork(epoch, train_loader): #, syn=False global nb_update_network # net net.train() loss_avg_to_log = {} loss_avg_to_log["loss"] = [] loss_avg_to_log["loss_affinities"] = [] loss_avg_to_log["loss_belief"] = [] loss_avg_to_log["loss_class"] = [] for batch_idx, targets in enumerate(train_loader): optimizer.zero_grad() data = Variable(targets["img"].cuda()) target_belief = Variable(targets["beliefs"].cuda()) target_affinities = Variable(targets["affinities"].cuda()) output_belief, output_aff = net(data) loss = None loss_belief = torch.tensor(0).float().cuda() loss_affinities = torch.tensor(0).float().cuda() loss_class = torch.tensor(0).float().cuda() for stage in range(len(output_aff)): # output, each belief map layers. loss_affinities += ( (output_aff[stage] - target_affinities) * (output_aff[stage] - target_affinities) ).mean() loss_belief += ( (output_belief[stage] - target_belief) * (output_belief[stage] - target_belief) ).mean() loss = loss_affinities + loss_belief # if batch_idx == 0: # post = "train" # if local_rank == 0: # for i_output in range(1): # # input images # writer.add_image( # f"{post}_input_{i_output}", # targets["img_original"][i_output], # epoch, # dataformats="CWH", # ) # # belief maps gt # imgs = VisualizeBeliefMap(target_belief[i_output]) # img, grid = save_image( # imgs, "some_img.png", mean=0, std=1, nrow=3, save=False # ) # writer.add_image( # f"{post}_belief_ground_truth_{i_output}", # grid, # epoch, # dataformats="CWH", # ) # # belief maps guess # imgs = VisualizeBeliefMap(output_belief[-1][i_output]) # img, grid = save_image( # imgs, "some_img.png", mean=0, std=1, nrow=3, save=False # ) # writer.add_image( # f"{post}_belief_guess_{i_output}", # grid, # epoch, # dataformats="CWH", # ) loss.backward() optimizer.step() nb_update_network += 1 # log the loss loss_avg_to_log["loss"].append(loss.item()) loss_avg_to_log["loss_class"].append(loss_class.item()) loss_avg_to_log["loss_affinities"].append(loss_affinities.item()) loss_avg_to_log["loss_belief"].append(loss_belief.item()) if batch_idx % opt.loginterval == 0: print( "Train Epoch: {} [{}/{} ({:.0f}%)] \tLoss: {:.15f} \tLocal Rank: {}".format( epoch, batch_idx * len(data), len(train_loader.dataset), 100.0 * batch_idx / len(train_loader), loss.item(), local_rank, ) ) # # log the loss values # if local_rank == 0: # writer.add_scalar("loss/train_loss", np.mean(loss_avg_to_log["loss"]), epoch) # writer.add_scalar("loss/train_cls", np.mean(loss_avg_to_log["loss_class"]), epoch) # writer.add_scalar("loss/train_aff", np.mean(loss_avg_to_log["loss_affinities"]), epoch) # writer.add_scalar("loss/train_bel", np.mean(loss_avg_to_log["loss_belief"]), epoch) for epoch in range(1, opt.epochs + 1): _runnetwork(epoch, trainingdata) try: if local_rank == 0: torch.save( net.state_dict(), f"{opt.outf}/{opt.namefile}_{str(epoch).zfill(3)}.pth", ) except Exception as e: print(f"Encountered Exception: {e}") if not opt.nbupdates is None and nb_update_network > int(opt.nbupdates): break # if local_rank == 0: # save result model torch.save(net.state_dict(), res_model) #os.path.join(dopepath, wname + EXT_MODEL)) # else: # torch.save( # net.state_dict(), # f"{opt.outf}/{opt.namefile}_{str(epoch).zfill(3)}_rank_{local_rank}.pth", # ) print("end:", datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S")) print("Total time taken: ", str(datetime.datetime.now() - start_time).split(".")[0]) def train_Dope_i(path:str, wname:str, dname:str, outpath:str, epochs:int, pretrain: bool): """ Main procedure for train DOPE model """ global K_intrinsic, model_info, camera_data, im_width if not os.path.isdir(outpath): print(f"Invalid output path '{outpath}'") exit(-1) out_dir = os.path.join(outpath, wname) ds_path = os.path.join(path, dname) if not os.path.isdir(ds_path): print(f"{ds_path} : no BOP directory") return "" camera_json = os.path.join(ds_path, FILE_CAMERA) if not os.path.isfile(camera_json): print(f"{camera_json} : no intrinsic camera file") return "" rbs_info = os.path.join(ds_path, FILE_RBS_INFO) if not os.path.isfile(rbs_info): print(f"{rbs_info} : no dataset info file") return "" camera_data = {} with open(camera_json, "r") as fh: data = json.load(fh) keys = ["cx","cy","fx","fy"] intrinsic = {k: data[k] for k in keys} im_height = data["height"] im_width = data["width"] camera_data["camera_data"] = dict(intrinsic=intrinsic, height=im_height, width=im_width) K_intrinsic = [ [data["fx"], 0.0, data["cx"]], [0.0, data["fy"], data["cy"]], [0.0, 0.0, 1.0] ] # calc cuboid + center with open(rbs_info, "r") as fh: info = json.load(fh) # список имён объектов list_name = list(map(lambda x: x["name"], info)) # in FILE_RBS_INFO model numbering from smallest to largest model_info = [] for m_info in info: cub = np.array(m_info["cuboid"]) * MODEL_SCALE xyz_min = cub.min(axis=0) xyz_max = cub.max(axis=0) # [xc, yc, zc] # min + (max - min) / 2 center = [] for i in range(3): center.append(xyz_min[i] + (xyz_max[i]- xyz_min[i]) / 2) c = np.array(center, ndmin=2) model_info.append(np.append(cub, c, axis=0)) if pretrain: # продолжить обучение if not os.path.isdir(out_dir): print(f"No dir '{out_dir}'") exit(-2) dpath = out_dir # model_path = os.path.join(dpath, wname + ".pt") else: # обучение сначала if not os.path.isdir(out_dir): os.mkdir(out_dir) dpath = BOP2DOPE_dataset(ds_path, out_dir) if len(dpath) == 0: print(f"Error in convert dataset '{ds_path}' to '{outpath}'") exit(-4) # model_path = os.path.join(dpath, FILE_BASEMODEL) # results = f"python train.py --local_rank 0 --data {dpath} --object fork" \ # + f" -e {epochs} --batchsize 16 --exts jpg --imagesize 640 --pretrained" \ # + " --net_path /home/shalenikol/fork_work/dope_training/output/weights_2996/net_epoch_47.pth" # print(results) train(dpath, wname, epochs, pretrain, list_name) import argparse if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--path", required=True, help="Path for dataset") parser.add_argument("--name", required=True, help="String with result weights name") parser.add_argument("--datasetName", required=True, help="String with dataset name") parser.add_argument("--outpath", default="weights", help="Output path for weights") parser.add_argument("--epoch", default=3, help="How many training epochs") parser.add_argument('--pretrain', action="store_true", help="Use pretraining") args = parser.parse_args() train_Dope_i(args.path, args.name, args.datasetName, args.outpath, args.epoch, args.pretrain)