framework/train_models/train_Dope.py

"""
train_Dope
Overall task: object pose estimation
Implemented function: training a DOPE neural network model on a given BOP dataset
python3 $PYTHON_EDUCATION --path /Users/user/webservice/server/build/public/7065d6b6-c8a3-48c5-9679-bb8f3a690296 \
--name test1234 --datasetName 32123213
08.05.2024 @shalenikol release 0.1
"""
import os
import json
import shutil
import numpy as np
import transforms3d as t3d
FILE_RBS_INFO = "rbs_info.json"
FILE_CAMERA = "camera.json"
FILE_GT = "scene_gt.json"
FILE_GT_COCO = "scene_gt_coco.json"
FILE_GT_INFO = "scene_gt_info.json"
FILE_MODEL = "epoch"
EXT_MODEL = ".pth"
EXT_RGB = "jpg"
DIR_ROOT_DS = "dataset_dope"
DIR_TRAIN_OUT = "out_weights"
MODEL_SCALE = 1000 # the source model is in metres; convert to millimetres (DOPE expects mm)
# Own_Numbering_Files = True # image file naming: use our own sequential numbering
nn_image = 0
K_intrinsic = []
model_info = []
camera_data = {}
im_width = 0
nb_update_network = 0
# model_info entry: the 8 cuboid corners of the object, followed by its centre:
# [
# [min(x), min(y), min(z)],
# [min(x), max(y), min(z)],
# [min(x), max(y), max(z)],
# [min(x), min(y), max(z)],
# [max(x), min(y), min(z)],
# [max(x), max(y), min(z)],
# [max(x), max(y), max(z)],
# [max(x), min(y), max(z)],
# [xc, yc, zc] # min + (max - min) / 2
# ]
def trans_3Dto2D_point_in_camera(xyz, K_m, R_m2c, t_m2c):
    """
    xyz   : 3D coordinates of the point
    K_m   : 3x3 camera intrinsic matrix
    R_m2c : 3x3 rotation matrix (model to camera)
    t_m2c : 3x1 translation vector (model to camera)
    return [u, v]
    """
K = np.array(K_m)
r = np.array(R_m2c)
r.shape = (3, 3)
t = np.array(t_m2c)
t.shape = (3, 1)
T = np.concatenate((r, t), axis=1)
P_m = np.array(xyz)
P_m.resize(4)
P_m[-1] = 1.0
P_m.shape = (4, 1)
    # Project (X, Y, Z, 1) into the camera's coordinate system
    P_c = T @ P_m # 3x1
# Apply camera intrinsics to map (Xc, Yc, Zc) to p=(x, y, z)
p = K @ P_c
# Normalize by z to get (u,v,1)
uv = (p / p[2][0])[:-1]
return uv.flatten().tolist()
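# A minimal usage sketch with hypothetical values: identity rotation and an
# object 1 m in front of a camera with fx = fy = 500, cx = 320, cy = 240.
# The model origin then projects onto the principal point:
#   trans_3Dto2D_point_in_camera(
#       [0.0, 0.0, 0.0],                                              # model origin
#       [[500.0, 0.0, 320.0], [0.0, 500.0, 240.0], [0.0, 0.0, 1.0]],  # K
#       [1, 0, 0, 0, 1, 0, 0, 0, 1],                                  # R, row-major
#       [0.0, 0.0, 1000.0],                                           # t in mm
#   )  # -> [320.0, 240.0]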
def gt_parse(path: str, out_dir: str):
global nn_image
with open(os.path.join(path, FILE_GT_COCO), "r") as fh:
coco_data = json.load(fh)
with open(os.path.join(path, FILE_GT), "r") as fh:
gt_data = json.load(fh)
with open(os.path.join(path, FILE_GT_INFO), "r") as fh:
gt_info = json.load(fh)
for img in coco_data["images"]:
rgb_file = os.path.join(path, img["file_name"])
if os.path.isfile(rgb_file):
# if Own_Numbering_Files:
ext = os.path.splitext(rgb_file)[1] # only ext
f = f"{nn_image:06}"
out_img = os.path.join(out_dir, f + ext)
# else:
# f = os.path.split(rgb_file)[1] # filename with extension
# f = os.path.splitext(f)[0] # only filename
# out_img = out_dir
shutil.copy2(rgb_file, out_img)
            out_file = os.path.join(out_dir, f + ".json")
            nn_image += 1
            # full annotation for this image
            all_data = camera_data.copy()
cat_names = {obj["id"]: obj["name"] for obj in coco_data["categories"]}
id_img = img["id"] # 0, 1, 2 ...
sid_img = str(id_img) # "0", "1", "2" ...
img_info = gt_info[sid_img]
img_gt = gt_data[sid_img]
img_idx = 0 # object index on the image
objs = []
for ann in coco_data["annotations"]:
if ann["image_id"] == id_img:
item = ann["category_id"]
obj_data = {}
obj_data["class"] = cat_names[item]
x, y, width, height = ann["bbox"]
obj_data["bounding_box"] = {"top_left":[x,y], "bottom_right":[x+width,y+height]}
# visibility from FILE_GT_INFO
item_info = img_info[img_idx]
obj_data["visibility"] = item_info["visib_fract"]
# location from FILE_GT
item_gt = img_gt[img_idx]
obj_id = item_gt["obj_id"] - 1 # index with 0
cam_R_m2c = item_gt["cam_R_m2c"]
cam_t_m2c = item_gt["cam_t_m2c"]
obj_data["location"] = cam_t_m2c
                    # mat2quat returns [w, x, y, z]; DOPE expects [x, y, z, w]
                    q = t3d.quaternions.mat2quat(np.array(cam_R_m2c))
                    obj_data["quaternion_xyzw"] = [q[1], q[2], q[3], q[0]]
cuboid_xyz = model_info[obj_id]
obj_data["projected_cuboid"] = [
trans_3Dto2D_point_in_camera(cub, K_intrinsic, cam_R_m2c, cam_t_m2c)
for cub in cuboid_xyz
]
objs.append(obj_data)
img_idx += 1
all_data["objects"] = objs
with open(out_file, "w") as fh:
json.dump(all_data, fh, indent=2)
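# Sketch of the annotation JSON written above for each image (field names come
# from the code; the values shown are illustrative, not from a real dataset):
# {
#   "camera_data": {"intrinsic": {"cx": 320.0, "cy": 240.0, "fx": 500.0, "fy": 500.0},
#                   "height": 480, "width": 640},
#   "objects": [
#     {"class": "fork",
#      "bounding_box": {"top_left": [10, 20], "bottom_right": [110, 220]},
#      "visibility": 0.97,
#      "location": [12.3, -4.5, 678.9],
#      "quaternion_xyzw": [0.0, 0.0, 0.0, 1.0],
#      "projected_cuboid": [[u1, v1], ..., [u9, v9]]}  # 8 corners + centre
#   ]
# }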
def explore(path: str, res_dir: str):
if not os.path.isdir(path):
return
folders = [
os.path.join(path, o)
for o in os.listdir(path)
if os.path.isdir(os.path.join(path, o))
]
for path_entry in folders:
if os.path.isfile(os.path.join(path_entry,FILE_GT_COCO)) and \
os.path.isfile(os.path.join(path_entry,FILE_GT_INFO)) and \
os.path.isfile(os.path.join(path_entry,FILE_GT)):
gt_parse(path_entry, res_dir)
else:
explore(path_entry, res_dir)
def BOP2DOPE_dataset(dpath: str, out_dir: str) -> str:
""" Convert BOP-dataset to YOLO format for train """
res_dir = os.path.join(out_dir, DIR_ROOT_DS)
if os.path.isdir(res_dir):
shutil.rmtree(res_dir)
os.mkdir(res_dir)
explore(dpath, res_dir)
return out_dir
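# The conversion above yields a flat directory that train() points DOPE at,
# e.g. (numbering continues across all scene folders found under dpath):
#   <out_dir>/dataset_dope/000000.jpg
#   <out_dir>/dataset_dope/000000.json
#   <out_dir>/dataset_dope/000001.jpg
#   ...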
def train(dopepath: str, wname: str, epochs: int, pretrain: bool, lname: list):
import random
# try:
import configparser as configparser
# except ImportError:
# import ConfigParser as configparser
import torch
# import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
import torchvision.transforms as transforms
from torch.autograd import Variable
import datetime
from tensorboardX import SummaryWriter
from models_dope import DopeNetwork
from utils_dope import CleanVisiiDopeLoader #, VisualizeBeliefMap, save_image
import warnings
warnings.filterwarnings("ignore")
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3,4,5,6,7"
torch.autograd.set_detect_anomaly(False)
torch.autograd.profiler.profile(False)
torch.autograd.gradcheck = False
torch.backends.cudnn.benchmark = True
start_time = datetime.datetime.now()
print("start:", start_time.strftime("%m/%d/%Y, %H:%M:%S"))
res_model = os.path.join(dopepath, wname + EXT_MODEL)
local_rank = 0
    # lightweight attribute container standing in for an argparse.Namespace
    opt = lambda: None
opt.use_s3 = False
opt.train_buckets = []
opt.endpoint = None
    opt.lr = 0.0001
    opt.loginterval = 100
    opt.sigma = 0.5  # 4
    opt.nbupdates = None
    # opt.save = False
    # opt.option = "default"
    # opt.gpuids = [0]
    opt.namefile = FILE_MODEL
    opt.workers = 8
    opt.batchsize = 16
    opt.data = [os.path.join(dopepath, DIR_ROOT_DS)]
    opt.outf = os.path.join(dopepath, DIR_TRAIN_OUT)
    opt.object = lname  # e.g. ["fork"]
    opt.exts = [EXT_RGB]
    # opt.imagesize = im_width
    opt.epochs = epochs
    opt.pretrained = pretrain
    opt.net_path = res_model if pretrain else None
    opt.manualseed = random.randint(1, 10000)
# # Validate Arguments
# if opt.use_s3 and (opt.train_buckets is None or opt.endpoint is None):
# raise ValueError(
# "--train_buckets and --endpoint must be specified if training with data from s3 bucket."
# )
# if not opt.use_s3 and opt.data is None:
# raise ValueError("--data field must be specified.")
os.makedirs(opt.outf, exist_ok=True)
# if local_rank == 0:
# writer = SummaryWriter(opt.outf + "/runs/")
random.seed(opt.manualseed)
torch.cuda.set_device(local_rank)
# torch.distributed.init_process_group(backend="nccl", init_method="env://")
torch.manual_seed(opt.manualseed)
torch.cuda.manual_seed_all(opt.manualseed)
# # Data Augmentation
# if not opt.save:
# contrast = 0.2
# brightness = 0.2
# noise = 0.1
# normal_imgs = [0.59, 0.25]
# transform = transforms.Compose(
# [
# AddRandomContrast(0.2),
# AddRandomBrightness(0.2),
# transforms.Resize(opt.imagesize),
# ]
# )
# else:
# contrast = 0.00001
# brightness = 0.00001
# noise = 0.00001
# normal_imgs = None
# transform = transforms.Compose(
# [transforms.Resize(opt.imagesize), transforms.ToTensor()]
# )
# Load Model
net = DopeNetwork()
output_size = 50
# opt.sigma = 0.5
train_dataset = CleanVisiiDopeLoader(
opt.data,
sigma=opt.sigma,
output_size=output_size,
extensions=opt.exts,
objects=opt.object,
use_s3=opt.use_s3,
buckets=opt.train_buckets,
endpoint_url=opt.endpoint,
)
trainingdata = torch.utils.data.DataLoader(
train_dataset,
batch_size=opt.batchsize,
shuffle=True,
num_workers=opt.workers,
pin_memory=True,
)
    if trainingdata is not None:
print(f"training data: {len(trainingdata)} batches")
print("Loading Model...")
net = net.cuda()
# net = torch.nn.parallel.DistributedDataParallel(
# net.cuda(), device_ids=[local_rank], output_device=local_rank
# )
if opt.pretrained:
if opt.net_path is not None:
net.load_state_dict(torch.load(opt.net_path))
else:
print("Error: Did not specify path to pretrained weights.")
quit()
parameters = filter(lambda p: p.requires_grad, net.parameters())
optimizer = optim.Adam(parameters, lr=opt.lr)
print("ready to train!")
global nb_update_network
nb_update_network = 0
# best_results = {"epoch": None, "passed": None, "add_mean": None, "add_std": None}
    scaler = torch.cuda.amp.GradScaler()  # AMP scaler; created but not used in the loop below
def _runnetwork(epoch, train_loader): #, syn=False
global nb_update_network
# net
net.train()
loss_avg_to_log = {}
loss_avg_to_log["loss"] = []
loss_avg_to_log["loss_affinities"] = []
loss_avg_to_log["loss_belief"] = []
loss_avg_to_log["loss_class"] = []
for batch_idx, targets in enumerate(train_loader):
optimizer.zero_grad()
data = Variable(targets["img"].cuda())
target_belief = Variable(targets["beliefs"].cuda())
target_affinities = Variable(targets["affinities"].cuda())
output_belief, output_aff = net(data)
loss = None
loss_belief = torch.tensor(0).float().cuda()
loss_affinities = torch.tensor(0).float().cuda()
loss_class = torch.tensor(0).float().cuda()
            for stage in range(len(output_aff)):  # one loss term per network stage
loss_affinities += (
(output_aff[stage] - target_affinities)
* (output_aff[stage] - target_affinities)
).mean()
loss_belief += (
(output_belief[stage] - target_belief)
* (output_belief[stage] - target_belief)
).mean()
loss = loss_affinities + loss_belief
# if batch_idx == 0:
# post = "train"
# if local_rank == 0:
# for i_output in range(1):
# # input images
# writer.add_image(
# f"{post}_input_{i_output}",
# targets["img_original"][i_output],
# epoch,
# dataformats="CWH",
# )
# # belief maps gt
# imgs = VisualizeBeliefMap(target_belief[i_output])
# img, grid = save_image(
# imgs, "some_img.png", mean=0, std=1, nrow=3, save=False
# )
# writer.add_image(
# f"{post}_belief_ground_truth_{i_output}",
# grid,
# epoch,
# dataformats="CWH",
# )
# # belief maps guess
# imgs = VisualizeBeliefMap(output_belief[-1][i_output])
# img, grid = save_image(
# imgs, "some_img.png", mean=0, std=1, nrow=3, save=False
# )
# writer.add_image(
# f"{post}_belief_guess_{i_output}",
# grid,
# epoch,
# dataformats="CWH",
# )
loss.backward()
optimizer.step()
nb_update_network += 1
# log the loss
loss_avg_to_log["loss"].append(loss.item())
loss_avg_to_log["loss_class"].append(loss_class.item())
loss_avg_to_log["loss_affinities"].append(loss_affinities.item())
loss_avg_to_log["loss_belief"].append(loss_belief.item())
if batch_idx % opt.loginterval == 0:
print(
"Train Epoch: {} [{}/{} ({:.0f}%)] \tLoss: {:.15f} \tLocal Rank: {}".format(
epoch,
batch_idx * len(data),
len(train_loader.dataset),
100.0 * batch_idx / len(train_loader),
loss.item(),
local_rank,
)
)
# # log the loss values
# if local_rank == 0:
# writer.add_scalar("loss/train_loss", np.mean(loss_avg_to_log["loss"]), epoch)
# writer.add_scalar("loss/train_cls", np.mean(loss_avg_to_log["loss_class"]), epoch)
# writer.add_scalar("loss/train_aff", np.mean(loss_avg_to_log["loss_affinities"]), epoch)
# writer.add_scalar("loss/train_bel", np.mean(loss_avg_to_log["loss_belief"]), epoch)
for epoch in range(1, opt.epochs + 1):
_runnetwork(epoch, trainingdata)
try:
if local_rank == 0:
torch.save(
net.state_dict(),
f"{opt.outf}/{opt.namefile}_{str(epoch).zfill(3)}.pth",
)
except Exception as e:
print(f"Encountered Exception: {e}")
        if opt.nbupdates is not None and nb_update_network > int(opt.nbupdates):
break
# if local_rank == 0:
# save result model
    torch.save(net.state_dict(), res_model)  # res_model = dopepath/wname + EXT_MODEL
# else:
# torch.save(
# net.state_dict(),
# f"{opt.outf}/{opt.namefile}_{str(epoch).zfill(3)}_rank_{local_rank}.pth",
# )
print("end:", datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S"))
print("Total time taken: ", str(datetime.datetime.now() - start_time).split(".")[0])
def train_Dope_i(path: str, wname: str, dname: str, outpath: str, epochs: int, pretrain: bool):
    """ Main procedure for training a DOPE model """
global K_intrinsic, model_info, camera_data, im_width
if not os.path.isdir(outpath):
print(f"Invalid output path '{outpath}'")
exit(-1)
out_dir = os.path.join(outpath, wname)
ds_path = os.path.join(path, dname)
if not os.path.isdir(ds_path):
print(f"{ds_path} : no BOP directory")
return ""
camera_json = os.path.join(ds_path, FILE_CAMERA)
if not os.path.isfile(camera_json):
print(f"{camera_json} : no intrinsic camera file")
return ""
rbs_info = os.path.join(ds_path, FILE_RBS_INFO)
if not os.path.isfile(rbs_info):
print(f"{rbs_info} : no dataset info file")
return ""
camera_data = {}
with open(camera_json, "r") as fh:
data = json.load(fh)
keys = ["cx","cy","fx","fy"]
intrinsic = {k: data[k] for k in keys}
im_height = data["height"]
im_width = data["width"]
camera_data["camera_data"] = dict(intrinsic=intrinsic, height=im_height, width=im_width)
K_intrinsic = [
[data["fx"], 0.0, data["cx"]],
[0.0, data["fy"], data["cy"]],
[0.0, 0.0, 1.0]
]
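    # Sketch of the camera.json fields consumed here (values are illustrative):
    #   {"cx": 320.0, "cy": 240.0, "fx": 500.0, "fy": 500.0, "width": 640, "height": 480}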
# calc cuboid + center
with open(rbs_info, "r") as fh:
info = json.load(fh)
    # list of object names
    list_name = list(map(lambda x: x["name"], info))
    # in FILE_RBS_INFO the models are numbered in ascending order
model_info = []
for m_info in info:
cub = np.array(m_info["cuboid"]) * MODEL_SCALE
xyz_min = cub.min(axis=0)
xyz_max = cub.max(axis=0)
# [xc, yc, zc] # min + (max - min) / 2
center = []
for i in range(3):
            center.append(xyz_min[i] + (xyz_max[i] - xyz_min[i]) / 2)
c = np.array(center, ndmin=2)
model_info.append(np.append(cub, c, axis=0))
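    # Illustrative example: for a 40 mm cube centred at the origin, cub holds
    # the eight corners at +/-20 mm on each axis and the appended centre row is
    # [0.0, 0.0, 0.0], so each model_info entry is a 9x3 array in millimetres.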
if pretrain:
        # continue training
if not os.path.isdir(out_dir):
print(f"No dir '{out_dir}'")
exit(-2)
dpath = out_dir
# model_path = os.path.join(dpath, wname + ".pt")
else:
        # train from scratch
if not os.path.isdir(out_dir):
os.mkdir(out_dir)
dpath = BOP2DOPE_dataset(ds_path, out_dir)
if len(dpath) == 0:
print(f"Error in convert dataset '{ds_path}' to '{outpath}'")
exit(-4)
# model_path = os.path.join(dpath, FILE_BASEMODEL)
# results = f"python train.py --local_rank 0 --data {dpath} --object fork" \
# + f" -e {epochs} --batchsize 16 --exts jpg --imagesize 640 --pretrained" \
# + " --net_path /home/shalenikol/fork_work/dope_training/output/weights_2996/net_epoch_47.pth"
# print(results)
train(dpath, wname, epochs, pretrain, list_name)
import argparse
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--path", required=True, help="Path for dataset")
parser.add_argument("--name", required=True, help="String with result weights name")
parser.add_argument("--datasetName", required=True, help="String with dataset name")
parser.add_argument("--outpath", default="weights", help="Output path for weights")
parser.add_argument("--epoch", default=3, help="How many training epochs")
parser.add_argument('--pretrain', action="store_true", help="Use pretraining")
args = parser.parse_args()
train_Dope_i(args.path, args.name, args.datasetName, args.outpath, args.epoch, args.pretrain)