# framework/train_models/train_Dope.py

"""
train_Dope
Общая задача: оценка позиции объекта (Pose estimation)
Реализуемая функция: обучение нейросетевой модели DOPE по заданному BOP-датасету
python3 $PYTHON_EDUCATION --path /Users/user/webservice/server/build/public/7065d6b6-c8a3-48c5-9679-bb8f3a690296 \
--name test1234 --datasetName 32123213
08.05.2024 @shalenikol release 0.1
"""
import argparse
import os
import json
import shutil

import numpy as np
import transforms3d as t3d

FILE_RBS_INFO = "rbs_info.json"
FILE_CAMERA = "camera.json"
FILE_GT = "scene_gt.json"
FILE_GT_COCO = "scene_gt_coco.json"
FILE_GT_INFO = "scene_gt_info.json"
FILE_MODEL = "epoch"
EXT_MODEL = ".pth"
EXT_RGB = "jpg"
DIR_ROOT_DS = "dataset_dope"
DIR_TRAIN_OUT = "out_weights"
MODEL_SCALE = 1000  # the source model is in meters; convert to millimeters (DOPE expects mm)
# Own_Numbering_Files = True  # image file naming: use our own sequential numbering
nn_image = 0
K_intrinsic = []
model_info = []
camera_data = {}
im_width = 0
nb_update_network = 0
# Cuboid vertex ordering assumed for each entry of model_info (8 corners + center):
# [
#     [min(x), min(y), min(z)],
#     [min(x), max(y), min(z)],
#     [min(x), max(y), max(z)],
#     [min(x), min(y), max(z)],
#     [max(x), min(y), min(z)],
#     [max(x), max(y), min(z)],
#     [max(x), max(y), max(z)],
#     [max(x), min(y), max(z)],
#     [xc, yc, zc]  # center = min + (max - min) / 2
# ]
def trans_3Dto2D_point_in_camera(xyz, K_m, R_m2c, t_m2c):
    """
    xyz   : 3D coordinates of the point
    K_m   : 3x3 camera intrinsic matrix
    R_m2c : 3x3 rotation matrix
    t_m2c : 3x1 translation vector
    return [u, v]
    """
    K = np.array(K_m)
    r = np.array(R_m2c)
    r.shape = (3, 3)
    t = np.array(t_m2c)
    t.shape = (3, 1)
    T = np.concatenate((r, t), axis=1)  # 3x4 extrinsic matrix [R|t]
    P_m = np.array(xyz)
    P_m.resize(4)
    P_m[-1] = 1.0
    P_m.shape = (4, 1)
    # Transform (X, Y, Z, 1) into the camera coordinate system
    P_c = T @ P_m  # 3x1
    # Apply camera intrinsics to map (Xc, Yc, Zc) to p = (x, y, z)
    p = K @ P_c
    # Normalize by z to get (u, v, 1)
    uv = (p / p[2][0])[:-1]
    return uv.flatten().tolist()
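
# A minimal sanity-check sketch (values are invented for illustration): with the
# identity rotation and a translation straight down the optical axis, the model
# origin should project to the principal point (cx, cy).
#
#   K = [[500.0, 0.0, 320.0], [0.0, 500.0, 240.0], [0.0, 0.0, 1.0]]
#   R = [1, 0, 0, 0, 1, 0, 0, 0, 1]   # flat 3x3, as stored in scene_gt.json
#   t = [0.0, 0.0, 1000.0]            # mm
#   trans_3Dto2D_point_in_camera([0.0, 0.0, 0.0], K, R, t)  # -> [320.0, 240.0]
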
def gt_parse(path: str, out_dir: str):
    global nn_image
    with open(os.path.join(path, FILE_GT_COCO), "r") as fh:
        coco_data = json.load(fh)
    with open(os.path.join(path, FILE_GT), "r") as fh:
        gt_data = json.load(fh)
    with open(os.path.join(path, FILE_GT_INFO), "r") as fh:
        gt_info = json.load(fh)

    for img in coco_data["images"]:
        rgb_file = os.path.join(path, img["file_name"])
        if os.path.isfile(rgb_file):
            # if Own_Numbering_Files:
            ext = os.path.splitext(rgb_file)[1]  # only the extension
            f = f"{nn_image:06}"
            out_img = os.path.join(out_dir, f + ext)
            # else:
            #     f = os.path.split(rgb_file)[1]  # filename with extension
            #     f = os.path.splitext(f)[0]      # only the filename
            #     out_img = out_dir
            shutil.copy2(rgb_file, out_img)
            out_file = os.path.join(out_dir, f + ".json")
            nn_image += 1

            # full annotation for this image
            all_data = camera_data.copy()
            cat_names = {obj["id"]: obj["name"] for obj in coco_data["categories"]}
            id_img = img["id"]  # 0, 1, 2 ...
            sid_img = str(id_img)  # "0", "1", "2" ...
            img_info = gt_info[sid_img]
            img_gt = gt_data[sid_img]
            img_idx = 0  # object index within the image
            objs = []
            for ann in coco_data["annotations"]:
                if ann["image_id"] == id_img:
                    item = ann["category_id"]
                    obj_data = {}
                    obj_data["class"] = cat_names[item]
                    x, y, width, height = ann["bbox"]
                    obj_data["bounding_box"] = {"top_left": [x, y], "bottom_right": [x + width, y + height]}
                    # visibility from FILE_GT_INFO
                    item_info = img_info[img_idx]
                    obj_data["visibility"] = item_info["visib_fract"]
                    # pose from FILE_GT
                    item_gt = img_gt[img_idx]
                    obj_id = item_gt["obj_id"] - 1  # 0-based index
                    cam_R_m2c = item_gt["cam_R_m2c"]
                    cam_t_m2c = item_gt["cam_t_m2c"]
                    obj_data["location"] = cam_t_m2c
                    q = t3d.quaternions.mat2quat(np.array(cam_R_m2c))
                    obj_data["quaternion_xyzw"] = [q[1], q[2], q[3], q[0]]
                    cuboid_xyz = model_info[obj_id]
                    obj_data["projected_cuboid"] = [
                        trans_3Dto2D_point_in_camera(cub, K_intrinsic, cam_R_m2c, cam_t_m2c)
                        for cub in cuboid_xyz
                    ]
                    objs.append(obj_data)
                    img_idx += 1
            all_data["objects"] = objs
            with open(out_file, "w") as fh:
                json.dump(all_data, fh, indent=2)
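
# Each copied image gets a JSON annotation next to it. A sketch of the layout
# gt_parse actually writes (field values are illustrative):
#
#   {
#     "camera_data": {"intrinsic": {"cx": ..., "cy": ..., "fx": ..., "fy": ...},
#                     "height": ..., "width": ...},
#     "objects": [
#       {
#         "class": "fork",
#         "bounding_box": {"top_left": [x, y], "bottom_right": [x + w, y + h]},
#         "visibility": 0.97,
#         "location": [tx, ty, tz],             # cam_t_m2c, in mm
#         "quaternion_xyzw": [qx, qy, qz, qw],
#         "projected_cuboid": [[u0, v0], ..., [u8, v8]]  # 8 corners + center
#       }
#     ]
#   }
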
def explore(path: str, res_dir: str):
    if not os.path.isdir(path):
        return
    folders = [
        os.path.join(path, o)
        for o in os.listdir(path)
        if os.path.isdir(os.path.join(path, o))
    ]
    for path_entry in folders:
        if os.path.isfile(os.path.join(path_entry, FILE_GT_COCO)) and \
           os.path.isfile(os.path.join(path_entry, FILE_GT_INFO)) and \
           os.path.isfile(os.path.join(path_entry, FILE_GT)):
            gt_parse(path_entry, res_dir)
        else:
            explore(path_entry, res_dir)
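
# explore() descends recursively until it finds scene folders holding all three
# ground-truth files. A typical BOP-style layout it would accept (directory
# names are illustrative):
#
#   <dataset>/train_pbr/000000/
#       scene_gt.json
#       scene_gt_info.json
#       scene_gt_coco.json
#       rgb/000000.jpg ...
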
def BOP2DOPE_dataset(dpath: str, out_dir: str) -> str:
    """ Convert a BOP dataset to DOPE format for training """
    res_dir = os.path.join(out_dir, DIR_ROOT_DS)
    if os.path.isdir(res_dir):
        shutil.rmtree(res_dir)
    os.mkdir(res_dir)
    explore(dpath, res_dir)
    return out_dir
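
# Resulting layout (a sketch; names follow the docstring example): a flat folder
# of renumbered image/annotation pairs.
#
#   <out_dir>/dataset_dope/
#       000000.jpg
#       000000.json
#       000001.jpg
#       000001.json
#       ...
#
# Note the function returns out_dir (the parent), not res_dir: train() appends
# DIR_ROOT_DS again when it sets opt.data.
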
def train(dopepath: str, wname: str, epochs: int, pretrain: bool, lname: list):
    import random
    # try:
    import configparser as configparser
    # except ImportError:
    #     import ConfigParser as configparser
    import torch
    # import torch.nn.parallel
    import torch.optim as optim
    import torch.utils.data
    import torchvision.transforms as transforms
    from torch.autograd import Variable
    import datetime
    from tensorboardX import SummaryWriter
    from models_dope import DopeNetwork
    from utils_dope import CleanVisiiDopeLoader  # , VisualizeBeliefMap, save_image

    import warnings
    warnings.filterwarnings("ignore")

    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3,4,5,6,7"
    torch.autograd.set_detect_anomaly(False)
    torch.autograd.profiler.profile(False)
    torch.autograd.gradcheck = False
    torch.backends.cudnn.benchmark = True

    start_time = datetime.datetime.now()
    print("start:", start_time.strftime("%m/%d/%Y, %H:%M:%S"))

    res_model = os.path.join(dopepath, wname + EXT_MODEL)

    local_rank = 0
    # bare callable used as a simple attribute container for the training options
    opt = lambda: None
    opt.use_s3 = False
    opt.train_buckets = []
    opt.endpoint = None
    opt.lr = 0.0001
    opt.loginterval = 100
    opt.sigma = 0.5  # 4
    opt.nbupdates = None
    # opt.save = False
    # opt.option = "default"
    # opt.gpuids = [0]
    opt.namefile = FILE_MODEL
    opt.workers = 8
    opt.batchsize = 16
    opt.data = [os.path.join(dopepath, DIR_ROOT_DS)]
    opt.outf = os.path.join(dopepath, DIR_TRAIN_OUT)
    opt.object = lname  # ["fork"]
    opt.exts = [EXT_RGB]
    # opt.imagesize = im_width
    opt.epochs = epochs
    opt.pretrained = pretrain
    opt.net_path = res_model if pretrain else None
    opt.manualseed = random.randint(1, 10000)
    # # Validate Arguments
    # if opt.use_s3 and (opt.train_buckets is None or opt.endpoint is None):
    #     raise ValueError(
    #         "--train_buckets and --endpoint must be specified if training with data from s3 bucket."
    #     )
    # if not opt.use_s3 and opt.data is None:
    #     raise ValueError("--data field must be specified.")

    os.makedirs(opt.outf, exist_ok=True)

    # if local_rank == 0:
    #     writer = SummaryWriter(opt.outf + "/runs/")

    random.seed(opt.manualseed)
    torch.cuda.set_device(local_rank)
    # torch.distributed.init_process_group(backend="nccl", init_method="env://")
    torch.manual_seed(opt.manualseed)
    torch.cuda.manual_seed_all(opt.manualseed)

    # # Data Augmentation
    # if not opt.save:
    #     contrast = 0.2
    #     brightness = 0.2
    #     noise = 0.1
    #     normal_imgs = [0.59, 0.25]
    #     transform = transforms.Compose(
    #         [
    #             AddRandomContrast(0.2),
    #             AddRandomBrightness(0.2),
    #             transforms.Resize(opt.imagesize),
    #         ]
    #     )
    # else:
    #     contrast = 0.00001
    #     brightness = 0.00001
    #     noise = 0.00001
    #     normal_imgs = None
    #     transform = transforms.Compose(
    #         [transforms.Resize(opt.imagesize), transforms.ToTensor()]
    #     )
    # Load Model
    net = DopeNetwork()
    output_size = 50
    # opt.sigma = 0.5

    train_dataset = CleanVisiiDopeLoader(
        opt.data,
        sigma=opt.sigma,
        output_size=output_size,
        extensions=opt.exts,
        objects=opt.object,
        use_s3=opt.use_s3,
        buckets=opt.train_buckets,
        endpoint_url=opt.endpoint,
    )
    trainingdata = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=opt.batchsize,
        shuffle=True,
        num_workers=opt.workers,
        pin_memory=True,
    )
    if trainingdata is not None:
        print(f"training data: {len(trainingdata)} batches")

    print("Loading Model...")
    net = net.cuda()
    # net = torch.nn.parallel.DistributedDataParallel(
    #     net.cuda(), device_ids=[local_rank], output_device=local_rank
    # )
    if opt.pretrained:
        if opt.net_path is not None:
            net.load_state_dict(torch.load(opt.net_path))
        else:
            print("Error: Did not specify path to pretrained weights.")
            quit()

    parameters = filter(lambda p: p.requires_grad, net.parameters())
    optimizer = optim.Adam(parameters, lr=opt.lr)

    print("ready to train!")

    global nb_update_network
    nb_update_network = 0
    # best_results = {"epoch": None, "passed": None, "add_mean": None, "add_std": None}
    scaler = torch.cuda.amp.GradScaler()  # created but not used in the training loop below
    def _runnetwork(epoch, train_loader):  # , syn=False
        global nb_update_network
        net.train()

        loss_avg_to_log = {
            "loss": [],
            "loss_affinities": [],
            "loss_belief": [],
            "loss_class": [],
        }
        for batch_idx, targets in enumerate(train_loader):
            optimizer.zero_grad()

            data = Variable(targets["img"].cuda())
            target_belief = Variable(targets["beliefs"].cuda())
            target_affinities = Variable(targets["affinities"].cuda())

            output_belief, output_aff = net(data)

            loss = None
            loss_belief = torch.tensor(0).float().cuda()
            loss_affinities = torch.tensor(0).float().cuda()
            loss_class = torch.tensor(0).float().cuda()
            # accumulate MSE over every stage's belief and affinity maps
            for stage in range(len(output_aff)):
                loss_affinities += (
                    (output_aff[stage] - target_affinities)
                    * (output_aff[stage] - target_affinities)
                ).mean()
                loss_belief += (
                    (output_belief[stage] - target_belief)
                    * (output_belief[stage] - target_belief)
                ).mean()
            loss = loss_affinities + loss_belief

            # if batch_idx == 0:
            #     post = "train"
            #     if local_rank == 0:
            #         for i_output in range(1):
            #             # input images
            #             writer.add_image(
            #                 f"{post}_input_{i_output}",
            #                 targets["img_original"][i_output],
            #                 epoch,
            #                 dataformats="CWH",
            #             )
            #             # belief maps gt
            #             imgs = VisualizeBeliefMap(target_belief[i_output])
            #             img, grid = save_image(
            #                 imgs, "some_img.png", mean=0, std=1, nrow=3, save=False
            #             )
            #             writer.add_image(
            #                 f"{post}_belief_ground_truth_{i_output}",
            #                 grid,
            #                 epoch,
            #                 dataformats="CWH",
            #             )
            #             # belief maps guess
            #             imgs = VisualizeBeliefMap(output_belief[-1][i_output])
            #             img, grid = save_image(
            #                 imgs, "some_img.png", mean=0, std=1, nrow=3, save=False
            #             )
            #             writer.add_image(
            #                 f"{post}_belief_guess_{i_output}",
            #                 grid,
            #                 epoch,
            #                 dataformats="CWH",
            #             )

            loss.backward()
            optimizer.step()
            nb_update_network += 1

            # log the loss
            loss_avg_to_log["loss"].append(loss.item())
            loss_avg_to_log["loss_class"].append(loss_class.item())
            loss_avg_to_log["loss_affinities"].append(loss_affinities.item())
            loss_avg_to_log["loss_belief"].append(loss_belief.item())
            if batch_idx % opt.loginterval == 0:
                print(
                    "Train Epoch: {} [{}/{} ({:.0f}%)] \tLoss: {:.15f} \tLocal Rank: {}".format(
                        epoch,
                        batch_idx * len(data),
                        len(train_loader.dataset),
                        100.0 * batch_idx / len(train_loader),
                        loss.item(),
                        local_rank,
                    )
                )

        # # log the loss values
        # if local_rank == 0:
        #     writer.add_scalar("loss/train_loss", np.mean(loss_avg_to_log["loss"]), epoch)
        #     writer.add_scalar("loss/train_cls", np.mean(loss_avg_to_log["loss_class"]), epoch)
        #     writer.add_scalar("loss/train_aff", np.mean(loss_avg_to_log["loss_affinities"]), epoch)
        #     writer.add_scalar("loss/train_bel", np.mean(loss_avg_to_log["loss_belief"]), epoch)
    for epoch in range(1, opt.epochs + 1):
        _runnetwork(epoch, trainingdata)
        try:
            if local_rank == 0:
                torch.save(
                    net.state_dict(),
                    f"{opt.outf}/{opt.namefile}_{str(epoch).zfill(3)}.pth",
                )
        except Exception as e:
            print(f"Encountered Exception: {e}")
        if opt.nbupdates is not None and nb_update_network > int(opt.nbupdates):
            break

    # if local_rank == 0:
    # save the resulting model
    torch.save(net.state_dict(), res_model)  # os.path.join(dopepath, wname + EXT_MODEL)
    # else:
    #     torch.save(
    #         net.state_dict(),
    #         f"{opt.outf}/{opt.namefile}_{str(epoch).zfill(3)}_rank_{local_rank}.pth",
    #     )

    print("end:", datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S"))
    print("Total time taken: ", str(datetime.datetime.now() - start_time).split(".")[0])
def train_Dope_i(path: str, wname: str, dname: str, outpath: str, epochs: int, pretrain: bool):
    """ Main procedure for training a DOPE model """
    global K_intrinsic, model_info, camera_data, im_width

    if not os.path.isdir(outpath):
        print(f"Invalid output path '{outpath}'")
        exit(-1)
    out_dir = os.path.join(outpath, wname)

    ds_path = os.path.join(path, dname)
    if not os.path.isdir(ds_path):
        print(f"{ds_path} : no BOP directory")
        return ""
    camera_json = os.path.join(ds_path, FILE_CAMERA)
    if not os.path.isfile(camera_json):
        print(f"{camera_json} : no intrinsic camera file")
        return ""
    rbs_info = os.path.join(ds_path, FILE_RBS_INFO)
    if not os.path.isfile(rbs_info):
        print(f"{rbs_info} : no dataset info file")
        return ""

    camera_data = {}
    with open(camera_json, "r") as fh:
        data = json.load(fh)
    keys = ["cx", "cy", "fx", "fy"]
    intrinsic = {k: data[k] for k in keys}
    im_height = data["height"]
    im_width = data["width"]
    camera_data["camera_data"] = dict(intrinsic=intrinsic, height=im_height, width=im_width)
    K_intrinsic = [
        [data["fx"], 0.0, data["cx"]],
        [0.0, data["fy"], data["cy"]],
        [0.0, 0.0, 1.0],
    ]

    # compute the cuboid corners + center for every model
    with open(rbs_info, "r") as fh:
        info = json.load(fh)
    # list of object names
    list_name = list(map(lambda x: x["name"], info))
    # in FILE_RBS_INFO the models are numbered in ascending order
    model_info = []
    for m_info in info:
        cub = np.array(m_info["cuboid"]) * MODEL_SCALE
        xyz_min = cub.min(axis=0)
        xyz_max = cub.max(axis=0)
        # [xc, yc, zc] = min + (max - min) / 2
        center = []
        for i in range(3):
            center.append(xyz_min[i] + (xyz_max[i] - xyz_min[i]) / 2)
        c = np.array(center, ndmin=2)
        model_info.append(np.append(cub, c, axis=0))
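
    # At this point each model_info[i] is a 9x3 array in millimeters: the eight
    # cuboid corners read from rbs_info.json followed by the computed center
    # (the ordering documented near the top of this file).
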
    if pretrain:
        # continue training from existing weights
        if not os.path.isdir(out_dir):
            print(f"No dir '{out_dir}'")
            exit(-2)
        dpath = out_dir
        # model_path = os.path.join(dpath, wname + ".pt")
    else:
        # train from scratch
        if not os.path.isdir(out_dir):
            os.mkdir(out_dir)
        dpath = BOP2DOPE_dataset(ds_path, out_dir)
        if len(dpath) == 0:
            print(f"Error converting dataset '{ds_path}' to '{outpath}'")
            exit(-4)
        # model_path = os.path.join(dpath, FILE_BASEMODEL)

    # results = f"python train.py --local_rank 0 --data {dpath} --object fork" \
    #     + f" -e {epochs} --batchsize 16 --exts jpg --imagesize 640 --pretrained" \
    #     + " --net_path /home/shalenikol/fork_work/dope_training/output/weights_2996/net_epoch_47.pth"
    # print(results)
    train(dpath, wname, epochs, pretrain, list_name)

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--path", required=True, help="Path to the datasets directory")
    parser.add_argument("--name", required=True, help="Name for the resulting weights")
    parser.add_argument("--datasetName", required=True, help="Name of the dataset")
    parser.add_argument("--outpath", default="weights", help="Output path for the weights")
    parser.add_argument("--epoch", type=int, default=3, help="Number of training epochs")
    parser.add_argument("--pretrain", action="store_true", help="Continue training from existing weights")
    args = parser.parse_args()

    train_Dope_i(args.path, args.name, args.datasetName, args.outpath, args.epoch, args.pretrain)