# Copyright (c) 2018 NVIDIA Corporation. All rights reserved.
# This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.
# https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode

"""
|
|
Contains the following classes:
|
|
- ModelData - High level information encapsulation
|
|
- ObjectDetector - Greedy algorithm to build cuboids from belief maps
|
|
"""
|
|
# 14.06.2024 @shalenikol find_object_poses: remove "cuboid2d"
|
|
|
|
import time
import sys
from os import path

import numpy as np

import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.autograd import Variable
import torchvision.models as models

# scipy.ndimage.filters was deprecated and later removed; import from scipy.ndimage
from scipy.ndimage import gaussian_filter
from scipy import optimize

sys.path.append("../")

# Import the neural network model definitions and the cuboid PnP solver
from models import *
from cuboid_pnp_solver import *

# Global transform for image input: convert to a tensor and normalize with
# the ImageNet mean/std used to train the VGG19 backbone.
transform = transforms.Compose(
    [
        # transforms.Scale(IMAGE_SIZE),
        # transforms.CenterCrop((imagesize,imagesize)),
        transforms.ToTensor(),
        # transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ]
)
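

# A minimal usage sketch for `transform` (illustrative only, not called in this
# module), assuming a hypothetical 640x480 RGB frame stored as an HxWx3 uint8
# numpy array, e.g. a decoded camera image:
def _demo_transform():
    dummy_frame = np.zeros((480, 640, 3), dtype=np.uint8)  # stand-in for a real frame
    tensor = transform(dummy_frame)  # 3x480x640 float tensor, ImageNet-normalized
    batch = tensor.unsqueeze(0)  # 1x3x480x640, the layout the network expects
    return batch.shape
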

# ================================ Models ================================


class DopeNetwork(nn.Module):
    def __init__(
        self,
        numBeliefMap=9,
        numAffinity=16,
        stop_at_stage=6,  # number of stages to process (if less than total number of stages)
    ):
        super(DopeNetwork, self).__init__()

        self.stop_at_stage = stop_at_stage

        # The first 24 layers of VGG19 serve as the feature extractor
        vgg_full = models.vgg19(pretrained=False).features
        self.vgg = nn.Sequential()
        for i_layer in range(24):
            self.vgg.add_module(str(i_layer), vgg_full[i_layer])

        # Add some layers
        i_layer = 23
        self.vgg.add_module(
            str(i_layer), nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=1)
        )
        self.vgg.add_module(str(i_layer + 1), nn.ReLU(inplace=True))
        self.vgg.add_module(
            str(i_layer + 2), nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1)
        )
        self.vgg.add_module(str(i_layer + 3), nn.ReLU(inplace=True))

        # _2 are the belief map stages
        self.m1_2 = DopeNetwork.create_stage(128, numBeliefMap, True)
        self.m2_2 = DopeNetwork.create_stage(
            128 + numBeliefMap + numAffinity, numBeliefMap, False
        )
        self.m3_2 = DopeNetwork.create_stage(
            128 + numBeliefMap + numAffinity, numBeliefMap, False
        )
        self.m4_2 = DopeNetwork.create_stage(
            128 + numBeliefMap + numAffinity, numBeliefMap, False
        )
        self.m5_2 = DopeNetwork.create_stage(
            128 + numBeliefMap + numAffinity, numBeliefMap, False
        )
        self.m6_2 = DopeNetwork.create_stage(
            128 + numBeliefMap + numAffinity, numBeliefMap, False
        )

        # _1 are the affinity map stages
        self.m1_1 = DopeNetwork.create_stage(128, numAffinity, True)
        self.m2_1 = DopeNetwork.create_stage(
            128 + numBeliefMap + numAffinity, numAffinity, False
        )
        self.m3_1 = DopeNetwork.create_stage(
            128 + numBeliefMap + numAffinity, numAffinity, False
        )
        self.m4_1 = DopeNetwork.create_stage(
            128 + numBeliefMap + numAffinity, numAffinity, False
        )
        self.m5_1 = DopeNetwork.create_stage(
            128 + numBeliefMap + numAffinity, numAffinity, False
        )
        self.m6_1 = DopeNetwork.create_stage(
            128 + numBeliefMap + numAffinity, numAffinity, False
        )

    def forward(self, x):
        """Runs inference on the neural network"""

        out1 = self.vgg(x)

        out1_2 = self.m1_2(out1)
        out1_1 = self.m1_1(out1)

        if self.stop_at_stage == 1:
            return [out1_2], [out1_1]

        # Each later stage consumes the VGG features concatenated with the
        # previous stage's belief and affinity maps
        out2 = torch.cat([out1_2, out1_1, out1], 1)
        out2_2 = self.m2_2(out2)
        out2_1 = self.m2_1(out2)

        if self.stop_at_stage == 2:
            return [out1_2, out2_2], [out1_1, out2_1]

        out3 = torch.cat([out2_2, out2_1, out1], 1)
        out3_2 = self.m3_2(out3)
        out3_1 = self.m3_1(out3)

        if self.stop_at_stage == 3:
            return [out1_2, out2_2, out3_2], [out1_1, out2_1, out3_1]

        out4 = torch.cat([out3_2, out3_1, out1], 1)
        out4_2 = self.m4_2(out4)
        out4_1 = self.m4_1(out4)

        if self.stop_at_stage == 4:
            return [out1_2, out2_2, out3_2, out4_2], [out1_1, out2_1, out3_1, out4_1]

        out5 = torch.cat([out4_2, out4_1, out1], 1)
        out5_2 = self.m5_2(out5)
        out5_1 = self.m5_1(out5)

        if self.stop_at_stage == 5:
            return [out1_2, out2_2, out3_2, out4_2, out5_2], [
                out1_1,
                out2_1,
                out3_1,
                out4_1,
                out5_1,
            ]

        out6 = torch.cat([out5_2, out5_1, out1], 1)
        out6_2 = self.m6_2(out6)
        out6_1 = self.m6_1(out6)

        return [out1_2, out2_2, out3_2, out4_2, out5_2, out6_2], [
            out1_1,
            out2_1,
            out3_1,
            out4_1,
            out5_1,
            out6_1,
        ]

    @staticmethod
    def create_stage(in_channels, out_channels, first=False):
        """Create the neural network layers for a single stage."""

        model = nn.Sequential()
        mid_channels = 128
        if first:
            padding = 1
            kernel = 3
            count = 6
            final_channels = 512
        else:
            padding = 3
            kernel = 7
            count = 10
            final_channels = mid_channels

        # First convolution
        model.add_module(
            "0",
            nn.Conv2d(
                in_channels, mid_channels, kernel_size=kernel, stride=1, padding=padding
            ),
        )

        # Middle convolutions
        i = 1
        while i < count - 1:
            model.add_module(str(i), nn.ReLU(inplace=True))
            i += 1
            model.add_module(
                str(i),
                nn.Conv2d(
                    mid_channels,
                    mid_channels,
                    kernel_size=kernel,
                    stride=1,
                    padding=padding,
                ),
            )
            i += 1

        # Penultimate convolution
        model.add_module(str(i), nn.ReLU(inplace=True))
        i += 1
        model.add_module(
            str(i), nn.Conv2d(mid_channels, final_channels, kernel_size=1, stride=1)
        )
        i += 1

        # Last convolution
        model.add_module(str(i), nn.ReLU(inplace=True))
        i += 1
        model.add_module(
            str(i), nn.Conv2d(final_channels, out_channels, kernel_size=1, stride=1)
        )
        i += 1

        return model
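

# A minimal smoke-test sketch (illustrative only, not called in this module):
# run an untrained DopeNetwork on random data, assuming a hypothetical 640x480
# input, and inspect the per-stage output shapes.
def _demo_dope_network_shapes():
    net = DopeNetwork()
    x = torch.randn(1, 3, 480, 640)  # hypothetical input batch
    with torch.no_grad():
        beliefs, affinities = net(x)
    # The VGG feature extractor downsamples by 8, so the maps are 60x80 here:
    #   beliefs[-1]:    1 x 9 x 60 x 80   (8 cuboid corners + centroid)
    #   affinities[-1]: 1 x 16 x 60 x 80  (a 2D vector field per corner)
    return beliefs[-1].shape, affinities[-1].shape
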

class ModelData(object):
    """This class contains methods for loading the neural network"""

    def __init__(self, name="", net_path="", gpu_id=0, architecture="dope"):
        self.name = name
        self.net_path = net_path  # Path to trained network model
        self.net = None  # Trained network
        self.gpu_id = gpu_id
        self.architecture = architecture

    def get_net(self):
        """Returns the network, loading it from disk on first access"""
        if not self.net:
            self.load_net_model()
        return self.net

    def load_net_model(self):
        """Loads network model from disk"""
        if not self.net and path.exists(self.net_path):
            self.net = self.load_net_model_path(self.net_path)
        if not path.exists(self.net_path):
            print("ERROR: Unable to find model weights: '{}'".format(self.net_path))
            sys.exit(1)  # missing weights is fatal

    def load_net_model_path(self, model_path):
        """Loads network model from disk with the given path"""
        model_loading_start_time = time.time()
        print("Loading DOPE model '{}'...".format(model_path))
        net = DopeNetwork()

        # Weights were saved from a DataParallel-wrapped model, so wrap before loading
        net = torch.nn.DataParallel(net, [0]).cuda()
        net.load_state_dict(torch.load(model_path))
        net.eval()
        print(
            "    Model loaded in {:.2f} seconds.".format(
                time.time() - model_loading_start_time
            )
        )
        return net

    def __str__(self):
        """Converts to string"""
        return "{}: {}".format(self.name, self.net_path)
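

# A minimal usage sketch for ModelData (illustrative only, not called in this
# module); the weights path below is hypothetical and loading requires CUDA:
def _demo_model_data():
    model = ModelData(name="cracker", net_path="weights/cracker_60.pth")
    net = model.get_net()  # lazily loads the weights on first access
    print(model)  # -> "cracker: weights/cracker_60.pth"
    return net
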

# ================================ ObjectDetector ================================


class ObjectDetector(object):
    """This class contains methods for object detection"""

    @staticmethod
    def gaussian(height, center_x, center_y, width_x, width_y):
        """Returns a gaussian function with the given parameters"""
        width_x = float(width_x)
        width_y = float(width_y)
        return lambda x, y: height * np.exp(
            -(((center_x - x) / width_x) ** 2 + ((center_y - y) / width_y) ** 2) / 2
        )

    @staticmethod
    def moments(data):
        """Returns (height, x, y, width_x, width_y), the gaussian parameters
        of a 2D distribution, estimated from its moments"""
        total = data.sum()
        X, Y = np.indices(data.shape)
        x = (X * data).sum() / total
        y = (Y * data).sum() / total
        col = data[:, int(y)]
        width_x = np.sqrt(
            np.abs((np.arange(col.size) - y) ** 2 * col).sum() / col.sum()
        )
        row = data[int(x), :]
        width_y = np.sqrt(
            np.abs((np.arange(row.size) - x) ** 2 * row).sum() / row.sum()
        )
        height = data.max()
        return height, x, y, width_x, width_y

    @staticmethod
    def fitgaussian(data):
        """Returns (height, x, y, width_x, width_y), the gaussian parameters
        of a 2D distribution found by a least-squares fit"""
        params = ObjectDetector.moments(data)
        errorfunction = lambda p: np.ravel(
            ObjectDetector.gaussian(*p)(*np.indices(data.shape)) - data
        )
        p, success = optimize.leastsq(errorfunction, params)
        return p
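
    # A minimal sketch of `fitgaussian` on synthetic data (illustrative only):
    #
    #   yy, xx = np.indices((11, 11))
    #   blob = np.exp(-((xx - 5.0) ** 2 + (yy - 4.0) ** 2) / (2 * 1.5 ** 2))
    #   height, x, y, width_x, width_y = ObjectDetector.fitgaussian(blob)
    #   # x ~ 4.0 and y ~ 5.0: x indexes rows and y columns, following `moments`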

    @staticmethod
    def make_grid(
        tensor,
        nrow=8,
        padding=2,
        normalize=False,
        range_=None,
        scale_each=False,
        pad_value=0,
    ):
        """Make a grid of images.

        Args:
            tensor (Tensor or list): 4D mini-batch Tensor of shape (B x C x H x W)
                or a list of images all of the same size.
            nrow (int, optional): Number of images displayed in each row of the grid.
                The final grid size is (B / nrow, nrow). Default is 8.
            padding (int, optional): amount of padding. Default is 2.
            normalize (bool, optional): If True, shift the image to the range (0, 1),
                by subtracting the minimum and dividing by the maximum pixel value.
            range_ (tuple, optional): tuple (min, max) where min and max are numbers,
                then these numbers are used to normalize the image. By default, min and max
                are computed from the tensor.
            scale_each (bool, optional): If True, scale each image in the batch of
                images separately rather than the (min, max) over all images.
            pad_value (float, optional): Value for the padded pixels.

        Example:
            See this notebook `here <https://gist.github.com/anonymous/bf16430f7750c023141c562f3e9f2a91>`_
        """
        import math

        if not (
            torch.is_tensor(tensor)
            or (isinstance(tensor, list) and all(torch.is_tensor(t) for t in tensor))
        ):
            raise TypeError(
                "tensor or list of tensors expected, got {}".format(type(tensor))
            )

        # if list of tensors, convert to a 4D mini-batch Tensor
        if isinstance(tensor, list):
            tensor = torch.stack(tensor, dim=0)

        if tensor.dim() == 2:  # single image H x W
            tensor = tensor.view(1, tensor.size(0), tensor.size(1))
        if tensor.dim() == 3:  # single image
            if tensor.size(0) == 1:  # if single-channel, convert to 3-channel
                tensor = torch.cat((tensor, tensor, tensor), 0)
            tensor = tensor.view(1, tensor.size(0), tensor.size(1), tensor.size(2))

        if tensor.dim() == 4 and tensor.size(1) == 1:  # single-channel images
            tensor = torch.cat((tensor, tensor, tensor), 1)

        if normalize is True:
            tensor = tensor.clone()  # avoid modifying tensor in-place
            if range_ is not None:
                assert isinstance(
                    range_, tuple
                ), "range_ has to be a tuple (min, max) if specified. min and max are numbers"

            def norm_ip(img, low, high):
                img.clamp_(min=low, max=high)
                img.add_(-low).div_(high - low + 1e-5)

            def norm_range(t, value_range):
                if value_range is not None:
                    norm_ip(t, value_range[0], value_range[1])
                else:
                    norm_ip(t, float(t.min()), float(t.max()))

            if scale_each is True:
                for t in tensor:  # loop over mini-batch dimension
                    norm_range(t, range_)
            else:
                norm_range(tensor, range_)

        if tensor.size(0) == 1:
            return tensor.squeeze()

        # make the mini-batch of images into a grid
        nmaps = tensor.size(0)
        xmaps = min(nrow, nmaps)
        ymaps = int(math.ceil(float(nmaps) / xmaps))
        height, width = int(tensor.size(2) + padding), int(tensor.size(3) + padding)
        grid = tensor.new(3, height * ymaps + padding, width * xmaps + padding).fill_(
            pad_value
        )
        k = 0
        for y in range(ymaps):
            for x in range(xmaps):
                if k >= nmaps:
                    break
                grid.narrow(1, y * height + padding, height - padding).narrow(
                    2, x * width + padding, width - padding
                ).copy_(tensor[k])
                k = k + 1
        return grid
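
    # A minimal sketch of `make_grid` (illustrative only): tile eight random
    # single-channel maps into one 3-channel image tensor.
    #
    #   maps = torch.rand(8, 1, 60, 80)
    #   grid = ObjectDetector.make_grid(maps, nrow=4, normalize=True)
    #   # grid is 3 x H x W; single-channel inputs are replicated to 3 channels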

    @staticmethod
    def get_image_grid(tensor, filename, nrow=3, padding=2, mean=None, std=None):
        """
        Converts a given Tensor into a PIL image.
        If given a mini-batch tensor, lays the tensor out as a grid of images.
        """
        from PIL import Image

        # note: grid padding is fixed at 10 here, overriding the `padding` argument
        grid = ObjectDetector.make_grid(tensor, nrow=nrow, padding=10, pad_value=1)
        if mean is not None:
            # un-normalize with the provided mean/std, then convert CHW -> HWC
            ndarr = (
                grid.mul(std)
                .add(mean)
                .mul(255)
                .byte()
                .transpose(0, 2)
                .transpose(0, 1)
                .numpy()
            )
        else:
            ndarr = (
                grid.mul(0.5)
                .add(0.5)
                .mul(255)
                .byte()
                .transpose(0, 2)
                .transpose(0, 1)
                .numpy()
            )
        im = Image.fromarray(ndarr)
        # im.save(filename)
        return im

    @staticmethod
    def detect_object_in_image(
        net_model, pnp_solver, in_img, config, grid_belief_debug=False, norm_belief=True
    ):
        """Detect objects in an image using a specific trained network model.
        Returns the poses of the objects and the belief maps.
        """

        if in_img is None:
            return []

        # Run network inference
        image_tensor = transform(in_img)
        image_torch = Variable(image_tensor).cuda().unsqueeze(0)
        with torch.no_grad():
            # run inference using the network (calls the 'forward' method)
            out, seg = net_model(image_torch)
        vertex2 = out[-1][0]  # belief maps from the last stage
        aff = seg[-1][0]  # affinity maps from the last stage

        # Find objects from network output
        detected_objects = ObjectDetector.find_object_poses(
            vertex2, aff, pnp_solver, config
        )

        if not grid_belief_debug:
            return detected_objects, None
        else:
            # Build a debug visualization of the belief maps overlaid on the image
            upsampling = nn.UpsamplingNearest2d(scale_factor=8)
            tensor = vertex2
            belief_imgs = []
            in_img = torch.tensor(in_img).float() / 255.0
            in_img *= 0.7

            for j in range(tensor.size()[0]):
                belief = tensor[j].clone()
                if norm_belief:
                    belief -= float(torch.min(belief).item())
                    belief /= float(torch.max(belief).item())

                belief = (
                    upsampling(belief.unsqueeze(0).unsqueeze(0))
                    .squeeze()
                    .squeeze()
                    .data
                )
                belief = torch.clamp(belief, 0, 1).cpu()
                belief = torch.cat(
                    [
                        belief.unsqueeze(0) + in_img[:, :, 0],
                        belief.unsqueeze(0) + in_img[:, :, 1],
                        belief.unsqueeze(0) + in_img[:, :, 2],
                    ]
                ).unsqueeze(0)
                belief = torch.clamp(belief, 0, 1)

                belief_imgs.append(belief.data.squeeze().numpy())

            # Create the image grid
            belief_imgs = torch.tensor(np.array(belief_imgs))
            im_belief = ObjectDetector.get_image_grid(belief_imgs, None, mean=0, std=1)

            return detected_objects, im_belief
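
    # A minimal end-to-end sketch (illustrative only). The weights path is
    # hypothetical, and `config` (fields such as sigma, thresh_map,
    # thresh_points, thresh_angle) and the camera intrinsics for the solver
    # come from the surrounding application:
    #
    #   model = ModelData(name="cracker", net_path="weights/cracker_60.pth")
    #   pnp = CuboidPNPSolver(...)  # constructed with the camera intrinsics
    #   detections, _ = ObjectDetector.detect_object_in_image(
    #       model.get_net(), pnp, rgb_image, config
    #   )
    #   for det in detections:
    #       print(det["name"], det["location"], det["quaternion"])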

    @staticmethod
    def find_object_poses(
        vertex2,
        aff,
        pnp_solver,
        config,
        run_sampling=False,
        num_sample=100,
        scale_factor=8,
    ):
        """Detect objects given network output"""

        # Detect objects from belief maps and affinities
        objects, all_peaks = ObjectDetector.find_objects(
            vertex2,
            aff,
            config,
            run_sampling=run_sampling,
            num_sample=num_sample,
            scale_factor=scale_factor,
        )
        detected_objects = []
        obj_name = pnp_solver.object_name

        # print(all_peaks)  # debug: raw peak list

        for obj in objects:
            # Run PnP on the eight corner peaks plus the (rescaled) centroid
            points = obj[1] + [(obj[0][0] * scale_factor, obj[0][1] * scale_factor)]
            # cuboid2d = np.copy(points)
            location, quaternion, projected_points = pnp_solver.solve_pnp(points)

            # Optionally run PnP on multiple perturbed samples of the keypoints
            # to estimate the spread (mean/std) of the recovered pose
            if run_sampling:
                lx, ly, lz = [], [], []
                qx, qy, qz, qw = [], [], [], []

                for i_sample in range(num_sample):
                    sample = []
                    for i_point in range(len(obj[-1])):
                        if obj[-1][i_point][i_sample] is not None:
                            sample.append(
                                (
                                    obj[-1][i_point][i_sample][0] * scale_factor,
                                    obj[-1][i_point][i_sample][1] * scale_factor,
                                )
                            )
                        else:
                            sample.append(None)
                    pnp_sample = pnp_solver.solve_pnp(sample)

                    try:
                        lx.append(pnp_sample[0][0])
                        ly.append(pnp_sample[0][1])
                        lz.append(pnp_sample[0][2])

                        qx.append(pnp_sample[1][0])
                        qy.append(pnp_sample[1][1])
                        qz.append(pnp_sample[1][2])
                        qw.append(pnp_sample[1][3])
                    except Exception:
                        pass
                # TODO
                # RUN quaternion as well for the std and avg.

                try:
                    print("----")
                    print("location:")
                    print(location[0], location[1], location[2])
                    print(np.mean(lx), np.mean(ly), np.mean(lz))
                    print(np.std(lx), np.std(ly), np.std(lz))
                    print("quaternion:")
                    print(quaternion[0], quaternion[1], quaternion[2], quaternion[3])
                    print(np.mean(qx), np.mean(qy), np.mean(qz), np.mean(qw))
                    print(np.std(qx), np.std(qy), np.std(qz), np.std(qw))
                except Exception:
                    pass

            if location is not None:
                detected_objects.append(
                    {
                        "name": obj_name,
                        "location": location,
                        "quaternion": quaternion,
                        # "cuboid2d": cuboid2d,
                        "projected_points": projected_points,
                        "confidence": obj[3],  # centroid peak score
                        "raw_points": points,
                    }
                )

        return detected_objects

    @staticmethod
    def find_objects(
        vertex2,
        aff,
        config,
        numvertex=8,
        run_sampling=False,
        num_sample=100,
        scale_factor=8,
    ):
        """Detects objects given network belief maps and affinities, using a greedy heuristic"""

        all_peaks = []
        all_samples = []

        peak_counter = 0
        for j in range(vertex2.size()[0]):
            belief = vertex2[j].clone()
            map_ori = belief.cpu().data.numpy()

            map_smooth = gaussian_filter(belief.cpu().data.numpy(), sigma=config.sigma)

            # A pixel is a peak if it is >= its four shifted neighbours
            # and above the detection threshold
            p = 1
            map_left = np.zeros(map_smooth.shape)
            map_left[p:, :] = map_smooth[:-p, :]
            map_right = np.zeros(map_smooth.shape)
            map_right[:-p, :] = map_smooth[p:, :]
            map_up = np.zeros(map_smooth.shape)
            map_up[:, p:] = map_smooth[:, :-p]
            map_down = np.zeros(map_smooth.shape)
            map_down[:, :-p] = map_smooth[:, p:]

            peaks_binary = np.logical_and.reduce(
                (
                    map_smooth >= map_left,
                    map_smooth >= map_right,
                    map_smooth >= map_up,
                    map_smooth >= map_down,
                    map_smooth > config.thresh_map,
                )
            )
            peaks = zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0])

            # Compute the weighted average around each peak for sub-pixel localization
            peaks = list(peaks)
            win = 11
            ran = win // 2
            peaks_avg = []
            point_sample_list = []
            for p_value in range(len(peaks)):
                p = peaks[p_value]
                weights = np.zeros((win, win))
                i_values = np.zeros((win, win))
                j_values = np.zeros((win, win))
                for i in range(-ran, ran + 1):
                    for j in range(-ran, ran + 1):
                        if (
                            p[1] + i < 0
                            or p[1] + i >= map_ori.shape[0]
                            or p[0] + j < 0
                            or p[0] + j >= map_ori.shape[1]
                        ):
                            continue

                        i_values[j + ran, i + ran] = p[1] + i
                        j_values[j + ran, i + ran] = p[0] + j

                        weights[j + ran, i + ran] = map_ori[p[1] + i, p[0] + j]

                OFFSET_DUE_TO_UPSAMPLING = 0.4395

                # Sample points from a gaussian fitted around the peak
                if run_sampling:
                    data = weights
                    params = ObjectDetector.fitgaussian(data)
                    fit = ObjectDetector.gaussian(*params)
                    _, mu_x, mu_y, std_x, std_y = params
                    points_sample = np.random.multivariate_normal(
                        np.array(
                            [
                                p[1] + mu_x + OFFSET_DUE_TO_UPSAMPLING,
                                p[0] - mu_y + OFFSET_DUE_TO_UPSAMPLING,
                            ]
                        ),
                        # np.array([[std_x*std_x,0],[0,std_y*std_y]]), size=num_sample)
                        np.array([[std_x, 0], [0, std_y]]),
                        size=num_sample,
                    )
                    point_sample_list.append(points_sample)

                # If the weights are all zeros, fall back to the raw peak location
                try:
                    peaks_avg.append(
                        (
                            np.average(j_values, weights=weights)
                            + OFFSET_DUE_TO_UPSAMPLING,
                            np.average(i_values, weights=weights)
                            + OFFSET_DUE_TO_UPSAMPLING,
                        )
                    )
                except Exception:
                    peaks_avg.append(
                        (
                            p[0] + OFFSET_DUE_TO_UPSAMPLING,
                            p[1] + OFFSET_DUE_TO_UPSAMPLING,
                        )
                    )

            # Note: Python3 doesn't support len for zip object
            peaks_len = min(
                len(np.nonzero(peaks_binary)[1]), len(np.nonzero(peaks_binary)[0])
            )

            peaks_with_score = [
                peaks_avg[x_] + (map_ori[peaks[x_][1], peaks[x_][0]],)
                for x_ in range(len(peaks))
            ]

            peak_ids = range(peak_counter, peak_counter + peaks_len)

            peaks_with_score_and_id = [
                peaks_with_score[i] + (peak_ids[i],) for i in range(len(peak_ids))
            ]

            all_peaks.append(peaks_with_score_and_id)
            all_samples.append(point_sample_list)
            peak_counter += peaks_len

        objects = []

        if aff is None:
            # Assume there is only one object
            points = [None for i in range(numvertex)]
            for i_peak, peaks in enumerate(all_peaks):
                for peak in peaks:
                    if peak[2] > config.threshold:
                        points[i_peak] = (peak[0], peak[1])

            return points

        # Check the object centroids and build an object for each centroid found
        for nb_object in range(len(all_peaks[-1])):
            if all_peaks[-1][nb_object][2] > config.thresh_points:
                objects.append(
                    [
                        [
                            all_peaks[-1][nb_object][:2][0],
                            all_peaks[-1][nb_object][:2][1],
                        ],
                        [None for i in range(numvertex)],
                        [None for i in range(numvertex)],
                        all_peaks[-1][nb_object][2],
                        [
                            [None for j in range(num_sample)]
                            for i in range(numvertex + 1)
                        ],
                    ]
                )

                # Check if the object was added before
                if run_sampling and nb_object < len(objects):
                    # add the samples to the object centroids
                    objects[nb_object][4][-1] = all_samples[-1][nb_object]

        # Working with an output that only has belief maps
        # (unreachable in practice: the `aff is None` case returns early above)
        if aff is None:
            if len(objects) > 0 and len(all_peaks) > 0 and len(all_peaks[0]) > 0:
                for i_points in range(8):
                    if (
                        len(all_peaks[i_points]) > 0
                        and all_peaks[i_points][0][2] > config.threshold
                    ):
                        objects[0][1][i_points] = (
                            all_peaks[i_points][0][0],
                            all_peaks[i_points][0][1],
                        )
        else:
            # For all points found
            for i_lists in range(len(all_peaks[:-1])):
                lists = all_peaks[i_lists]

                # A candidate is a corner point that needs to be matched to a centroid
                for i_candidate, candidate in enumerate(lists):
                    if candidate[2] < config.thresh_points:
                        continue

                    i_best = -1
                    best_dist = 10000
                    best_angle = 100

                    # Find the centroid that this point links to
                    for i_obj in range(len(objects)):
                        center = [objects[i_obj][0][0], objects[i_obj][0][1]]

                        # the integer coordinates index the affinity map,
                        # while the float version is used for the geometry
                        point_int = [int(candidate[0]), int(candidate[1])]
                        point = [candidate[0], candidate[1]]

                        # look at the distance to the vector field
                        v_aff = (
                            np.array(
                                [
                                    aff[
                                        i_lists * 2, point_int[1], point_int[0]
                                    ].data.item(),
                                    aff[
                                        i_lists * 2 + 1, point_int[1], point_int[0]
                                    ].data.item(),
                                ]
                            )
                            * 10
                        )

                        # normalize the affinity vector
                        xvec = v_aff[0]
                        yvec = v_aff[1]

                        norms = np.sqrt(xvec * xvec + yvec * yvec)

                        xvec /= norms
                        yvec /= norms

                        v_aff = np.concatenate([[xvec], [yvec]])

                        # normalize the point-to-centroid vector
                        v_center = np.array(center) - np.array(point)
                        xvec = v_center[0]
                        yvec = v_center[1]

                        norms = np.sqrt(xvec * xvec + yvec * yvec)

                        xvec /= norms
                        yvec /= norms

                        v_center = np.concatenate([[xvec], [yvec]])

                        # disagreement between the two unit vectors
                        dist_angle = np.linalg.norm(v_center - v_aff)

                        # distance between vertex and centroid
                        dist_point = np.linalg.norm(np.array(point) - np.array(center))

                        if (
                            dist_angle < config.thresh_angle
                            and best_dist > 1000
                            or dist_angle < config.thresh_angle
                            and best_dist > dist_point
                        ):
                            i_best = i_obj
                            best_angle = dist_angle
                            best_dist = dist_point

                    if i_best == -1:
                        continue

                    if (
                        objects[i_best][1][i_lists] is None
                        or best_angle < config.thresh_angle
                        and best_dist < objects[i_best][2][i_lists][1]
                    ):
                        # set the points
                        objects[i_best][1][i_lists] = (
                            (candidate[0]) * scale_factor,
                            (candidate[1]) * scale_factor,
                        )
                        # set information about the points: angle and distance
                        objects[i_best][2][i_lists] = (best_angle, best_dist)
                        # add the sample points
                        if run_sampling:
                            objects[i_best][4][i_lists] = all_samples[i_lists][
                                i_candidate
                            ]
        return objects, all_peaks
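

if __name__ == "__main__":
    # A minimal, self-contained sketch of the peak-extraction idea used in
    # `find_objects` (illustrative only): a pixel is a peak if it is greater
    # than or equal to its four shifted neighbours and above a threshold.
    belief = np.zeros((20, 20))
    belief[5, 7] = 1.0
    belief[14, 3] = 0.8
    smoothed = gaussian_filter(belief, sigma=2)

    p = 1
    shift_down = np.zeros(smoothed.shape)
    shift_down[p:, :] = smoothed[:-p, :]
    shift_up = np.zeros(smoothed.shape)
    shift_up[:-p, :] = smoothed[p:, :]
    shift_right = np.zeros(smoothed.shape)
    shift_right[:, p:] = smoothed[:, :-p]
    shift_left = np.zeros(smoothed.shape)
    shift_left[:, :-p] = smoothed[:, p:]

    peaks_binary = np.logical_and.reduce(
        (
            smoothed >= shift_down,
            smoothed >= shift_up,
            smoothed >= shift_right,
            smoothed >= shift_left,
            smoothed > 0.01,
        )
    )
    # Prints the two seeded maxima as (column, row) pairs, as in find_objects
    print(list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0])))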