add rbss_poseestimation

This commit is contained in:
shalenikol 2024-10-15 11:14:55 +03:00 committed by Bill Finger
parent dba3b8682a
commit b8dad502a1
11 changed files with 1930 additions and 0 deletions

View file

@ -0,0 +1,52 @@
cmake_minimum_required(VERSION 3.8)
project(rbss_poseestimation)
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
add_compile_options(-Wall -Wextra -Wpedantic)
endif()
# find dependencies
find_package(ament_cmake REQUIRED)
find_package(ament_cmake_python REQUIRED)
find_package(rclpy REQUIRED)
# find_package(rclcpp REQUIRED)
find_package(image_transport REQUIRED)
find_package(cv_bridge REQUIRED)
find_package(sensor_msgs REQUIRED)
find_package(std_msgs REQUIRED)
find_package(geometry_msgs REQUIRED)
find_package(rbs_skill_interfaces REQUIRED)
find_package(PCL 1.12 REQUIRED)
find_package(pcl_conversions REQUIRED)
find_package(pcl_ros REQUIRED)
find_package(tf2_ros REQUIRED)
find_package(tf2_msgs REQUIRED)
find_package(visualization_msgs REQUIRED)
# Install Python modules
ament_python_install_package(${PROJECT_NAME})
# Install Python executables
install(PROGRAMS
scripts/pe_dope_lc.py
DESTINATION lib/${PROJECT_NAME}
)
install(
DIRECTORY config launch
DESTINATION share/${PROJECT_NAME}
)
if(BUILD_TESTING)
find_package(ament_lint_auto REQUIRED)
# the following line skips the linter which checks for copyrights
# comment the line when a copyright and license is added to all source files
set(ament_cmake_copyright_FOUND TRUE)
# the following line skips cpplint (only works in a git repo)
# comment the line when this package is in a git repo and when
# a copyright and license is added to all source files
set(ament_cmake_cpplint_FOUND TRUE)
ament_lint_auto_find_test_dependencies()
endif()
ament_package()

View file

@ -0,0 +1,24 @@
input_is_rectified: True # Whether the input image is rectified (strongly suggested!)
downscale_height: 480 # if the input image is larger than this, scale it down to this pixel height
# # Cuboid dimension in cm x,y,z
# dimensions: {
# "model1": [13.7, 16.5, 20.2],
# "model2": [10.0, 10.0, 22.64],
# }
# class_ids: {
# "model1": 1,
# "model2": 2,
# }
# draw_colors: {
# "model1": [13, 255, 128], # green
# "model2": [255, 255, 255], # white
# }
# Config params for DOPE
thresh_angle: 0.5
thresh_map: 0.01
sigma: 3
thresh_points: 0.0
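
As a quick illustration (not part of the commit), a minimal sketch of reading this file in Python; the lifecycle node further down does the equivalent in _load_config_DOPE(), and the file name is taken from its FILE_DOPE_CONFIG constant.

import yaml

with open("pe_dope_config.yaml", "r") as f:
    dope_cfg = yaml.safe_load(f)

print(dope_cfg["input_is_rectified"], dope_cfg["downscale_height"])
print(dope_cfg["thresh_angle"], dope_cfg["thresh_map"], dope_cfg["sigma"], dope_cfg["thresh_points"])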

View file

@ -0,0 +1,17 @@
# from launch_ros.actions import Node
from launch import LaunchDescription


def generate_launch_description():
    print("The skill must be launched via the interface node")
    # Return an empty LaunchDescription so this remains a valid launch script.
    return LaunchDescription([])
# declared_arguments = []
# rbss_pe = Node(
# package="rbss_poseestimation",
#     executable="pe_dope_lc.py",
# )
# nodes_to_start = [
# rbss_pe,
# ]
# return LaunchDescription(declared_arguments + nodes_to_start)

View file

@ -0,0 +1,27 @@
<?xml version="1.0"?>
<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
<package format="3">
<name>rbss_poseestimation</name>
<version>0.0.1</version>
<description>The Pose Estimation skill</description>
<maintainer email="shaniks77s@gmail.com">shalenikol</maintainer>
<license>Apache License 2.0</license>
<buildtool_depend>ament_cmake</buildtool_depend>
<buildtool_depend>ament_cmake_python</buildtool_depend>
<depend>rclpy</depend>
<!-- <depend>rclcpp</depend> -->
<depend>image_transport</depend>
<depend>cv_bridge</depend>
<depend>sensor_msgs</depend>
  <depend>std_msgs</depend>
  <depend>geometry_msgs</depend>
<depend>rbs_skill_interfaces</depend>
<test_depend>ament_lint_auto</test_depend>
<test_depend>ament_lint_common</test_depend>
<export>
<build_type>ament_cmake</build_type>
</export>
</package>

View file

@ -0,0 +1,75 @@
{
"SkillPackage": {
"name": "Robossembler",
"version": "1.0",
"format": "1"
},
"Module": {
"name": "PoseEstimation",
"description": "Pose Estimation skill with DOPE",
"node_name": "lc_dope"
},
"Launch": {
"package": "rbss_poseestimation",
"executable": "pe_dope_lc.py"
},
"BTAction": [
{
"name": "peConfigure",
"type": "run",
"param": [
{
"type": "weights",
"dependency": {
"object_name": "knight",
"weights_file": "/home/shalenikol/0_rbs/w_knight.pth",
"dimensions": [
0.03,
0.026,
0.065
]
}
}
]
},
{
"name": "peStop",
"type": "stop",
"param": [],
"result": []
}
],
"Interface": {
"Input": [
{
"name": "cameraLink",
"type": "CAMERA"
},
{
"name": "object_name",
"type": "MODEL"
}
],
"Output": [
{
"name": "object_detection",
"type": "BoundBox",
"description": "Topic for publisher"
}
]
},
"Settings": [
{
"name": "cameraLink",
"value": "rgbd_camera"
},
{
"name": "publishDelay",
"value": 0.5
},
{
"name": "is_image_mode",
"value": true
}
]
}
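
For orientation (not part of the skill description itself), a minimal Python sketch of unpacking the BTAction weights dependency above once the JSON reaches a node; the file name here is an assumption, since in practice the JSON arrives as a string via the "lc_dope_cfg" node parameter.

import json

with open("pose_estimation_skill.json", "r") as f:  # hypothetical file name
    skill_cfg = json.load(f)

weights_dep = {}
for action in skill_cfg["BTAction"]:
    for par in action["param"]:
        if par["type"] == "weights":
            weights_dep = par["dependency"]

print(weights_dep["object_name"])   # "knight"
print(weights_dep["weights_file"])  # path to the .pth weights
print(weights_dep["dimensions"])    # cuboid size later passed to Cuboid3d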

View file

@ -0,0 +1,126 @@
# Copyright (c) 2018 NVIDIA Corporation. All rights reserved.
# This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.
# https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
from enum import IntEnum, unique
import numpy as np
import cv2
# Related to the object's local coordinate system
# @unique
class CuboidVertexType(IntEnum):
FrontTopRight = 0
FrontTopLeft = 1
FrontBottomLeft = 2
FrontBottomRight = 3
RearTopRight = 4
RearTopLeft = 5
RearBottomLeft = 6
RearBottomRight = 7
Center = 8
    TotalCornerVertexCount = 8  # corner vertices don't include the center point
TotalVertexCount = 9
# List of vertex index pairs, one pair per edge of the cuboid
CuboidLineIndexes = [
# Front face
[CuboidVertexType.FrontTopLeft, CuboidVertexType.FrontTopRight],
[CuboidVertexType.FrontTopRight, CuboidVertexType.FrontBottomRight],
[CuboidVertexType.FrontBottomRight, CuboidVertexType.FrontBottomLeft],
[CuboidVertexType.FrontBottomLeft, CuboidVertexType.FrontTopLeft],
# Back face
[CuboidVertexType.RearTopLeft, CuboidVertexType.RearTopRight],
[CuboidVertexType.RearTopRight, CuboidVertexType.RearBottomRight],
[CuboidVertexType.RearBottomRight, CuboidVertexType.RearBottomLeft],
[CuboidVertexType.RearBottomLeft, CuboidVertexType.RearTopLeft],
# Left face
[CuboidVertexType.FrontBottomLeft, CuboidVertexType.RearBottomLeft],
[CuboidVertexType.FrontTopLeft, CuboidVertexType.RearTopLeft],
# Right face
[CuboidVertexType.FrontBottomRight, CuboidVertexType.RearBottomRight],
[CuboidVertexType.FrontTopRight, CuboidVertexType.RearTopRight],
]
# ========================= Cuboid3d =========================
class Cuboid3d:
"""This class contains a 3D cuboid."""
# Create a box with a certain size
def __init__(
self,
size3d=[1.0, 1.0, 1.0],
center_location=[0, 0, 0],
coord_system=None,
parent_object=None,
):
# NOTE: This local coordinate system is similar
# to the intrinsic transform matrix of a 3d object
self.center_location = center_location
self.coord_system = coord_system
self.size3d = size3d
        self._vertices = [[0, 0, 0]] * CuboidVertexType.TotalVertexCount
self.generate_vertexes()
def get_vertex(self, vertex_type):
"""Returns the location of a vertex.
Args:
vertex_type: enum of type CuboidVertexType
Returns:
Numpy array(3) - Location of the vertex type in the cuboid
"""
return self._vertices[vertex_type]
def get_vertices(self):
return self._vertices
def generate_vertexes(self):
width, height, depth = self.size3d
# By default just use the normal OpenCV coordinate system
if self.coord_system is None:
cx, cy, cz = self.center_location
# X axis point to the right
right = cx + width / 2.0
left = cx - width / 2.0
# Y axis point downward
top = cy - height / 2.0
bottom = cy + height / 2.0
# Z axis point forward
front = cz + depth / 2.0
rear = cz - depth / 2.0
# List of 8 vertices of the box
self._vertices = [
[right, top, front], # Front Top Right
[left, top, front], # Front Top Left
[left, bottom, front], # Front Bottom Left
[right, bottom, front], # Front Bottom Right
[right, top, rear], # Rear Top Right
[left, top, rear], # Rear Top Left
[left, bottom, rear], # Rear Bottom Left
[right, bottom, rear], # Rear Bottom Right
self.center_location, # Center
]
else:
sx, sy, sz = self.size3d
forward = np.array(self.coord_system.forward, dtype=float) * sy * 0.5
up = np.array(self.coord_system.up, dtype=float) * sz * 0.5
right = np.array(self.coord_system.right, dtype=float) * sx * 0.5
center = np.array(self.center_location, dtype=float)
self._vertices = [
center + forward + up + right, # Front Top Right
center + forward + up - right, # Front Top Left
center + forward - up - right, # Front Bottom Left
center + forward - up + right, # Front Bottom Right
center - forward + up + right, # Rear Top Right
center - forward + up - right, # Rear Top Left
center - forward - up - right, # Rear Bottom Left
center - forward - up + right, # Rear Bottom Right
self.center_location, # Center
]
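
A small usage sketch (assuming this file is importable as cuboid, as the other modules in this commit do): build a box from the knight dimensions used in the skill description and read its vertices.

from cuboid import Cuboid3d, CuboidVertexType

# Dimensions in metres, matching the "knight" entry in the skill description above.
box = Cuboid3d(size3d=[0.03, 0.026, 0.065])

print(len(box.get_vertices()))                         # 9: eight corners plus the center
print(box.get_vertex(CuboidVertexType.FrontTopRight))  # [0.015, -0.013, 0.0325]
print(box.get_vertex(CuboidVertexType.Center))         # [0, 0, 0]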

View file

@ -0,0 +1,146 @@
# Copyright (c) 2018 NVIDIA Corporation. All rights reserved.
# This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.
# https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
import cv2
import numpy as np
from cuboid import CuboidVertexType
from pyrr import Quaternion
class CuboidPNPSolver(object):
"""
This class is used to find the 6-DoF pose of a cuboid given its projected vertices.
Runs perspective-n-point (PNP) algorithm.
"""
# Class variables
cv2version = cv2.__version__.split(".")
cv2majorversion = int(cv2version[0])
def __init__(
self,
object_name="",
camera_intrinsic_matrix=None,
cuboid3d=None,
dist_coeffs=np.zeros((4, 1)),
):
self.object_name = object_name
if not camera_intrinsic_matrix is None:
self._camera_intrinsic_matrix = camera_intrinsic_matrix
else:
self._camera_intrinsic_matrix = np.array([[0, 0, 0], [0, 0, 0], [0, 0, 0]])
self._cuboid3d = cuboid3d
self._dist_coeffs = dist_coeffs
def set_camera_intrinsic_matrix(self, new_intrinsic_matrix):
"""Sets the camera intrinsic matrix"""
self._camera_intrinsic_matrix = new_intrinsic_matrix
def set_dist_coeffs(self, dist_coeffs):
"""Sets the camera intrinsic matrix"""
self._dist_coeffs = dist_coeffs
def solve_pnp(self, cuboid2d_points, pnp_algorithm=None):
"""
        Detects the rotation and translation
        of a cuboid object from its vertices'
        2D locations in the image
"""
        # Fall back to the default PnP algorithm based on the OpenCV version
if pnp_algorithm is None:
if CuboidPNPSolver.cv2majorversion == 2:
pnp_algorithm = cv2.CV_ITERATIVE
elif CuboidPNPSolver.cv2majorversion == 3:
pnp_algorithm = cv2.SOLVEPNP_ITERATIVE
if pnp_algorithm is None:
pnp_algorithm = cv2.SOLVEPNP_EPNP
location = None
quaternion = None
projected_points = cuboid2d_points
cuboid3d_points = np.array(self._cuboid3d.get_vertices())
obj_2d_points = []
obj_3d_points = []
for i in range(CuboidVertexType.TotalVertexCount):
check_point_2d = cuboid2d_points[i]
# Ignore invalid points
if check_point_2d is None:
continue
obj_2d_points.append(check_point_2d)
obj_3d_points.append(cuboid3d_points[i])
obj_2d_points = np.array(obj_2d_points, dtype=float)
obj_3d_points = np.array(obj_3d_points, dtype=float)
valid_point_count = len(obj_2d_points)
# Can only do PNP if we have more than 3 valid points
is_points_valid = valid_point_count >= 4
if is_points_valid:
ret, rvec, tvec = cv2.solvePnP(
obj_3d_points,
obj_2d_points,
self._camera_intrinsic_matrix,
self._dist_coeffs,
flags=pnp_algorithm,
)
if ret:
location = list(x[0] for x in tvec)
quaternion = self.convert_rvec_to_quaternion(rvec)
projected_points, _ = cv2.projectPoints(
cuboid3d_points,
rvec,
tvec,
self._camera_intrinsic_matrix,
self._dist_coeffs,
)
projected_points = np.squeeze(projected_points)
# If the location.Z is negative or object is behind the camera then flip both location and rotation
x, y, z = location
if z < 0:
# Get the opposite location
location = [-x, -y, -z]
# Change the rotation by 180 degree
rotate_angle = np.pi
rotate_quaternion = Quaternion.from_axis_rotation(
location, rotate_angle
)
quaternion = rotate_quaternion.cross(quaternion)
return location, quaternion, projected_points
def convert_rvec_to_quaternion(self, rvec):
"""Convert rvec (which is log quaternion) to quaternion"""
theta = np.sqrt(
rvec[0] * rvec[0] + rvec[1] * rvec[1] + rvec[2] * rvec[2]
) # in radians
raxis = [rvec[0] / theta, rvec[1] / theta, rvec[2] / theta]
# pyrr's Quaternion (order is XYZW), https://pyrr.readthedocs.io/en/latest/oo_api_quaternion.html
return Quaternion.from_axis_rotation(raxis, theta)
    def project_points(self, rvec, tvec):
        """Project the cuboid's model points onto the image using the given rotation and translation"""
        object_points = np.array(self._cuboid3d.get_vertices(), dtype=float)
        output_points, _ = cv2.projectPoints(
            object_points,
            rvec,
            tvec,
            self._camera_intrinsic_matrix,
            self._dist_coeffs,
        )
        output_points = np.squeeze(output_points)
        return output_points
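
A hedged round-trip sketch of the solver (the module names cuboid / cuboid_pnp_solver and the intrinsics are assumptions for illustration): project the nine cuboid points from a known pose with OpenCV, then let solve_pnp recover that pose.

import cv2
import numpy as np
from cuboid import Cuboid3d
from cuboid_pnp_solver import CuboidPNPSolver

# Made-up pinhole intrinsics for a 640x480 image.
K = np.array([[600.0, 0.0, 320.0],
              [0.0, 600.0, 240.0],
              [0.0, 0.0, 1.0]])

box = Cuboid3d(size3d=[0.03, 0.026, 0.065])
solver = CuboidPNPSolver("knight", camera_intrinsic_matrix=K, cuboid3d=box)

# Synthesize 2D detections by projecting the 9 model points from a known pose.
rvec = np.array([[0.1], [0.2], [0.3]])
tvec = np.array([[0.02], [-0.01], [0.5]])
pts2d, _ = cv2.projectPoints(np.array(box.get_vertices(), dtype=float),
                             rvec, tvec, K, np.zeros((4, 1)))
points = [tuple(p) for p in np.squeeze(pts2d)]

location, quaternion, projected = solver.solve_pnp(points)
print(location)    # should be close to [0.02, -0.01, 0.5]
print(quaternion)  # pyrr Quaternion, x-y-z-w order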

View file

@ -0,0 +1,901 @@
# Copyright (c) 2018 NVIDIA Corporation. All rights reserved.
# This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.
# https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
"""
Contains the following classes:
- ModelData - High level information encapsulation
- ObjectDetector - Greedy algorithm to build cuboids from belief maps
"""
# 14.06.2024 @shalenikol find_object_poses: remove "cuboid2d"
import time
import sys
from os import path
import numpy as np
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.autograd import Variable
import torchvision.models as models
from scipy.ndimage import gaussian_filter
from scipy import optimize
sys.path.append("../")
from models import *
# Import the definition of the neural network model and cuboids
from cuboid_pnp_solver import *
# global transform for image input
transform = transforms.Compose(
[
# transforms.Scale(IMAGE_SIZE),
# transforms.CenterCrop((imagesize,imagesize)),
transforms.ToTensor(),
# transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
]
)
# ================================ Models ================================
class DopeNetwork(nn.Module):
def __init__(
self,
numBeliefMap=9,
numAffinity=16,
stop_at_stage=6, # number of stages to process (if less than total number of stages)
):
super(DopeNetwork, self).__init__()
self.stop_at_stage = stop_at_stage
vgg_full = models.vgg19(pretrained=False).features
self.vgg = nn.Sequential()
for i_layer in range(24):
self.vgg.add_module(str(i_layer), vgg_full[i_layer])
# Add some layers
i_layer = 23
self.vgg.add_module(
str(i_layer), nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=1)
)
self.vgg.add_module(str(i_layer + 1), nn.ReLU(inplace=True))
self.vgg.add_module(
str(i_layer + 2), nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1)
)
self.vgg.add_module(str(i_layer + 3), nn.ReLU(inplace=True))
# print('---Belief------------------------------------------------')
# _2 are the belief map stages
self.m1_2 = DopeNetwork.create_stage(128, numBeliefMap, True)
self.m2_2 = DopeNetwork.create_stage(
128 + numBeliefMap + numAffinity, numBeliefMap, False
)
self.m3_2 = DopeNetwork.create_stage(
128 + numBeliefMap + numAffinity, numBeliefMap, False
)
self.m4_2 = DopeNetwork.create_stage(
128 + numBeliefMap + numAffinity, numBeliefMap, False
)
self.m5_2 = DopeNetwork.create_stage(
128 + numBeliefMap + numAffinity, numBeliefMap, False
)
self.m6_2 = DopeNetwork.create_stage(
128 + numBeliefMap + numAffinity, numBeliefMap, False
)
# print('---Affinity----------------------------------------------')
# _1 are the affinity map stages
self.m1_1 = DopeNetwork.create_stage(128, numAffinity, True)
self.m2_1 = DopeNetwork.create_stage(
128 + numBeliefMap + numAffinity, numAffinity, False
)
self.m3_1 = DopeNetwork.create_stage(
128 + numBeliefMap + numAffinity, numAffinity, False
)
self.m4_1 = DopeNetwork.create_stage(
128 + numBeliefMap + numAffinity, numAffinity, False
)
self.m5_1 = DopeNetwork.create_stage(
128 + numBeliefMap + numAffinity, numAffinity, False
)
self.m6_1 = DopeNetwork.create_stage(
128 + numBeliefMap + numAffinity, numAffinity, False
)
def forward(self, x):
"""Runs inference on the neural network"""
out1 = self.vgg(x)
out1_2 = self.m1_2(out1)
out1_1 = self.m1_1(out1)
if self.stop_at_stage == 1:
return [out1_2], [out1_1]
out2 = torch.cat([out1_2, out1_1, out1], 1)
out2_2 = self.m2_2(out2)
out2_1 = self.m2_1(out2)
if self.stop_at_stage == 2:
return [out1_2, out2_2], [out1_1, out2_1]
out3 = torch.cat([out2_2, out2_1, out1], 1)
out3_2 = self.m3_2(out3)
out3_1 = self.m3_1(out3)
if self.stop_at_stage == 3:
return [out1_2, out2_2, out3_2], [out1_1, out2_1, out3_1]
out4 = torch.cat([out3_2, out3_1, out1], 1)
out4_2 = self.m4_2(out4)
out4_1 = self.m4_1(out4)
if self.stop_at_stage == 4:
return [out1_2, out2_2, out3_2, out4_2], [out1_1, out2_1, out3_1, out4_1]
out5 = torch.cat([out4_2, out4_1, out1], 1)
out5_2 = self.m5_2(out5)
out5_1 = self.m5_1(out5)
if self.stop_at_stage == 5:
return [out1_2, out2_2, out3_2, out4_2, out5_2], [
out1_1,
out2_1,
out3_1,
out4_1,
out5_1,
]
out6 = torch.cat([out5_2, out5_1, out1], 1)
out6_2 = self.m6_2(out6)
out6_1 = self.m6_1(out6)
return [out1_2, out2_2, out3_2, out4_2, out5_2, out6_2], [
out1_1,
out2_1,
out3_1,
out4_1,
out5_1,
out6_1,
]
@staticmethod
def create_stage(in_channels, out_channels, first=False):
"""Create the neural network layers for a single stage."""
model = nn.Sequential()
mid_channels = 128
if first:
padding = 1
kernel = 3
count = 6
final_channels = 512
else:
padding = 3
kernel = 7
count = 10
final_channels = mid_channels
# First convolution
model.add_module(
"0",
nn.Conv2d(
in_channels, mid_channels, kernel_size=kernel, stride=1, padding=padding
),
)
# Middle convolutions
i = 1
while i < count - 1:
model.add_module(str(i), nn.ReLU(inplace=True))
i += 1
model.add_module(
str(i),
nn.Conv2d(
mid_channels,
mid_channels,
kernel_size=kernel,
stride=1,
padding=padding,
),
)
i += 1
# Penultimate convolution
model.add_module(str(i), nn.ReLU(inplace=True))
i += 1
model.add_module(
str(i), nn.Conv2d(mid_channels, final_channels, kernel_size=1, stride=1)
)
i += 1
# Last convolution
model.add_module(str(i), nn.ReLU(inplace=True))
i += 1
model.add_module(
str(i), nn.Conv2d(final_channels, out_channels, kernel_size=1, stride=1)
)
i += 1
return model
class ModelData(object):
"""This class contains methods for loading the neural network"""
def __init__(self, name="", net_path="", gpu_id=0, architecture="dope"):
self.name = name
self.net_path = net_path # Path to trained network model
self.net = None # Trained network
self.gpu_id = gpu_id
self.architecture = architecture
def get_net(self):
"""Returns network"""
if not self.net:
self.load_net_model()
return self.net
def load_net_model(self):
"""Loads network model from disk"""
if not self.net and path.exists(self.net_path):
self.net = self.load_net_model_path(self.net_path)
        if not path.exists(self.net_path):
            print("ERROR: Unable to find model weights: '{}'".format(self.net_path))
            sys.exit(1)
def load_net_model_path(self, path):
"""Loads network model from disk with given path"""
model_loading_start_time = time.time()
print("Loading DOPE model '{}'...".format(path))
net = DopeNetwork()
net = torch.nn.DataParallel(net, [0]).cuda()
net.load_state_dict(torch.load(path))
net.eval()
print(
" Model loaded in {:.2f} seconds.".format(
time.time() - model_loading_start_time
)
)
return net
def __str__(self):
"""Converts to string"""
return "{}: {}".format(self.name, self.net_path)
# ================================ ObjectDetector ================================
class ObjectDetector(object):
"""This class contains methods for object detection"""
@staticmethod
def gaussian(height, center_x, center_y, width_x, width_y):
"""Returns a gaussian function with the given parameters"""
width_x = float(width_x)
width_y = float(width_y)
return lambda x, y: height * np.exp(
-(((center_x - x) / width_x) ** 2 + ((center_y - y) / width_y) ** 2) / 2
)
@staticmethod
def moments(data):
"""Returns (height, x, y, width_x, width_y)
the gaussian parameters of a 2D distribution by calculating its
moments"""
total = data.sum()
X, Y = np.indices(data.shape)
x = (X * data).sum() / total
y = (Y * data).sum() / total
col = data[:, int(y)]
width_x = np.sqrt(
np.abs((np.arange(col.size) - y) ** 2 * col).sum() / col.sum()
)
row = data[int(x), :]
width_y = np.sqrt(
np.abs((np.arange(row.size) - x) ** 2 * row).sum() / row.sum()
)
height = data.max()
return height, x, y, width_x, width_y
@staticmethod
def fitgaussian(data):
"""Returns (height, x, y, width_x, width_y)
the gaussian parameters of a 2D distribution found by a fit"""
params = ObjectDetector.moments(data)
errorfunction = lambda p: np.ravel(
ObjectDetector.gaussian(*p)(*np.indices(data.shape)) - data
)
p, success = optimize.leastsq(errorfunction, params)
return p
@staticmethod
def make_grid(
tensor,
nrow=8,
padding=2,
normalize=False,
range_=None,
scale_each=False,
pad_value=0,
):
"""Make a grid of images.
Args:
tensor (Tensor or list): 4D mini-batch Tensor of shape (B x C x H x W)
or a list of images all of the same size.
nrow (int, optional): Number of images displayed in each row of the grid.
The Final grid size is (B / nrow, nrow). Default is 8.
padding (int, optional): amount of padding. Default is 2.
normalize (bool, optional): If True, shift the image to the range (0, 1),
by subtracting the minimum and dividing by the maximum pixel value.
            range_ (tuple, optional): tuple (min, max) where min and max are numbers,
then these numbers are used to normalize the image. By default, min and max
are computed from the tensor.
scale_each (bool, optional): If True, scale each image in the batch of
images separately rather than the (min, max) over all images.
pad_value (float, optional): Value for the padded pixels.
Example:
See this notebook `here <https://gist.github.com/anonymous/bf16430f7750c023141c562f3e9f2a91>`_
"""
import math
if not (
torch.is_tensor(tensor)
or (isinstance(tensor, list) and all(torch.is_tensor(t) for t in tensor))
):
raise TypeError(
"tensor or list of tensors expected, got {}".format(type(tensor))
)
# if list of tensors, convert to a 4D mini-batch Tensor
if isinstance(tensor, list):
tensor = torch.stack(tensor, dim=0)
if tensor.dim() == 2: # single image H x W
tensor = tensor.view(1, tensor.size(0), tensor.size(1))
if tensor.dim() == 3: # single image
if tensor.size(0) == 1: # if single-channel, convert to 3-channel
tensor = torch.cat((tensor, tensor, tensor), 0)
tensor = tensor.view(1, tensor.size(0), tensor.size(1), tensor.size(2))
if tensor.dim() == 4 and tensor.size(1) == 1: # single-channel images
tensor = torch.cat((tensor, tensor, tensor), 1)
if normalize is True:
tensor = tensor.clone() # avoid modifying tensor in-place
if range_ is not None:
assert isinstance(
range_, tuple
), "range has to be a tuple (min, max) if specified. min and max are numbers"
def norm_ip(img, min, max):
img.clamp_(min=min, max=max)
img.add_(-min).div_(max - min + 1e-5)
def norm_range(t, range_):
if range_ is not None:
norm_ip(t, range_[0], range_[1])
else:
norm_ip(t, float(t.min()), float(t.max()))
            if scale_each is True:
                for t in tensor:  # loop over mini-batch dimension
                    norm_range(t, range_)
            else:
                norm_range(tensor, range_)
if tensor.size(0) == 1:
return tensor.squeeze()
# make the mini-batch of images into a grid
nmaps = tensor.size(0)
xmaps = min(nrow, nmaps)
ymaps = int(math.ceil(float(nmaps) / xmaps))
height, width = int(tensor.size(2) + padding), int(tensor.size(3) + padding)
grid = tensor.new(3, height * ymaps + padding, width * xmaps + padding).fill_(
pad_value
)
k = 0
for y in range(ymaps):
for x in range(xmaps):
if k >= nmaps:
break
grid.narrow(1, y * height + padding, height - padding).narrow(
2, x * width + padding, width - padding
).copy_(tensor[k])
k = k + 1
return grid
@staticmethod
def get_image_grid(tensor, filename, nrow=3, padding=2, mean=None, std=None):
"""
        Builds a PIL image from a given Tensor (saving to file is left commented out).
        If given a mini-batch tensor, arranges the tensor as a grid of images.
"""
from PIL import Image
# tensor = tensor.cpu()
grid = ObjectDetector.make_grid(tensor, nrow=nrow, padding=10, pad_value=1)
if not mean is None:
# ndarr = grid.mul(std).add(mean).mul(255).byte().transpose(0,2).transpose(0,1).numpy()
ndarr = (
grid.mul(std)
.add(mean)
.mul(255)
.byte()
.transpose(0, 2)
.transpose(0, 1)
.numpy()
)
else:
ndarr = (
grid.mul(0.5)
.add(0.5)
.mul(255)
.byte()
.transpose(0, 2)
.transpose(0, 1)
.numpy()
)
im = Image.fromarray(ndarr)
# im.save(filename)
return im
@staticmethod
def detect_object_in_image(
net_model, pnp_solver, in_img, config, grid_belief_debug=False, norm_belief=True
):
"""Detect objects in a image using a specific trained network model
Returns the poses of the objects and the belief maps
"""
if in_img is None:
return []
# print("detect_object_in_image - image shape: {}".format(in_img.shape))
# Run network inference
image_tensor = transform(in_img)
image_torch = Variable(image_tensor).cuda().unsqueeze(0)
out, seg = net_model(
image_torch
) # run inference using the network (calls 'forward' method)
vertex2 = out[-1][0]
aff = seg[-1][0]
# Find objects from network output
detected_objects = ObjectDetector.find_object_poses(
vertex2, aff, pnp_solver, config
)
if not grid_belief_debug:
return detected_objects, None
else:
# Run the belief maps debug display on the beliefmaps
upsampling = nn.UpsamplingNearest2d(scale_factor=8)
tensor = vertex2
belief_imgs = []
in_img = torch.tensor(in_img).float() / 255.0
in_img *= 0.7
for j in range(tensor.size()[0]):
belief = tensor[j].clone()
if norm_belief:
belief -= float(torch.min(belief)[0].data.cpu().numpy())
belief /= float(torch.max(belief)[0].data.cpu().numpy())
belief = (
upsampling(belief.unsqueeze(0).unsqueeze(0))
.squeeze()
.squeeze()
.data
)
belief = torch.clamp(belief, 0, 1).cpu()
belief = torch.cat(
[
belief.unsqueeze(0) + in_img[:, :, 0],
belief.unsqueeze(0) + in_img[:, :, 1],
belief.unsqueeze(0) + in_img[:, :, 2],
]
).unsqueeze(0)
belief = torch.clamp(belief, 0, 1)
# belief_imgs.append(belief.data.squeeze().cpu().numpy().transpose(1,2,0))
belief_imgs.append(belief.data.squeeze().numpy())
# Create the image grid
belief_imgs = torch.tensor(np.array(belief_imgs))
im_belief = ObjectDetector.get_image_grid(belief_imgs, None, mean=0, std=1)
return detected_objects, im_belief
@staticmethod
def find_object_poses(
vertex2,
aff,
pnp_solver,
config,
run_sampling=False,
num_sample=100,
scale_factor=8,
):
"""Detect objects given network output"""
# run_sampling = True
# Detect objects from belief maps and affinities
objects, all_peaks = ObjectDetector.find_objects(
vertex2,
aff,
config,
run_sampling=run_sampling,
num_sample=num_sample,
scale_factor=scale_factor,
)
detected_objects = []
obj_name = pnp_solver.object_name
print(all_peaks)
# print("find_object_poses: found {} objects ================".format(len(objects)))
for obj in objects:
# Run PNP
points = obj[1] + [(obj[0][0] * scale_factor, obj[0][1] * scale_factor)]
# print(points)
# cuboid2d = np.copy(points)
location, quaternion, projected_points = pnp_solver.solve_pnp(points)
# run multiple sample
if run_sampling:
lx, ly, lz = [], [], []
qx, qy, qz, qw = [], [], [], []
for i_sample in range(num_sample):
sample = []
for i_point in range(len(obj[-1])):
if not obj[-1][i_point][i_sample] is None:
sample.append(
(
obj[-1][i_point][i_sample][0] * scale_factor,
obj[-1][i_point][i_sample][1] * scale_factor,
)
)
else:
sample.append(None)
# final_cuboids.append(sample)
pnp_sample = pnp_solver.solve_pnp(sample)
try:
lx.append(pnp_sample[0][0])
ly.append(pnp_sample[0][1])
lz.append(pnp_sample[0][2])
qx.append(pnp_sample[1][0])
qy.append(pnp_sample[1][1])
qz.append(pnp_sample[1][2])
qw.append(pnp_sample[1][3])
except:
pass
# TODO
# RUN quaternion as well for the std and avg.
try:
print("----")
print("location:")
print(location[0], location[1], location[2])
print(np.mean(lx), np.mean(ly), np.mean(lz))
print(np.std(lx), np.std(ly), np.std(lz))
print("quaternion:")
print(quaternion[0], quaternion[1], quaternion[2], quaternion[3])
print(np.mean(qx), np.mean(qy), np.mean(qz), np.mean(qw))
print(np.std(qx), np.std(qy), np.std(qz), np.std(qw))
except:
pass
if not location is None:
detected_objects.append(
{
"name": obj_name,
"location": location,
"quaternion": quaternion,
# "cuboid2d": cuboid2d,
"projected_points": projected_points,
"confidence": obj[-1],
"raw_points": points,
}
)
# print("find_object_poses: points = ", type(points), points)
# print("find_object_poses: locn = ", location, "quat =", quaternion)
# print("find_object_poses: projected_points = ", type(projected_points), projected_points)
return detected_objects
@staticmethod
def find_objects(
vertex2,
aff,
config,
numvertex=8,
run_sampling=False,
num_sample=100,
scale_factor=8,
):
"""Detects objects given network belief maps and affinities, using heuristic method"""
all_peaks = []
all_samples = []
peak_counter = 0
for j in range(vertex2.size()[0]):
belief = vertex2[j].clone()
map_ori = belief.cpu().data.numpy()
map = gaussian_filter(belief.cpu().data.numpy(), sigma=config.sigma)
p = 1
map_left = np.zeros(map.shape)
map_left[p:, :] = map[:-p, :]
map_right = np.zeros(map.shape)
map_right[:-p, :] = map[p:, :]
map_up = np.zeros(map.shape)
map_up[:, p:] = map[:, :-p]
map_down = np.zeros(map.shape)
map_down[:, :-p] = map[:, p:]
peaks_binary = np.logical_and.reduce(
(
map >= map_left,
map >= map_right,
map >= map_up,
map >= map_down,
map > config.thresh_map,
)
)
peaks = zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0])
            # Compute the weighted average for localizing the peaks
peaks = list(peaks)
win = 11
ran = win // 2
peaks_avg = []
point_sample_list = []
for p_value in range(len(peaks)):
p = peaks[p_value]
weights = np.zeros((win, win))
i_values = np.zeros((win, win))
j_values = np.zeros((win, win))
for i in range(-ran, ran + 1):
for j in range(-ran, ran + 1):
if (
p[1] + i < 0
or p[1] + i >= map_ori.shape[0]
or p[0] + j < 0
or p[0] + j >= map_ori.shape[1]
):
continue
i_values[j + ran, i + ran] = p[1] + i
j_values[j + ran, i + ran] = p[0] + j
weights[j + ran, i + ran] = map_ori[p[1] + i, p[0] + j]
                # if the weights are all zeros, np.average below will fail,
                # so fall back to the raw (non-refined) peak location
OFFSET_DUE_TO_UPSAMPLING = 0.4395
# Sample the points using the gaussian
if run_sampling:
data = weights
params = ObjectDetector.fitgaussian(data)
fit = ObjectDetector.gaussian(*params)
_, mu_x, mu_y, std_x, std_y = params
points_sample = np.random.multivariate_normal(
np.array(
[
p[1] + mu_x + OFFSET_DUE_TO_UPSAMPLING,
p[0] - mu_y + OFFSET_DUE_TO_UPSAMPLING,
]
),
# np.array([[std_x*std_x,0],[0,std_y*std_y]]), size=num_sample)
np.array([[std_x, 0], [0, std_y]]),
size=num_sample,
)
point_sample_list.append(points_sample)
try:
peaks_avg.append(
(
np.average(j_values, weights=weights)
+ OFFSET_DUE_TO_UPSAMPLING,
np.average(i_values, weights=weights)
+ OFFSET_DUE_TO_UPSAMPLING,
)
)
except:
peaks_avg.append(
(
p[0] + OFFSET_DUE_TO_UPSAMPLING,
p[1] + OFFSET_DUE_TO_UPSAMPLING,
)
)
# Note: Python3 doesn't support len for zip object
peaks_len = min(
len(np.nonzero(peaks_binary)[1]), len(np.nonzero(peaks_binary)[0])
)
peaks_with_score = [
peaks_avg[x_] + (map_ori[peaks[x_][1], peaks[x_][0]],)
for x_ in range(len(peaks))
]
id = range(peak_counter, peak_counter + peaks_len)
peaks_with_score_and_id = [
peaks_with_score[i] + (id[i],) for i in range(len(id))
]
all_peaks.append(peaks_with_score_and_id)
all_samples.append(point_sample_list)
peak_counter += peaks_len
objects = []
if aff is None:
# Assume there is only one object
points = [None for i in range(numvertex)]
for i_peak, peaks in enumerate(all_peaks):
# print (peaks)
for peak in peaks:
if peak[2] > config.threshold:
points[i_peak] = (peak[0], peak[1])
return points
# Check object centroid and build the objects if the centroid is found
for nb_object in range(len(all_peaks[-1])):
if all_peaks[-1][nb_object][2] > config.thresh_points:
objects.append(
[
[
all_peaks[-1][nb_object][:2][0],
all_peaks[-1][nb_object][:2][1],
],
[None for i in range(numvertex)],
[None for i in range(numvertex)],
all_peaks[-1][nb_object][2],
[
[None for j in range(num_sample)]
for i in range(numvertex + 1)
],
]
)
# Check if the object was added before
if run_sampling and nb_object < len(objects):
# add the samples to the object centroids
objects[nb_object][4][-1] = all_samples[-1][nb_object]
# Working with an output that only has belief maps
if aff is None:
if len(objects) > 0 and len(all_peaks) > 0 and len(all_peaks[0]) > 0:
for i_points in range(8):
if (
len(all_peaks[i_points]) > 0
and all_peaks[i_points][0][2] > config.threshold
):
objects[0][1][i_points] = (
all_peaks[i_points][0][0],
all_peaks[i_points][0][1],
)
else:
# For all points found
for i_lists in range(len(all_peaks[:-1])):
lists = all_peaks[i_lists]
                # A candidate is a point that needs to be matched with an object centroid
for i_candidate, candidate in enumerate(lists):
if candidate[2] < config.thresh_points:
continue
i_best = -1
best_dist = 10000
best_angle = 100
                    # Find the object centroid this candidate links to.
for i_obj in range(len(objects)):
center = [objects[i_obj][0][0], objects[i_obj][0][1]]
                        # the integer coordinates index into the affinity map,
                        # while the float coordinates are used for the distance computation
point_int = [int(candidate[0]), int(candidate[1])]
point = [candidate[0], candidate[1]]
# look at the distance to the vector field.
v_aff = (
np.array(
[
aff[
i_lists * 2, point_int[1], point_int[0]
].data.item(),
aff[
i_lists * 2 + 1, point_int[1], point_int[0]
].data.item(),
]
)
* 10
)
# normalize the vector
xvec = v_aff[0]
yvec = v_aff[1]
norms = np.sqrt(xvec * xvec + yvec * yvec)
xvec /= norms
yvec /= norms
v_aff = np.concatenate([[xvec], [yvec]])
v_center = np.array(center) - np.array(point)
xvec = v_center[0]
yvec = v_center[1]
norms = np.sqrt(xvec * xvec + yvec * yvec)
xvec /= norms
yvec /= norms
v_center = np.concatenate([[xvec], [yvec]])
# vector affinity
dist_angle = np.linalg.norm(v_center - v_aff)
# distance between vertexes
dist_point = np.linalg.norm(np.array(point) - np.array(center))
if (
dist_angle < config.thresh_angle
and best_dist > 1000
or dist_angle < config.thresh_angle
and best_dist > dist_point
):
i_best = i_obj
best_angle = dist_angle
best_dist = dist_point
if i_best == -1:
continue
if (
objects[i_best][1][i_lists] is None
or best_angle < config.thresh_angle
and best_dist < objects[i_best][2][i_lists][1]
):
# set the points
objects[i_best][1][i_lists] = (
(candidate[0]) * scale_factor,
(candidate[1]) * scale_factor,
)
# set information about the points: angle and distance
objects[i_best][2][i_lists] = (best_angle, best_dist)
# add the sample points
if run_sampling:
objects[i_best][4][i_lists] = all_samples[i_lists][
i_candidate
]
return objects, all_peaks
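
A hedged sketch of using this detector outside ROS, mirroring what the Dope wrapper in the node below does; the weights path, image file and intrinsics are placeholders, and a CUDA-capable GPU is assumed (load_net_model_path wraps the network in DataParallel(...).cuda()).

import cv2
import numpy as np
from cuboid import Cuboid3d
from cuboid_pnp_solver import CuboidPNPSolver
from detector import ModelData, ObjectDetector

class DetectConfig:
    # Thresholds mirroring the YAML config earlier in this commit.
    thresh_angle = 0.5
    thresh_map = 0.01
    sigma = 3
    thresh_points = 0.0
    threshold = 0.5

model = ModelData(name="knight", net_path="/path/to/w_knight.pth")  # placeholder path
model.load_net_model()

K = np.array([[600.0, 0.0, 320.0], [0.0, 600.0, 240.0], [0.0, 0.0, 1.0]])
solver = CuboidPNPSolver("knight", camera_intrinsic_matrix=K,
                         cuboid3d=Cuboid3d([0.03, 0.026, 0.065]))

frame = cv2.imread("frame.png")[..., ::-1].copy()  # BGR -> RGB, as the node below does
detections, _ = ObjectDetector.detect_object_in_image(model.net, solver, frame, DetectConfig())
for det in detections:
    print(det["name"], det["location"], det["quaternion"])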

View file

@ -0,0 +1,196 @@
"""
NVIDIA from jtremblay@gmail.com
"""
# Networks
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.utils.data
import torchvision.models as models
class DopeNetwork(nn.Module):
def __init__(
self,
pretrained=False,
numBeliefMap=9,
numAffinity=16,
stop_at_stage=6, # number of stages to process (if less than total number of stages)
):
super(DopeNetwork, self).__init__()
self.stop_at_stage = stop_at_stage
vgg_full = models.vgg19(pretrained=False).features
self.vgg = nn.Sequential()
for i_layer in range(24):
self.vgg.add_module(str(i_layer), vgg_full[i_layer])
# Add some layers
i_layer = 23
self.vgg.add_module(
str(i_layer), nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=1)
)
self.vgg.add_module(str(i_layer + 1), nn.ReLU(inplace=True))
self.vgg.add_module(
str(i_layer + 2), nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1)
)
self.vgg.add_module(str(i_layer + 3), nn.ReLU(inplace=True))
# print('---Belief------------------------------------------------')
# _2 are the belief map stages
self.m1_2 = DopeNetwork.create_stage(128, numBeliefMap, True)
self.m2_2 = DopeNetwork.create_stage(
128 + numBeliefMap + numAffinity, numBeliefMap, False
)
self.m3_2 = DopeNetwork.create_stage(
128 + numBeliefMap + numAffinity, numBeliefMap, False
)
self.m4_2 = DopeNetwork.create_stage(
128 + numBeliefMap + numAffinity, numBeliefMap, False
)
self.m5_2 = DopeNetwork.create_stage(
128 + numBeliefMap + numAffinity, numBeliefMap, False
)
self.m6_2 = DopeNetwork.create_stage(
128 + numBeliefMap + numAffinity, numBeliefMap, False
)
# print('---Affinity----------------------------------------------')
# _1 are the affinity map stages
self.m1_1 = DopeNetwork.create_stage(128, numAffinity, True)
self.m2_1 = DopeNetwork.create_stage(
128 + numBeliefMap + numAffinity, numAffinity, False
)
self.m3_1 = DopeNetwork.create_stage(
128 + numBeliefMap + numAffinity, numAffinity, False
)
self.m4_1 = DopeNetwork.create_stage(
128 + numBeliefMap + numAffinity, numAffinity, False
)
self.m5_1 = DopeNetwork.create_stage(
128 + numBeliefMap + numAffinity, numAffinity, False
)
self.m6_1 = DopeNetwork.create_stage(
128 + numBeliefMap + numAffinity, numAffinity, False
)
def forward(self, x):
"""Runs inference on the neural network"""
out1 = self.vgg(x)
out1_2 = self.m1_2(out1)
out1_1 = self.m1_1(out1)
if self.stop_at_stage == 1:
return [out1_2], [out1_1]
out2 = torch.cat([out1_2, out1_1, out1], 1)
out2_2 = self.m2_2(out2)
out2_1 = self.m2_1(out2)
if self.stop_at_stage == 2:
return [out1_2, out2_2], [out1_1, out2_1]
out3 = torch.cat([out2_2, out2_1, out1], 1)
out3_2 = self.m3_2(out3)
out3_1 = self.m3_1(out3)
if self.stop_at_stage == 3:
return [out1_2, out2_2, out3_2], [out1_1, out2_1, out3_1]
out4 = torch.cat([out3_2, out3_1, out1], 1)
out4_2 = self.m4_2(out4)
out4_1 = self.m4_1(out4)
if self.stop_at_stage == 4:
return [out1_2, out2_2, out3_2, out4_2], [out1_1, out2_1, out3_1, out4_1]
out5 = torch.cat([out4_2, out4_1, out1], 1)
out5_2 = self.m5_2(out5)
out5_1 = self.m5_1(out5)
if self.stop_at_stage == 5:
return [out1_2, out2_2, out3_2, out4_2, out5_2], [
out1_1,
out2_1,
out3_1,
out4_1,
out5_1,
]
out6 = torch.cat([out5_2, out5_1, out1], 1)
out6_2 = self.m6_2(out6)
out6_1 = self.m6_1(out6)
return [out1_2, out2_2, out3_2, out4_2, out5_2, out6_2], [
out1_1,
out2_1,
out3_1,
out4_1,
out5_1,
out6_1,
]
@staticmethod
def create_stage(in_channels, out_channels, first=False):
"""Create the neural network layers for a single stage."""
model = nn.Sequential()
mid_channels = 128
if first:
padding = 1
kernel = 3
count = 6
final_channels = 512
else:
padding = 3
kernel = 7
count = 10
final_channels = mid_channels
# First convolution
model.add_module(
"0",
nn.Conv2d(
in_channels, mid_channels, kernel_size=kernel, stride=1, padding=padding
),
)
# Middle convolutions
i = 1
while i < count - 1:
model.add_module(str(i), nn.ReLU(inplace=True))
i += 1
model.add_module(
str(i),
nn.Conv2d(
mid_channels,
mid_channels,
kernel_size=kernel,
stride=1,
padding=padding,
),
)
i += 1
# Penultimate convolution
model.add_module(str(i), nn.ReLU(inplace=True))
i += 1
model.add_module(
str(i), nn.Conv2d(mid_channels, final_channels, kernel_size=1, stride=1)
)
i += 1
# Last convolution
model.add_module(str(i), nn.ReLU(inplace=True))
i += 1
model.add_module(
str(i), nn.Conv2d(final_channels, out_channels, kernel_size=1, stride=1)
)
i += 1
return model
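
A hedged smoke test (a CPU forward pass is enough to see the output structure; "models" is the module name that the detector imports from): the network returns six stages of belief maps (9 channels) and affinity maps (16 channels), each at 1/8 of the input resolution.

import torch
from models import DopeNetwork

net = DopeNetwork()
x = torch.randn(1, 3, 480, 640)  # one RGB image tensor
with torch.no_grad():
    beliefs, affinities = net(x)

print(len(beliefs), len(affinities))            # 6 stages each
print(beliefs[-1].shape, affinities[-1].shape)  # [1, 9, 60, 80] and [1, 16, 60, 80]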

View file

@ -0,0 +1,366 @@
#!/usr/bin/env python3
"""
pose_estimation_lifecycle_node_with_DOPE
ROS 2 program for 6D Pose Estimation
Source run:
python inference.py --weights ../output/weights_2996 --data ../sample_dataset100 --object fork --exts jpg \
--config config/config_pose_fork.yaml --camera config/camera_info_640x480.yaml
@shalenikol release 0.4
"""
import os
import json
import yaml
import rclpy
from rclpy.lifecycle import Node
from rclpy.lifecycle import State
from rclpy.lifecycle import TransitionCallbackReturn
from ament_index_python.packages import get_package_share_directory
from sensor_msgs.msg import Image, CameraInfo
from geometry_msgs.msg import Pose
from cv_bridge import CvBridge # Package to convert between ROS and OpenCV Images
import cv2 # OpenCV library
FILE_DOPE_CONFIG = "pe_dope_config.yaml"
# FILE_TEMP_IMAGE = "image_rgb.png"
CAMERA_LINK_DEFAULT = "rgbd_camera"
NODE_NAME_DEFAULT = "lc_dope" # this name must match the name in the description (["Module"]["node_name"])
PARAM_SKILL_CFG = "lc_dope_cfg"
def get_transfer_path_() -> str:
return os.path.join(get_package_share_directory("rbs_perception"), "config")
class PE_DOPE(Node):
"""Pose estimation lifecycle node with DOPE."""
def __init__(self, **kwargs):
"""Construct the node."""
# for other nodes
kwargs["allow_undeclared_parameters"] = True
kwargs["automatically_declare_parameters_from_overrides"] = True
super().__init__(NODE_NAME_DEFAULT, **kwargs)
str_cfg = self.get_parameter(PARAM_SKILL_CFG).get_parameter_value().string_value
self.skill_cfg = json.loads(str_cfg)
self.nodeName = NODE_NAME_DEFAULT # self.skill_cfg["Module"]["node_name"]
# out_par = self.skill_cfg["Interface"]["Output"][0]
# self.topicSrv = self.nodeName + "/" + out_par["name"]
self.topicSrv = self.skill_cfg["topicsOut"][0]["name"]
# Used to convert between ROS and OpenCV images
self.br = CvBridge()
self.dope_cfg = self._load_config_DOPE()
self._cam_pose = Pose()
self._is_camerainfo = False
self.topicImage = ""
self.topicCameraInfo = ""
self._set_camera_topic(CAMERA_LINK_DEFAULT)
self._sub = None
self._sub_info = None
self._pub = None
self._image_cnt = 0
self._K = []
def _load_config_DOPE(self):
p = os.path.join(get_transfer_path_(), FILE_DOPE_CONFIG)
with open(p, "r") as f:
y = yaml.load(f, Loader=yaml.FullLoader)
return y
def _set_camera_topic(self, camera_link: str):
""" Service for camera name topics """
self.topicImage = "/" + camera_link + "/image"
self.topicCameraInfo = "/" + camera_link +"/camera_info"
def listener_camera_info(self, data):
""" CameraInfo callback function. """
        if self._is_camerainfo:  # don't read camera info again
return
self._res = [data.height, data.width]
k_ = data.k
self._K = [
[k_[0], k_[1], k_[2]],
[k_[3], k_[4], k_[5]],
[k_[6], k_[7], k_[8]]
]
# set the indicator for receiving the camera info
self._is_camerainfo = True
def on_configure(self, state: State) -> TransitionCallbackReturn:
json_param = self.get_parameter(PARAM_SKILL_CFG).get_parameter_value().string_value
jdata = json.loads(json_param)
dependency = {}
for comm in jdata["BTAction"]:
for par in comm["param"]:
if par["type"] == "weights":
dependency = par["dependency"]
elif par["type"] == "topic":
t_dep = par["dependency"]
if "Image" in t_dep["topicType"]:
self.topicImage = t_dep["topicOut"]
else:
self.topicCameraInfo = t_dep["topicOut"]
assert dependency, "no weights dependency"
assert self.topicImage, "no input topic dependency"
# Create the subscribers.
self._sub_info = self.create_subscription(CameraInfo, self.topicCameraInfo, self.listener_camera_info, 2)
# Create the publisher.
self._pub = self.create_lifecycle_publisher(Pose, self.topicSrv, 1)
# Load model weights
w = dependency["weights_file"]
if not os.path.isfile(w):
self.get_logger().warning(f"No weights found <{w}>")
return TransitionCallbackReturn.FAILURE
obj = dependency["object_name"]
dim = dependency["dimensions"]
self.dope_node = Dope(self.dope_cfg, w, obj, dim)
self.get_logger().info(f"configure is success (with '{obj}')")
return TransitionCallbackReturn.SUCCESS
def on_activate(self, state: State) -> TransitionCallbackReturn:
self.get_logger().info("on_activate is called")
# Create the Image subscriber.
self._sub = self.create_subscription(Image, self.topicImage, self.image_callback, 3)
return super().on_activate(state)
def on_deactivate(self, state: State) -> TransitionCallbackReturn:
self.get_logger().info("on_deactivate is called")
# Destroy the Image subscriber.
self.destroy_subscription(self._sub)
return super().on_deactivate(state)
def on_cleanup(self, state: State) -> TransitionCallbackReturn:
self.destroy_publisher(self._pub)
self.destroy_subscription(self._sub_info)
self._is_camerainfo = False
self._pub = None
self.get_logger().info("on_cleanup is called")
return TransitionCallbackReturn.SUCCESS
def on_shutdown(self, state: State) -> TransitionCallbackReturn:
self.get_logger().info("on_shutdown is called")
return TransitionCallbackReturn.SUCCESS
# def test_im_proc(self):
# im = "im_tst.jpg"
# if not os.path.isfile(im):
# print(f"File not found '{im}'")
# return
# frame = cv2.imread(im)
# frame = frame[..., ::-1].copy()
# self._K = [[585.756089952257, 0.0, 319.5], [0.0, 585.756089952257, 239.5], [0.0, 0.0, 1.0]]
# # call the inference node
# p = self.dope_node.image_processing(img=frame, camera_info=self._K)
# print(f"pose = {p}")
def image_callback(self, data):
""" Image Callback function. """
if not self._is_camerainfo:
self.get_logger().warning("No data from CameraInfo")
return
# Convert ROS Image message to OpenCV image
current_frame = self.br.imgmsg_to_cv2(data)
# # Save image
# frame_im = FILE_TEMP_IMAGE # str(self.objPath / "image_rgb.png")
# cv2.imwrite(frame_im, current_frame)
self._image_cnt += 1
self.get_logger().info(f"dope: begin {self._image_cnt}")
current_frame = current_frame[..., ::-1].copy()
pose = self.dope_node.image_processing(img=current_frame, camera_info=self._K)
self.get_logger().info(f"dope: end {self._image_cnt}")
if self._pub is not None and self._pub.is_activated:
# publish pose estimation result
self._pub.publish(pose)
from detector import ModelData, ObjectDetector
from cuboid_pnp_solver import CuboidPNPSolver
from cuboid import Cuboid3d
import numpy as np
class Dope(object):
"""ROS node that listens to image topic, runs DOPE, and publishes DOPE results"""
def __init__(
self,
config, # config yaml loaded eg dict
weight, # path to weight
class_name,
dim: list # dimensions of model 'class_name'
):
self.input_is_rectified = config["input_is_rectified"]
self.downscale_height = config["downscale_height"]
self.config_detect = lambda: None
self.config_detect.mask_edges = 1
self.config_detect.mask_faces = 1
self.config_detect.vertex = 1
self.config_detect.threshold = 0.5
self.config_detect.softmax = 1000
self.config_detect.thresh_angle = config["thresh_angle"]
self.config_detect.thresh_map = config["thresh_map"]
self.config_detect.sigma = config["sigma"]
self.config_detect.thresh_points = config["thresh_points"]
# load network model, create PNP solver
self.model = ModelData(name=class_name, net_path=weight)
# TODO warn on load_net_model():
# Loading DOPE model '/home/shalenikol/robossembler_ws/fork_e47.pth'...
# /home/shalenikol/.local/lib/python3.10/site-packages/torchvision/models/_utils.py:208:
# UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.
# warnings.warn(
# /home/shalenikol/.local/lib/python3.10/site-packages/torchvision/models/_utils.py:223:
# UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future.
# The current behavior is equivalent to passing `weights=None`.
# warnings.warn(msg)
self.model.load_net_model()
# print("Model Loaded")
# try:
# self.draw_color = tuple(config["draw_colors"][class_name])
# except:
self.draw_color = (0, 255, 0)
# TODO load dim from config
# dim = [13.7, 16.5, 20.2] # config["dimensions"][class_name]
self.dimension = tuple(dim)
self.class_id = 1 #config["class_ids"][class_name]
self.pnp_solver = CuboidPNPSolver(class_name, cuboid3d=Cuboid3d(dim))
self.class_name = class_name
# print("Ctrl-C to stop")
def image_processing(
self,
img,
camera_info
# img_name, # this is the name of the img file to save, it needs the .png at the end
# output_folder, # folder where to put the output
# weight
) -> Pose:
        # !!! Always self.input_is_rectified = True
camera_matrix = np.matrix(camera_info, dtype="float64").copy()
dist_coeffs = np.zeros((4, 1))
# Update camera matrix and distortion coefficients
# if self.input_is_rectified:
# P = np.matrix(
# camera_info["projection_matrix"]["data"], dtype="float64"
# ).copy()
# P.resize((3, 4))
# camera_matrix = P[:, :3]
# dist_coeffs = np.zeros((4, 1))
# else:
# # TODO ???
# camera_matrix = np.matrix(camera_info.K, dtype="float64")
# camera_matrix.resize((3, 3))
# dist_coeffs = np.matrix(camera_info.D, dtype="float64")
# dist_coeffs.resize((len(camera_info.D), 1))
# Downscale image if necessary
height, width, _ = img.shape
scaling_factor = float(self.downscale_height) / height
if scaling_factor < 1.0:
camera_matrix[:2] *= scaling_factor
img = cv2.resize(img, (int(scaling_factor * width), int(scaling_factor * height)))
self.pnp_solver.set_camera_intrinsic_matrix(camera_matrix)
self.pnp_solver.set_dist_coeffs(dist_coeffs)
# # Copy and draw image
# img_copy = img.copy()
# im = Image.fromarray(img_copy)
# draw = Draw(im)
# # dictionary for the final output
# dict_out = {"camera_data": {}, "objects": []}
# Detect object
results, _ = ObjectDetector.detect_object_in_image(
self.model.net, self.pnp_solver, img, self.config_detect
)
# Publish pose #and overlay cube on image
p = Pose()
for _, result in enumerate(results):
if result["location"] is None:
continue
l = result["location"]
q = result["quaternion"]
p.position.x = l[0]
p.position.y = l[1]
p.position.z = l[2]
p.orientation.x = q[0]
p.orientation.y = q[1]
p.orientation.z = q[2]
p.orientation.w = q[3]
break # !!! only considering the first option for now
return p
# # save the json files
# with open(f"tmp_result{i}.json", "w") as fp:
# json.dump(result, fp, indent=2)
# dict_out["objects"].append(
# {
# "class": self.class_name,
# "location": np.array(loc).tolist(),
# "quaternion_xyzw": np.array(ori).tolist(),
# "projected_cuboid": np.array(result["projected_points"]).tolist(),
# }
# )
# # Draw the cube
# if None not in result["projected_points"]:
# points2d = []
# for pair in result["projected_points"]:
# points2d.append(tuple(pair))
# draw.draw_cube(points2d, self.draw_color)
# # create directory to save image if it does not exist
# img_name_base = img_name.split("/")[-1]
# output_path = os.path.join(
# output_folder,
# weight.split("/")[-1].replace(".pth", ""),
# *img_name.split("/")[:-1],
# )
# if not os.path.isdir(output_path):
# os.makedirs(output_path, exist_ok=True)
# im.save(os.path.join(output_path, img_name_base))
# json_path = os.path.join(
# output_path, ".".join(img_name_base.split(".")[:-1]) + ".json"
# )
# # save the json files
# with open(json_path, "w") as fp:
# json.dump(dict_out, fp, indent=2)
def main():
rclpy.init()
executor = rclpy.executors.SingleThreadedExecutor()
# executor = rclpy.executors.MultiThreadedExecutor()
lc_node = PE_DOPE()
executor.add_node(lc_node)
try:
executor.spin()
except (KeyboardInterrupt, rclpy.executors.ExternalShutdownException):
lc_node.destroy_node()
if __name__ == '__main__':
main()
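
As a usage note (not part of the commit), a minimal rclpy subscriber that consumes the published pose; the topic string is an assumption built from the node name and the Output interface name above, following the commented-out line in __init__ (node name + "/" + output name).

import rclpy
from rclpy.node import Node
from geometry_msgs.msg import Pose


class PoseListener(Node):
    def __init__(self):
        super().__init__("pe_dope_listener")
        # Assumed topic name: "<node_name>/<Output name>" from the skill description.
        self.create_subscription(Pose, "/lc_dope/object_detection", self.on_pose, 1)

    def on_pose(self, msg: Pose):
        p, q = msg.position, msg.orientation
        self.get_logger().info(f"pose: ({p.x:.3f}, {p.y:.3f}, {p.z:.3f}) quat w={q.w:.3f}")


def main():
    rclpy.init()
    rclpy.spin(PoseListener())


if __name__ == "__main__":
    main()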