add object detection (yolov4)

2023-12-07 11:04:45 +03:00 · 2023-12-07 11:04:45 +03:00 · eac306c479
commit eac306c479
parent 2cd29d6d0d
3 changed files with 73 additions and 9 deletions
--- a/rbs_bt_executor/bt_trees/test_tree.xml
+++ b/rbs_bt_executor/bt_trees/test_tree.xml
@@ -3,7 +3,7 @@
    <BehaviorTree ID="PoseEstimation">
        <Sequence>
            <Action ID="PoseEstimation" 
-            ObjectName="!/home/$USERNAME/robossembler_ws/src/robossembler-ros2/rbs_perception/config/str_param.json"
+            ObjectName="!/home/shalenikol/robossembler_ws/src/robossembler-ros2/rbs_perception/config/str_param.json"
            ReqKind = "calibrate"
            server_name="/pose_estimation/change_state" 
            server_timeout="1000"/>
--- a/rbs_perception/config/str_param.json
+++ b/rbs_perception/config/str_param.json
@@ -1,4 +1,5 @@
 {
- "mesh_path":"/home/$USERNAME/robossembler_ws/src/robossembler-ros2/rbs_perception/config/fork.ply",
- "gtpose":[1.3,0.0,0.0,0.0,0.0,0.0]
+ "mesh_path":"/home/shalenikol/robossembler_ws/src/robossembler-ros2/rbs_perception/config/fork.ply",
+ "gtpose":[1.3,0.0,0.0,0.0,0.0,0.0],
+ "darknet_path":"/home/shalenikol/test_detection"
 }
--- a/rbs_perception/scripts/pose_estimation_lifecycle.py
+++ b/rbs_perception/scripts/pose_estimation_lifecycle.py
@@ -77,6 +77,7 @@ class PoseEstimator(Node):
        self.tf2_send_pose = 0
        self.mesh_scale = 1.0
        self.megapose_model = None
+        self.darknet_path = ""

        self.nodeName = node_name
        self.topicImage = "/outer_rgb_camera/image"
@@ -214,7 +215,9 @@ class PoseEstimator(Node):
                return TransitionCallbackReturn.FAILURE
            mesh_path = y["mesh_path"]
            if "gtpose" in y:
-              gtpose = y["gtpose"]
+                gtpose = y["gtpose"]
+            if "darknet_path" in y:
+                self.darknet_path = y["darknet_path"]
        else:
            mesh_path = str_param

@@ -324,12 +327,17 @@ class PoseEstimator(Node):
        self._res = [data.height, data.width]
        k_ = data.k
        self._K = [
+            [k_[0], k_[1], k_[2]],
+            [k_[3], k_[4], k_[5]],
+            [k_[6], k_[7], k_[8]]
+        ]
+        """self._K = [
            [k_[0]*2.0, k_[1], data.width / 2.0], # k_[2]], #
            [k_[3], k_[4]*2.0, data.height / 2.0], # k_[5]], #
            [k_[6], k_[7], k_[8]] #self.mesh_scale]
-        ]
+        ]"""

-        tPath = self.objPath / "inputs"
+        """tPath = self.objPath / "inputs"
        #{"label": "fork", "bbox_modal": [329, 189, 430, 270]}
        output_fn = tPath / "object_data.json"
        output_json_dict = {
@@ -338,7 +346,7 @@ class PoseEstimator(Node):
        }
        data = []
        data.append(output_json_dict)
-        output_fn.write_text(json.dumps(data))
+        output_fn.write_text(json.dumps(data))"""

        #{"K": [[25.0, 0.0, 8.65], [0.0, 25.0, 6.5], [0.0, 0.0, 1.0]], "resolution": [480, 640]}
        output_fn = self.objPath / "camera_data.json"
@@ -372,6 +380,44 @@ class PoseEstimator(Node):
        else:
            data = "No result file: '" + str(f) + "'"
        return data
+    
+    def rel2bbox(self, rel_coord):
+        """
+        Scale a relative bounding box to integer pixel values.
+
+        `rel_coord` must provide the keys "center_x", "center_y", "width"
+        and "height" (presumably fractions of the image size, as found in
+        the "relative_coordinates" entry of a darknet detection - confirm).
+        `self._res` is read as [height, width].
+
+        :param rel_coord: mapping with the relative box centre and size
+        :return: [x, y, w, h] - top-left corner, width and height in pixels,
+                 each truncated with int()
+        """
+        rel_w = rel_coord["width"]
+        rel_h = rel_coord["height"]
+        img_h, img_w = self._res[0], self._res[1]
+
+        # top-left corner = centre minus half of the box size
+        left = int((rel_coord["center_x"] - rel_w/2.) * img_w)
+        top = int((rel_coord["center_y"] - rel_h/2.) * img_h)
+        return [left, top, int(rel_w * img_w), int(rel_h * img_h)]
+    
+    def yolo2megapose(self, res_json: str, path_to: Path) -> bool:
+        """
+        Convert a darknet (YOLOv4) JSON result into Megapose's object_data.json.
+
+        Takes the first entry of the JSON list stored in `res_json`, selects
+        the detection named `self.objName` with the highest confidence above
+        the threshold (0.75) and writes its bounding box to
+        `path_to / "inputs/object_data.json"`.
+
+        When the result file cannot be read or parsed, or no detection passes
+        the threshold, a box covering almost the whole frame is written
+        instead and False is returned.
+
+        :param res_json: path to the JSON file written by darknet ("-out")
+        :param path_to: directory that contains the "inputs" sub-directory
+        :return: True if the object was detected, False otherwise
+        """
+        conf = 0.75 # threshold of detection
+        found_coord = None
+
+        try:
+            frames = json.loads(Path(res_json).read_text())
+            detections = frames[0]["objects"] if frames else []
+        except (OSError, ValueError, LookupError) as e:
+            # darknet may have failed or produced an incomplete file;
+            # treat this as "nothing detected" instead of raising in the callback
+            self.get_logger().warning(f"darknet: cannot read result '{res_json}': {e}")
+            detections = []
+
+        for detection in detections:
+            if detection["name"] != self.objName:
+                continue
+            c_conf = detection["confidence"]
+            if c_conf > conf:
+                conf = c_conf
+                found_coord = detection["relative_coordinates"]
+
+        if found_coord:
+            # rel2bbox() returns [x, y, w, h], while "bbox_modal" holds the
+            # box corners [xmin, ymin, xmax, ymax] (see the sample below)
+            x, y, w, h = self.rel2bbox(found_coord)
+            bbox = [x, y, x + w, y + h]
+        else:
+            # no detection: use (almost) the whole frame
+            bbox = [2, 2, self._res[1]-4, self._res[0]-4]
+
+        #{"label": "fork", "bbox_modal": [329, 189, 430, 270]}
+        output_fn = path_to / "inputs/object_data.json"
+        output_json_dict = {
+            "label": self.objName,
+            "bbox_modal": bbox
+        }
+        output_fn.write_text(json.dumps([output_json_dict]))
+        return bool(found_coord)

    def listener_callback(self, data):
        """
@@ -385,10 +431,27 @@ class PoseEstimator(Node):
        current_frame = self.br.imgmsg_to_cv2(data)
        
        # Save image for Megapose
-        cv2.imwrite(str(self.objPath / "image_rgb.png"), current_frame)
+        frame_im = str(self.objPath / "image_rgb.png")
+        cv2.imwrite(frame_im, current_frame)
        self._image_cnt += 1
        
-        if self.megapose_model:
+        detected = False
+        darknet_bin = os.path.join(self.darknet_path, "darknet")
+        if os.path.isfile(darknet_bin):
+            # object detection (YoloV4 - darknet)
+            self.get_logger().info(f"darknet: begin {self._image_cnt}")
+            result_json = str(self.objPath / "res.json")
+            subprocess.run([darknet_bin, "detector", "test",
+                            os.path.join(self.darknet_path, "yolov4_objs2.data"),
+                            os.path.join(self.darknet_path, "yolov4_objs2.cfg"),
+                            os.path.join(self.darknet_path, "yolov4.weights"),
+                            "-dont_show", "-ext_output",
+                            "-out", result_json, frame_im]
+                        )
+            detected = self.yolo2megapose(result_json, self.objPath)
+            self.get_logger().info(f"darknet: end {self._image_cnt}")
+
+        if detected and self.megapose_model:
            # 6D pose estimation
            self.get_logger().info(f"megapose: begin {self._image_cnt} {self.objPath}")
            #run_inference(self.objPath,"megapose-1.0-RGB-multi-hypothesis")