Added rbs_gym package for RL & multi-robot launch setup

2024-07-04 11:38:08 +00:00 · 2024-07-04 11:38:08 +00:00 · b58307dea1
commit b58307dea1
parent f92670cd0d
103 changed files with 15170 additions and 653 deletions
--- a/.gitignore
+++ b/.gitignore
@ -2,3 +2,5 @@ ref
 *.blend1
 *.pyc
 *.vscode
 **/tensorboard_logs/**
 **/logs/**
--- a/env_manager/rbs_gym/CMakeLists.txt
+++ b/env_manager/rbs_gym/CMakeLists.txt
@ -0,0 +1,39 @@
 cmake_minimum_required(VERSION 3.8)
 project(rbs_gym)
 # Turn on the common warning set for GCC and Clang builds.
 if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
  add_compile_options(-Wall -Wextra -Wpedantic)
 endif()
 # Build-system dependencies. Further packages can be added here with
 # find_package(<dependency> REQUIRED).
 find_package(ament_cmake REQUIRED)
 # Install the Python package that shares its name with the project.
 ament_python_install_package(${PROJECT_NAME})
 # Entry-point scripts, installed as programs under lib/<project>.
 set(RBS_GYM_SCRIPTS
  scripts/train.py
  scripts/spawner.py
  scripts/velocity.py
  scripts/test_agent.py
  scripts/evaluate.py
 )
 install(PROGRAMS ${RBS_GYM_SCRIPTS} DESTINATION lib/${PROJECT_NAME})
 # World files and launch files go to the package share directory.
 install(DIRECTORY rbs_gym/envs/worlds launch DESTINATION share/${PROJECT_NAME})
 if(BUILD_TESTING)
  find_package(ament_lint_auto REQUIRED)
  # Skip the copyright linter; remove this line once every source file
  # carries a copyright and license header.
  set(ament_cmake_copyright_FOUND TRUE)
  # Skip cpplint (it only works inside a git repo); remove this line once the
  # package lives in a git repo and all sources carry a copyright and license.
  set(ament_cmake_cpplint_FOUND TRUE)
  ament_lint_auto_find_test_dependencies()
 endif()
 ament_package()
--- a/env_manager/rbs_gym/LICENSE
+++ b/env_manager/rbs_gym/LICENSE
@ -0,0 +1,202 @@
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/
   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
   1. Definitions.
      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.
      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.
      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.
      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.
      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.
      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.
      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).
      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.
      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."
      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.
   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.
   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.
   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:
      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and
      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and
      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and
      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.
      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.
   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.
   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.
   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.
   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.
   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.
   END OF TERMS AND CONDITIONS
   APPENDIX: How to apply the Apache License to your work.
      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.
   Copyright [yyyy] [name of copyright owner]
   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at
       http://www.apache.org/licenses/LICENSE-2.0
   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
--- a/env_manager/rbs_gym/hyperparams/sac.yml
+++ b/env_manager/rbs_gym/hyperparams/sac.yml
@ -0,0 +1,42 @@
 # Reach
 Reach-Gazebo-v0:
  policy: "MlpPolicy"
  policy_kwargs:
    n_critics: 2
    net_arch: [128, 64]
  n_timesteps: 200000
  buffer_size: 25000
  learning_starts: 5000
  batch_size: 512
  learning_rate: lin_0.0002
  gamma: 0.95
  tau: 0.001
  ent_coef: "auto_0.1"
  target_entropy: "auto"
  train_freq: [1, "episode"]
  gradient_steps: 100
  noise_type: "normal"
  noise_std: 0.025
  use_sde: False
  optimize_memory_usage: False
 Reach-ColorImage-Gazebo-v0:
  policy: "CnnPolicy"
  policy_kwargs:
    n_critics: 2
    net_arch: [128, 128]
  n_timesteps: 50000
  buffer_size: 25000
  learning_starts: 5000
  batch_size: 32
  learning_rate: lin_0.0002
  gamma: 0.95
  tau: 0.0005
  ent_coef: "auto_0.1"
  target_entropy: "auto"
  train_freq: [1, "episode"]
  gradient_steps: 100
  noise_type: "normal"
  noise_std: 0.025
  use_sde: False
  optimize_memory_usage: False
--- a/env_manager/rbs_gym/hyperparams/td3.yml
+++ b/env_manager/rbs_gym/hyperparams/td3.yml
@ -0,0 +1,39 @@
 Reach-Gazebo-v0:
  policy: "MlpPolicy"
  policy_kwargs:
    n_critics: 2
    net_arch: [128, 64]
  n_timesteps: 200000
  buffer_size: 25000
  learning_starts: 5000
  batch_size: 512
  learning_rate: lin_0.0002
  gamma: 0.95
  tau: 0.001
  train_freq: [1, "episode"]
  gradient_steps: 100
  target_policy_noise: 0.1
  target_noise_clip: 0.2
  noise_type: "normal"
  noise_std: 0.025
  optimize_memory_usage: False
 Reach-ColorImage-Gazebo-v0:
  policy: "CnnPolicy"
  policy_kwargs:
    n_critics: 2
    net_arch: [128, 128]
  n_timesteps: 50000
  buffer_size: 25000
  learning_starts: 5000
  batch_size: 32
  learning_rate: lin_0.0002
  gamma: 0.95
  tau: 0.0005
  train_freq: [1, "episode"]
  gradient_steps: 100
  target_policy_noise: 0.1
  target_noise_clip: 0.2
  noise_type: "normal"
  noise_std: 0.025
  optimize_memory_usage: True
--- a/env_manager/rbs_gym/hyperparams/tqc.yml
+++ b/env_manager/rbs_gym/hyperparams/tqc.yml
@ -0,0 +1,46 @@
 # Reach
 Reach-Gazebo-v0:
  policy: "MlpPolicy"
  policy_kwargs:
    n_quantiles: 25
    n_critics: 2
    net_arch: [128, 64]
  n_timesteps: 200000
  buffer_size: 25000
  learning_starts: 5000
  batch_size: 512
  learning_rate: lin_0.0002
  gamma: 0.95
  tau: 0.001
  ent_coef: "auto_0.1"
  target_entropy: "auto"
  top_quantiles_to_drop_per_net: 2
  train_freq: [1, "episode"]
  gradient_steps: 100
  noise_type: "normal"
  noise_std: 0.025
  use_sde: False
  optimize_memory_usage: False
 Reach-ColorImage-Gazebo-v0:
  policy: "CnnPolicy"
  policy_kwargs:
    n_quantiles: 25
    n_critics: 2
    net_arch: [128, 128]
  n_timesteps: 50000
  buffer_size: 25000
  learning_starts: 5000
  batch_size: 32
  learning_rate: lin_0.0002
  gamma: 0.95
  tau: 0.0005
  ent_coef: "auto_0.1"
  target_entropy: "auto"
  top_quantiles_to_drop_per_net: 2
  train_freq: [1, "episode"]
  gradient_steps: 100
  noise_type: "normal"
  noise_std: 0.025
  use_sde: False
  optimize_memory_usage: True
--- a/env_manager/rbs_gym/launch/evaluate.launch.py
+++ b/env_manager/rbs_gym/launch/evaluate.launch.py
@ -0,0 +1,426 @@
 from launch import LaunchContext, LaunchDescription
 from launch.actions import (
    DeclareLaunchArgument,
    IncludeLaunchDescription,
    OpaqueFunction,
    SetEnvironmentVariable,
    TimerAction
 )
 from launch.launch_description_sources import PythonLaunchDescriptionSource
 from launch.substitutions import LaunchConfiguration, PathJoinSubstitution
 from launch_ros.substitutions import FindPackageShare
 from launch_ros.actions import Node
 import os
 from os import cpu_count
 from ament_index_python.packages import get_package_share_directory
 def launch_setup(context, *args, **kwargs):
    """Create the actions that run an RL evaluation together with the robot stack.

    Two actions are produced:

    * a ``Node`` that runs ``evaluate.py`` from the ``rbs_gym`` package, fed with
      the evaluation-related launch arguments as command-line options;
    * a ``TimerAction`` that includes ``rbs_robot.launch.py`` from ``rbs_bringup``
      10 seconds later.

    Args:
        context: Launch context handed over by ``OpaqueFunction`` (not used here).
        *args: Extra positional values from ``OpaqueFunction`` (not used here).
        **kwargs: Extra keyword values from ``OpaqueFunction`` (not used here).

    Returns:
        list: ``[rl_task, delay_robot_control_stack]``.
    """
    use_sim_time = LaunchConfiguration("use_sim_time")

    # The controllers file is taken from the rbs_arm share directory; the
    # "controllers_file" launch argument is not forwarded to the include.
    initial_joint_controllers_file_path = os.path.join(
        get_package_share_directory('rbs_arm'), 'config', 'rbs_arm0_controllers.yaml'
    )

    # Arguments forwarded to rbs_robot.launch.py. "gazebo_gui" is declared by
    # this launch file but is not forwarded.
    robot_launch_arguments = {
        "env_manager": LaunchConfiguration("env_manager"),
        "with_gripper": LaunchConfiguration("with_gripper"),
        "gripper_name": LaunchConfiguration("gripper_name"),
        "robot_type": LaunchConfiguration("robot_type"),
        "controllers_file": initial_joint_controllers_file_path,
        "cartesian_controllers": LaunchConfiguration("cartesian_controllers"),
        "description_package": LaunchConfiguration("description_package"),
        "description_file": LaunchConfiguration("description_file"),
        "robot_name": LaunchConfiguration("robot_name"),
        "start_joint_controller": LaunchConfiguration("start_joint_controller"),
        "initial_joint_controller": LaunchConfiguration("initial_joint_controller"),
        # The value comes from this file's "launch_sim" argument.
        "launch_simulation": LaunchConfiguration("launch_sim"),
        "launch_moveit": LaunchConfiguration("launch_moveit"),
        "launch_task_planner": LaunchConfiguration("launch_task_planner"),
        "launch_perception": LaunchConfiguration("launch_perception"),
        "moveit_config_package": LaunchConfiguration("moveit_config_package"),
        "moveit_config_file": LaunchConfiguration("moveit_config_file"),
        "use_sim_time": use_sim_time,
        "sim_gazebo": LaunchConfiguration("sim_gazebo"),
        "hardware": LaunchConfiguration("hardware"),
        "launch_controllers": LaunchConfiguration("launch_controllers"),
    }
    single_robot_setup = IncludeLaunchDescription(
        PythonLaunchDescriptionSource([
            PathJoinSubstitution([
                FindPackageShare('rbs_bringup'),
                "launch",
                "rbs_robot.launch.py"
            ])
        ]),
        launch_arguments=robot_launch_arguments.items()
    )

    # Command line of evaluate.py: option flag followed by its value.
    # Named so that it does not shadow the ``*args`` parameter.
    evaluate_cli_args = [
        "--env", LaunchConfiguration("env"),
        "--env-kwargs", LaunchConfiguration("env_kwargs"),
        "--algo", LaunchConfiguration("algo"),
        "--seed", LaunchConfiguration("seed"),
        "--num-threads", LaunchConfiguration("num_threads"),
        "--n-episodes", LaunchConfiguration("n_episodes"),
        "--log-folder", LaunchConfiguration("log_folder"),
        "--exp-id", LaunchConfiguration("exp_id"),
        "--load-best", LaunchConfiguration("load_best"),
        "--load-checkpoint", LaunchConfiguration("load_checkpoint"),
        "--stochastic", LaunchConfiguration("stochastic"),
        "--reward-log", LaunchConfiguration("reward_log"),
        "--norm-reward", LaunchConfiguration("norm_reward"),
        "--no-render", LaunchConfiguration("no_render"),
        "--verbose", LaunchConfiguration("verbose"),
        "--ros-args",
        "--log-level", LaunchConfiguration("log_level"),
    ]
    rl_task = Node(
        package="rbs_gym",
        executable="evaluate.py",
        output="log",
        arguments=evaluate_cli_args,
        parameters=[{"use_sim_time": use_sim_time}],
    )

    # The robot stack is included 10 s after the evaluation node is launched.
    delay_robot_control_stack = TimerAction(
        period=10.0,
        actions=[single_robot_setup]
    )
    return [rl_task, delay_robot_control_stack]
 def generate_launch_description():
    """Declare the launch arguments of the evaluation launch file and assemble it.

    Returns:
        LaunchDescription: the OpenMP environment-variable actions, followed by
        every declared launch argument, followed by the ``OpaqueFunction`` that
        calls ``launch_setup``.
    """
    declared_arguments = [
        DeclareLaunchArgument(
            "robot_type",
            description="Type of robot by name",
            choices=["rbs_arm","ur3", "ur3e", "ur5", "ur5e", "ur10", "ur10e", "ur16e"],
            default_value="rbs_arm",
        ),
        # General arguments
        DeclareLaunchArgument(
            "controllers_file",
            default_value="rbs_arm_controllers_gazebosim.yaml",
            description="YAML file with the controllers configuration.",
        ),
        DeclareLaunchArgument(
            "description_package",
            default_value="rbs_arm",
            description="Description package with robot URDF/XACRO files. Usually the argument \
        is not set, it enables use of a custom description.",
        ),
        DeclareLaunchArgument(
            "description_file",
            default_value="rbs_arm_modular.xacro",
            description="URDF/XACRO description file with the robot.",
        ),
        DeclareLaunchArgument(
            "robot_name",
            default_value="arm0",
            description="Name for robot, used to apply namespace for specific robot in multirobot setup",
        ),
        # NOTE(review): this description reads like it belongs to another
        # argument - confirm the intended help text.
        DeclareLaunchArgument(
            "start_joint_controller",
            default_value="false",
            description="Enable headless mode for robot control",
        ),
        DeclareLaunchArgument(
            "initial_joint_controller",
            default_value="joint_trajectory_controller",
            description="Robot controller to start.",
        ),
        DeclareLaunchArgument(
            "moveit_config_package",
            default_value="rbs_arm",
            description="MoveIt config package with robot SRDF/XACRO files. Usually the argument \
        is not set, it enables use of a custom moveit config.",
        ),
        DeclareLaunchArgument(
            "moveit_config_file",
            default_value="rbs_arm.srdf.xacro",
            description="MoveIt SRDF/XACRO description file with the robot.",
        ),
        DeclareLaunchArgument(
            "use_sim_time",
            default_value="true",
            description="Make MoveIt to use simulation time.\
            This is needed for the trajectory planing in simulation.",
        ),
        DeclareLaunchArgument(
            "gripper_name",
            default_value="rbs_gripper",
            choices=["rbs_gripper", ""],
            description="choose gripper by name (leave empty if hasn't)",
        ),
        DeclareLaunchArgument(
            "with_gripper",
            default_value="true",
            description="With gripper or not?",
        ),
        DeclareLaunchArgument(
            "sim_gazebo",
            default_value="true",
            description="Gazebo Simulation",
        ),
        DeclareLaunchArgument(
            "env_manager",
            default_value="false",
            description="Launch env_manager?",
        ),
        DeclareLaunchArgument(
            "launch_sim",
            default_value="true",
            description="Launch simulator (Gazebo)?\
                              Most general arg",
        ),
        DeclareLaunchArgument(
            "launch_moveit",
            default_value="false",
            description="Launch moveit?",
        ),
        DeclareLaunchArgument(
            "launch_perception",
            default_value="false",
            description="Launch perception?",
        ),
        DeclareLaunchArgument(
            "launch_task_planner",
            default_value="false",
            description="Launch task_planner?",
        ),
        DeclareLaunchArgument(
            "cartesian_controllers",
            default_value="true",
            description="Load cartesian\
                              controllers?",
        ),
        DeclareLaunchArgument(
            "hardware",
            choices=["gazebo", "mock"],
            default_value="gazebo",
            description="Choose your harware_interface",
        ),
        DeclareLaunchArgument(
            "launch_controllers",
            default_value="true",
            description="Launch controllers?",
        ),
        DeclareLaunchArgument(
            "gazebo_gui",
            default_value="true",
            description="Launch gazebo with gui?",
        ),
        # Evaluation arguments
        DeclareLaunchArgument(
            "env",
            default_value="Reach-Gazebo-v0",
            description="Environment ID",
        ),
        DeclareLaunchArgument(
            "env_kwargs",
            default_value="",
            description="Optional keyword argument to pass to the env constructor.",
        ),
        DeclareLaunchArgument(
            "vec_env",
            default_value="dummy",
            description="Type of VecEnv to use (dummy or subproc).",
        ),
        # Algorithm
        DeclareLaunchArgument(
            "algo",
            default_value="sac",
            description="RL algorithm to use during the training.",
        ),
        DeclareLaunchArgument(
            "num_threads",
            default_value="-1",
            description="Number of threads for PyTorch (-1 to use default).",
        ),
        # Random seed
        DeclareLaunchArgument(
            "seed",
            default_value="84",
            description="Random generator seed.",
        ),
        # Logging
        DeclareLaunchArgument(
            "log_folder",
            default_value="logs",
            description="Path to the log directory.",
        ),
        # Verbosity
        DeclareLaunchArgument(
            "verbose",
            default_value="1",
            description="Verbose mode (0: no output, 1: INFO).",
        ),
        # ROS 2 log level
        DeclareLaunchArgument(
            "log_level",
            default_value="error",
            description="The level of logging that is applied to all ROS 2 nodes launched by this script.",
        ),
        # Evaluation run and model selection
        DeclareLaunchArgument(
            "n_episodes",
            default_value="200",
            description="Number of evaluation episodes.",
        ),
        DeclareLaunchArgument(
            "exp_id",
            default_value="0",
            description="Experiment ID (default: 0: latest, -1: no exp folder).",
        ),
        DeclareLaunchArgument(
            "load_best",
            default_value="false",
            description="Load best model instead of last model if available.",
        ),
        DeclareLaunchArgument(
            "load_checkpoint",
            default_value="0",
            description="Load checkpoint instead of last model if available, you must pass the number of timesteps corresponding to it.",
        ),
        DeclareLaunchArgument(
            "stochastic",
            default_value="false",
            description="Use stochastic actions instead of deterministic.",
        ),
        DeclareLaunchArgument(
            "reward_log",
            default_value="reward_logs",
            description="Where to log reward.",
        ),
        DeclareLaunchArgument(
            "norm_reward",
            default_value="false",
            description="Normalize reward if applicable (trained with VecNormalize)",
        ),
        DeclareLaunchArgument(
            "no_render",
            default_value="true",
            description="Do not render the environment (useful for tests).",
        ),
    ]

    # Use half of the CPUs for OpenMP, but never fewer than one thread:
    # cpu_count() may return None, and a single-core host would otherwise
    # produce the invalid value 0.
    omp_threads = max(1, (cpu_count() or 1) // 2)
    env_variables = [
        SetEnvironmentVariable(name="OMP_DYNAMIC", value="TRUE"),
        SetEnvironmentVariable(name="OMP_NUM_THREADS", value=str(omp_threads)),
    ]

    # The environment actions are listed first so that they precede the actions
    # returned by launch_setup (which include the evaluation node).
    return LaunchDescription(
        env_variables + declared_arguments + [OpaqueFunction(function=launch_setup)]
    )
--- a/env_manager/rbs_gym/launch/optimize.launch.py
+++ b/env_manager/rbs_gym/launch/optimize.launch.py
@ -0,0 +1,519 @@
 from launch import LaunchDescription
 from launch.actions import (
    DeclareLaunchArgument,
    IncludeLaunchDescription,
    OpaqueFunction,
    TimerAction
 )
 from launch.launch_description_sources import PythonLaunchDescriptionSource
 from launch.substitutions import LaunchConfiguration, PathJoinSubstitution
 from launch_ros.substitutions import FindPackageShare
 from launch_ros.actions import Node
 import os
 from os import cpu_count
 from ament_index_python.packages import get_package_share_directory
 def launch_setup(context, *args, **kwargs):
    """Create the actions for a hyperparameter-optimization run.

    Used as the callback of the ``OpaqueFunction`` created in
    ``generate_launch_description``. ``context``, ``args`` and ``kwargs``
    are accepted for that callback signature but are not read here.

    Returns:
        A two-element list: the ``rbs_gym`` ``train.py`` node (invoked with
        ``--optimize-hyperparameters True``) followed by a ``TimerAction``
        that includes ``rbs_bringup``'s ``launch/rbs_robot.launch.py`` after
        a 10 second delay.
    """
    cfg = LaunchConfiguration

    # Controller configuration handed to the robot bringup.
    # The "controllers_file" launch argument is NOT forwarded: this fixed path
    # is always used (previously a duplicate dict key made that implicit).
    # NOTE(review): pass cfg("controllers_file") below instead if the argument
    # is meant to be user-overridable.
    controllers_path = os.path.join(
        get_package_share_directory('rbs_arm'), 'config', 'rbs_arm0_controllers.yaml'
    )

    # Arguments forwarded to rbs_robot.launch.py. Keys are the names expected
    # by the included file; note "launch_simulation" is fed from "launch_sim".
    robot_launch_arguments = {
        "env_manager": cfg("env_manager"),
        "with_gripper": cfg("with_gripper"),
        "gripper_name": cfg("gripper_name"),
        "controllers_file": controllers_path,
        "robot_type": cfg("robot_type"),
        "cartesian_controllers": cfg("cartesian_controllers"),
        "description_package": cfg("description_package"),
        "description_file": cfg("description_file"),
        "robot_name": cfg("robot_name"),
        "start_joint_controller": cfg("start_joint_controller"),
        "initial_joint_controller": cfg("initial_joint_controller"),
        "launch_simulation": cfg("launch_sim"),
        "launch_moveit": cfg("launch_moveit"),
        "launch_task_planner": cfg("launch_task_planner"),
        "launch_perception": cfg("launch_perception"),
        "moveit_config_package": cfg("moveit_config_package"),
        "moveit_config_file": cfg("moveit_config_file"),
        "use_sim_time": cfg("use_sim_time"),
        "sim_gazebo": cfg("sim_gazebo"),
        "hardware": cfg("hardware"),
        "launch_controllers": cfg("launch_controllers"),
        # "gazebo_gui": gazebo_gui
    }

    single_robot_setup = IncludeLaunchDescription(
        PythonLaunchDescriptionSource([
            PathJoinSubstitution([
                FindPackageShare('rbs_bringup'),
                "launch",
                "rbs_robot.launch.py"
            ])
        ]),
        launch_arguments=robot_launch_arguments.items()
    )

    # Command-line options for train.py, as (flag, value) pairs in the order
    # they are emitted. The "hyperparams" launch argument is declared by this
    # file but is not passed on to the script.
    train_options = [
        ("--env", cfg("env")),
        ("--env-kwargs", cfg("env_kwargs")),
        ("--algo", cfg("algo")),
        ("--seed", cfg("seed")),
        ("--num-threads", cfg("num_threads")),
        ("--n-timesteps", cfg("n_timesteps")),
        ("--preload-replay-buffer", cfg("preload_replay_buffer")),
        ("--log-folder", cfg("log_folder")),
        ("--tensorboard-log", cfg("tensorboard_log")),
        ("--log-interval", cfg("log_interval")),
        ("--uuid", cfg("uuid")),
        ("--optimize-hyperparameters", "True"),
        ("--sampler", cfg("sampler")),
        ("--pruner", cfg("pruner")),
        ("--n-trials", cfg("n_trials")),
        ("--n-startup-trials", cfg("n_startup_trials")),
        ("--n-evaluations", cfg("n_evaluations")),
        ("--n-jobs", cfg("n_jobs")),
        ("--storage", cfg("storage")),
        ("--study-name", cfg("study_name")),
        ("--eval-episodes", cfg("eval_episodes")),
        ("--verbose", cfg("verbose")),
        ("--truncate-last-trajectory", cfg("truncate_last_trajectory")),
    ]
    # Named train_args so the *args parameter is no longer shadowed.
    train_args = [token for pair in train_options for token in pair]
    train_args += ["--ros-args", "--log-level", cfg("log_level")]

    rl_task = Node(
        package="rbs_gym",
        executable="train.py",
        output="log",
        arguments=train_args,
        # Hard-coded True; the "use_sim_time" launch argument only reaches
        # the included robot launch file.
        parameters=[{"use_sim_time": True}]
    )

    # The robot stack is included 10 s after the training node is started.
    delay_robot_control_stack = TimerAction(
        period=10.0,
        actions=[single_robot_setup]
    )

    return [rl_task, delay_robot_control_stack]
 def generate_launch_description():
    """Declare the launch arguments of the hyperparameter-optimization launch file.

    Returns:
        A ``LaunchDescription`` containing every argument declaration, in the
        order listed below, followed by an ``OpaqueFunction`` that runs
        ``launch_setup``.
    """

    def declare(name, default_value, description, **extra):
        # One table row -> one DeclareLaunchArgument; ``extra`` carries
        # optional keywords such as ``choices``.
        return DeclareLaunchArgument(
            name, default_value=default_value, description=description, **extra
        )

    # Some help texts contain a run of spaces in the middle of the sentence.
    # Those runs are part of the existing strings and are reproduced with
    # " " * n so the text shown to the user stays exactly the same.

    # Robot / simulation arguments
    robot_rows = [
        declare("robot_type", "rbs_arm", "Type of robot by name",
                choices=["rbs_arm", "ur3", "ur3e", "ur5", "ur5e", "ur10", "ur10e", "ur16e"]),
        declare("controllers_file", "rbs_arm_controllers_gazebosim.yaml",
                "YAML file with the controllers configuration."),
        declare("description_package", "rbs_arm",
                "Description package with robot URDF/XACRO files. Usually the argument "
                + " " * 8 + "is not set, it enables use of a custom description."),
        declare("description_file", "rbs_arm_modular.xacro",
                "URDF/XACRO description file with the robot."),
        declare("robot_name", "arm0",
                "Name for robot, used to apply namespace for specific robot in multirobot setup"),
        declare("start_joint_controller", "false",
                "Enable headless mode for robot control"),
        declare("initial_joint_controller", "joint_trajectory_controller",
                "Robot controller to start."),
        declare("moveit_config_package", "rbs_arm",
                "MoveIt config package with robot SRDF/XACRO files. Usually the argument "
                + " " * 8 + "is not set, it enables use of a custom moveit config."),
        declare("moveit_config_file", "rbs_arm.srdf.xacro",
                "MoveIt SRDF/XACRO description file with the robot."),
        declare("use_sim_time", "true",
                "Make MoveIt to use simulation time."
                + " " * 12 + "This is needed for the trajectory planing in simulation."),
        declare("gripper_name", "rbs_gripper",
                "choose gripper by name (leave empty if hasn't)",
                choices=["rbs_gripper", ""]),
        declare("with_gripper", "true", "With gripper or not?"),
        declare("sim_gazebo", "true", "Gazebo Simulation"),
        declare("env_manager", "false", "Launch env_manager?"),
        declare("launch_sim", "true",
                "Launch simulator (Gazebo)?" + " " * 30 + "Most general arg"),
        declare("launch_moveit", "false", "Launch moveit?"),
        declare("launch_perception", "false", "Launch perception?"),
        declare("launch_task_planner", "false", "Launch task_planner?"),
        declare("cartesian_controllers", "true",
                "Load cartesian" + " " * 30 + "controllers?"),
        declare("hardware", "gazebo", "Choose your harware_interface",
                choices=["gazebo", "mock"]),
        declare("launch_controllers", "true", "Launch controllers?"),
        declare("gazebo_gui", "true", "Launch gazebo with gui?"),
    ]

    # Training arguments
    training_rows = [
        declare("env", "Reach-Gazebo-v0", "Environment ID"),
        declare("env_kwargs", "",
                "Optional keyword argument to pass to the env constructor."),
        declare("vec_env", "dummy", "Type of VecEnv to use (dummy or subproc)."),
        # Algorithm and training
        declare("algo", "sac", "RL algorithm to use during the training."),
        declare("n_timesteps", "-1", "Overwrite the number of timesteps."),
        declare("hyperparams", "",
                "Optional RL hyperparameter overwrite (e.g. learning_rate:0.01 train_freq:10)."),
        declare("num_threads", "-1",
                "Number of threads for PyTorch (-1 to use default)."),
        # Continue training an already trained agent
        declare("trained_agent", "",
                "Path to a pretrained agent to continue training."),
        # Random seed
        declare("seed", "84", "Random generator seed."),
        # Saving of model
        declare("save_freq", "10000",
                "Save the model every n steps (if negative, no checkpoint)."),
        declare("save_replay_buffer", "False",
                "Save the replay buffer too (when applicable)."),
        # Pre-load a replay buffer and start training on it
        declare("preload_replay_buffer", "",
                "Path to a replay buffer that should be preloaded before starting the training process."),
        # Logging
        declare("log_folder", "logs", "Path to the log directory."),
        declare("tensorboard_log", "tensorboard_logs", "Tensorboard log dir."),
        declare("log_interval", "-1",
                "Override log interval (default: -1, no change)."),
        declare("uuid", "False", "Ensure that the run has a unique ID."),
        # Hyperparameter optimization
        declare("sampler", "tpe",
                "Sampler to use when optimizing hyperparameters (random, tpe or skopt)."),
        declare("pruner", "median",
                "Pruner to use when optimizing hyperparameters (halving, median or none)."),
        declare("n_trials", "10",
                "Number of trials for optimizing hyperparameters."),
        declare("n_startup_trials", "5",
                "Number of trials before using optuna sampler."),
        declare("n_evaluations", "2",
                "Number of evaluations for hyperparameter optimization."),
        declare("n_jobs", "1",
                "Number of parallel jobs when optimizing hyperparameters."),
        declare("storage", "",
                "Database storage path if distributed optimization should be used."),
        declare("study_name", "", "Study name for distributed optimization."),
        # Evaluation
        declare("eval_freq", "-1",
                "Evaluate the agent every n steps (if negative, no evaluation)."),
        declare("eval_episodes", "5",
                "Number of episodes to use for evaluation."),
        # Verbosity
        declare("verbose", "1", "Verbose mode (0: no output, 1: INFO)."),
        # HER specifics
        declare("truncate_last_trajectory", "True",
                "When using HER with online sampling the last trajectory in the replay buffer will be truncated after) reloading the replay buffer."),
        declare("log_level", "error",
                "The level of logging that is applied to all ROS 2 nodes launched by this script."),
    ]

    # OpenMP environment variables are left disabled in this launch file:
    #   env_variables = [
    #       SetEnvironmentVariable(name="OMP_DYNAMIC", value="TRUE"),
    #       SetEnvironmentVariable(name="OMP_NUM_THREADS", value=str(cpu_count() // 2))
    #   ]
    setup_action = OpaqueFunction(function=launch_setup)
    return LaunchDescription(robot_rows + training_rows + [setup_action])
--- a/env_manager/rbs_gym/launch/test_env.launch.py
+++ b/env_manager/rbs_gym/launch/test_env.launch.py
@ -0,0 +1,415 @@
 from launch import LaunchDescription
 from launch.actions import (
    DeclareLaunchArgument,
    IncludeLaunchDescription,
    OpaqueFunction,
    SetEnvironmentVariable,
    TimerAction
 )
 from launch.launch_description_sources import PythonLaunchDescriptionSource
 from launch.substitutions import LaunchConfiguration, PathJoinSubstitution
 from launch_ros.substitutions import FindPackageShare
 from launch_ros.actions import Node
 import os
 from os import cpu_count
 from ament_index_python.packages import get_package_share_directory
 def launch_setup(context, *args, **kwargs):
    """Create the actions for an environment test run.

    Used as the callback of the ``OpaqueFunction`` created in
    ``generate_launch_description``. ``context``, ``args`` and ``kwargs``
    are accepted for that callback signature but are not read here.

    Returns:
        A three-element list: the ``rbs_gym`` ``test_agent.py`` node, a
        ``ros_gz_bridge`` ``parameter_bridge`` node for ``/clock``, and a
        ``TimerAction`` that includes ``rbs_bringup``'s
        ``launch/rbs_robot.launch.py`` after a 10 second delay.
    """
    cfg = LaunchConfiguration

    # Controller configuration handed to the robot bringup.
    # The "controllers_file" launch argument is NOT forwarded: this fixed path
    # is always used (previously a duplicate dict key made that implicit).
    # NOTE(review): pass cfg("controllers_file") below instead if the argument
    # is meant to be user-overridable.
    controllers_path = os.path.join(
        get_package_share_directory('rbs_arm'), 'config', 'rbs_arm0_controllers.yaml'
    )

    # Arguments forwarded to rbs_robot.launch.py. Keys are the names expected
    # by the included file; note "launch_simulation" is fed from "launch_sim".
    robot_launch_arguments = {
        "env_manager": cfg("env_manager"),
        "with_gripper": cfg("with_gripper"),
        "gripper_name": cfg("gripper_name"),
        "controllers_file": controllers_path,
        "robot_type": cfg("robot_type"),
        "cartesian_controllers": cfg("cartesian_controllers"),
        "description_package": cfg("description_package"),
        "description_file": cfg("description_file"),
        "robot_name": cfg("robot_name"),
        "start_joint_controller": cfg("start_joint_controller"),
        "initial_joint_controller": cfg("initial_joint_controller"),
        "launch_simulation": cfg("launch_sim"),
        "launch_moveit": cfg("launch_moveit"),
        "launch_task_planner": cfg("launch_task_planner"),
        "launch_perception": cfg("launch_perception"),
        "moveit_config_package": cfg("moveit_config_package"),
        "moveit_config_file": cfg("moveit_config_file"),
        "use_sim_time": cfg("use_sim_time"),
        "sim_gazebo": cfg("sim_gazebo"),
        "hardware": cfg("hardware"),
        "launch_controllers": cfg("launch_controllers"),
        # "gazebo_gui": gazebo_gui
    }

    single_robot_setup = IncludeLaunchDescription(
        PythonLaunchDescriptionSource([
            PathJoinSubstitution([
                FindPackageShare('rbs_bringup'),
                "launch",
                "rbs_robot.launch.py"
            ])
        ]),
        launch_arguments=robot_launch_arguments.items()
    )

    # Command line for test_agent.py. Named agent_args so the *args parameter
    # is no longer shadowed.
    agent_args = [
        "--env",
        cfg("env"),
        "--env-kwargs",
        cfg("env_kwargs"),
        "--ros-args",
        "--log-level",
        cfg("log_level"),
    ]

    rl_task = Node(
        package="rbs_gym",
        executable="test_agent.py",
        output="log",
        arguments=agent_args,
        # Hard-coded True; the "use_sim_time" launch argument only reaches
        # the included robot launch file.
        parameters=[{"use_sim_time": True}]
    )

    # Bridge for the /clock topic between ignition.msgs.Clock and
    # rosgraph_msgs/msg/Clock.
    clock_bridge = Node(
        package='ros_gz_bridge',
        executable='parameter_bridge',
        arguments=['/clock@rosgraph_msgs/msg/Clock[ignition.msgs.Clock'],
        output='screen')

    # The robot stack is included 10 s after the other nodes are started.
    delay_robot_control_stack = TimerAction(
        period=10.0,
        actions=[single_robot_setup]
    )

    return [rl_task, clock_bridge, delay_robot_control_stack]
 def generate_launch_description():
    """Declare the launch arguments of the environment test launch file.

    Returns:
        A ``LaunchDescription`` containing every argument declaration, in the
        order listed below, then an ``OpaqueFunction`` that runs
        ``launch_setup``, then two ``SetEnvironmentVariable`` actions for
        ``OMP_DYNAMIC`` and ``OMP_NUM_THREADS``.
    """

    def declare(name, default_value, description, **extra):
        # One table row -> one DeclareLaunchArgument; ``extra`` carries
        # optional keywords such as ``choices``.
        return DeclareLaunchArgument(
            name, default_value=default_value, description=description, **extra
        )

    # Some help texts contain a run of spaces in the middle of the sentence.
    # Those runs are part of the existing strings and are reproduced with
    # " " * n so the text shown to the user stays exactly the same.

    # Robot / simulation arguments
    robot_rows = [
        declare("robot_type", "rbs_arm", "Type of robot by name",
                choices=["rbs_arm", "ur3", "ur3e", "ur5", "ur5e", "ur10", "ur10e", "ur16e"]),
        declare("controllers_file", "rbs_arm_controllers_gazebosim.yaml",
                "YAML file with the controllers configuration."),
        declare("description_package", "rbs_arm",
                "Description package with robot URDF/XACRO files. Usually the argument "
                + " " * 8 + "is not set, it enables use of a custom description."),
        declare("description_file", "rbs_arm_modular.xacro",
                "URDF/XACRO description file with the robot."),
        declare("robot_name", "arm0",
                "Name for robot, used to apply namespace for specific robot in multirobot setup"),
        declare("start_joint_controller", "false",
                "Enable headless mode for robot control"),
        declare("initial_joint_controller", "joint_trajectory_controller",
                "Robot controller to start."),
        declare("moveit_config_package", "rbs_arm",
                "MoveIt config package with robot SRDF/XACRO files. Usually the argument "
                + " " * 8 + "is not set, it enables use of a custom moveit config."),
        declare("moveit_config_file", "rbs_arm.srdf.xacro",
                "MoveIt SRDF/XACRO description file with the robot."),
        declare("use_sim_time", "true",
                "Make MoveIt to use simulation time."
                + " " * 12 + "This is needed for the trajectory planing in simulation."),
        declare("gripper_name", "rbs_gripper",
                "choose gripper by name (leave empty if hasn't)",
                choices=["rbs_gripper", ""]),
        declare("with_gripper", "true", "With gripper or not?"),
        declare("sim_gazebo", "true", "Gazebo Simulation"),
        declare("env_manager", "false", "Launch env_manager?"),
        declare("launch_sim", "true",
                "Launch simulator (Gazebo)?" + " " * 30 + "Most general arg"),
        declare("launch_moveit", "false", "Launch moveit?"),
        declare("launch_perception", "false", "Launch perception?"),
        declare("launch_task_planner", "false", "Launch task_planner?"),
        declare("cartesian_controllers", "true",
                "Load cartesian" + " " * 30 + "controllers?"),
        declare("hardware", "gazebo", "Choose your harware_interface",
                choices=["gazebo", "mock"]),
        declare("launch_controllers", "true", "Launch controllers?"),
        declare("gazebo_gui", "true", "Launch gazebo with gui?"),
    ]

    # Training arguments
    training_rows = [
        declare("env", "Reach-Gazebo-v0", "Environment ID"),
        declare("env_kwargs", "",
                "Optional keyword argument to pass to the env constructor."),
        declare("vec_env", "dummy", "Type of VecEnv to use (dummy or subproc)."),
        # Algorithm and training
        declare("algo", "sac", "RL algorithm to use during the training."),
        declare("n_timesteps", "-1", "Overwrite the number of timesteps."),
        declare("hyperparams", "",
                "Optional RL hyperparameter overwrite (e.g. learning_rate:0.01 train_freq:10)."),
        declare("num_threads", "-1",
                "Number of threads for PyTorch (-1 to use default)."),
        # Continue training an already trained agent
        declare("trained_agent", "",
                "Path to a pretrained agent to continue training."),
        # Random seed
        declare("seed", "-1", "Random generator seed."),
        # Saving of model
        declare("save_freq", "10000",
                "Save the model every n steps (if negative, no checkpoint)."),
        declare("save_replay_buffer", "False",
                "Save the replay buffer too (when applicable)."),
        # Pre-load a replay buffer and start training on it
        declare("preload_replay_buffer", "",
                "Path to a replay buffer that should be preloaded before starting the training process."),
        # Logging
        declare("log_folder", "logs", "Path to the log directory."),
        declare("tensorboard_log", "tensorboard_logs", "Tensorboard log dir."),
        declare("log_interval", "-1",
                "Override log interval (default: -1, no change)."),
        declare("uuid", "False", "Ensure that the run has a unique ID."),
        # Evaluation
        declare("eval_freq", "-1",
                "Evaluate the agent every n steps (if negative, no evaluation)."),
        declare("eval_episodes", "5",
                "Number of episodes to use for evaluation."),
        # Verbosity
        declare("verbose", "1", "Verbose mode (0: no output, 1: INFO)."),
        # HER specifics
        declare("truncate_last_trajectory", "True",
                "When using HER with online sampling the last trajectory in the replay buffer will be truncated after) reloading the replay buffer."),
        declare("log_level", "error",
                "The level of logging that is applied to all ROS 2 nodes launched by this script."),
    ]

    # Half of the available CPUs, but never fewer than one thread:
    # os.cpu_count() may return None, and halving a single core would give 0,
    # which is not a valid OMP_NUM_THREADS value.
    omp_threads = max(1, (cpu_count() or 1) // 2)
    # NOTE(review): these actions are placed after the OpaqueFunction, as
    # before; confirm they take effect for the node started by launch_setup.
    env_variables = [
        SetEnvironmentVariable(name="OMP_DYNAMIC", value="TRUE"),
        SetEnvironmentVariable(name="OMP_NUM_THREADS", value=str(omp_threads)),
    ]

    setup_action = OpaqueFunction(function=launch_setup)
    return LaunchDescription(robot_rows + training_rows + [setup_action] + env_variables)
--- a/env_manager/rbs_gym/launch/train.launch.py
+++ b/env_manager/rbs_gym/launch/train.launch.py
@ -0,0 +1,476 @@
 from launch import LaunchDescription
 from launch.actions import (
    DeclareLaunchArgument,
    IncludeLaunchDescription,
    OpaqueFunction,
    SetEnvironmentVariable,
    TimerAction
 )
 from launch.launch_description_sources import PythonLaunchDescriptionSource
 from launch.substitutions import LaunchConfiguration, PathJoinSubstitution
 from launch_ros.substitutions import FindPackageShare
 from launch_ros.actions import Node
 import os
 from os import cpu_count
 from ament_index_python.packages import get_package_share_directory
 def launch_setup(context, *args, **kwargs):
    """Create the launch actions for an RL training session.

    Three actions are returned: the `train.py` node of `rbs_gym`, a
    `ros_gz_bridge` parameter bridge for `/clock`, and a `TimerAction` that
    includes `rbs_robot.launch.py` from `rbs_bringup` after 20 seconds.

    Args:
        context: Launch context handed over by `OpaqueFunction`; not used here.
        *args: Unused.
        **kwargs: Unused.

    Returns:
        List of launch actions to execute.
    """
    # Controller configuration given to the robot launch file. This path is
    # always what `rbs_robot.launch.py` receives as `controllers_file`; the
    # `controllers_file` launch argument declared by this file is not forwarded.
    # TODO(review): decide whether the launch argument should take precedence.
    controllers_file_path = os.path.join(
        get_package_share_directory("rbs_arm"), "config", "rbs_arm0_controllers.yaml"
    )

    # Argument name expected by `rbs_robot.launch.py` -> value passed to it.
    robot_launch_arguments = {
        "env_manager": LaunchConfiguration("env_manager"),
        "with_gripper": LaunchConfiguration("with_gripper"),
        "gripper_name": LaunchConfiguration("gripper_name"),
        "controllers_file": controllers_file_path,
        "robot_type": LaunchConfiguration("robot_type"),
        "cartesian_controllers": LaunchConfiguration("cartesian_controllers"),
        "description_package": LaunchConfiguration("description_package"),
        "description_file": LaunchConfiguration("description_file"),
        "robot_name": LaunchConfiguration("robot_name"),
        "start_joint_controller": LaunchConfiguration("start_joint_controller"),
        "initial_joint_controller": LaunchConfiguration("initial_joint_controller"),
        # The argument is called `launch_sim` in this file.
        "launch_simulation": LaunchConfiguration("launch_sim"),
        "launch_moveit": LaunchConfiguration("launch_moveit"),
        "launch_task_planner": LaunchConfiguration("launch_task_planner"),
        "launch_perception": LaunchConfiguration("launch_perception"),
        "moveit_config_package": LaunchConfiguration("moveit_config_package"),
        "moveit_config_file": LaunchConfiguration("moveit_config_file"),
        "use_sim_time": LaunchConfiguration("use_sim_time"),
        "sim_gazebo": LaunchConfiguration("sim_gazebo"),
        "hardware": LaunchConfiguration("hardware"),
        "launch_controllers": LaunchConfiguration("launch_controllers"),
        # "gazebo_gui" is declared by this file but not forwarded.
    }

    single_robot_setup = IncludeLaunchDescription(
        PythonLaunchDescriptionSource([
            PathJoinSubstitution([
                FindPackageShare("rbs_bringup"),
                "launch",
                "rbs_robot.launch.py",
            ])
        ]),
        launch_arguments=robot_launch_arguments.items(),
    )

    # Launch arguments handed to train.py. Each one becomes
    # `--<name-with-hyphens> <value>`, in this order.
    training_options = [
        "env",
        "env_kwargs",
        "algo",
        "hyperparams",
        "n_timesteps",
        "num_threads",
        "seed",
        "trained_agent",
        "save_freq",
        "save_replay_buffer",
        "preload_replay_buffer",
        "log_folder",
        "tensorboard_log",
        "log_interval",
        "uuid",
        "eval_freq",
        "eval_episodes",
        "verbose",
        "track",
        "truncate_last_trajectory",
    ]
    train_args = []
    for option in training_options:
        train_args += ["--" + option.replace("_", "-"), LaunchConfiguration(option)]
    train_args += ["--ros-args", "--log-level", LaunchConfiguration("log_level")]

    clock_bridge = Node(
        package="ros_gz_bridge",
        executable="parameter_bridge",
        arguments=["/clock@rosgraph_msgs/msg/Clock[ignition.msgs.Clock"],
        output="screen",
    )

    rl_task = Node(
        package="rbs_gym",
        executable="train.py",
        output="log",
        arguments=train_args,
        parameters=[{"use_sim_time": True}],
    )

    # The robot control stack is included 20 s after the other two actions.
    delay_robot_control_stack = TimerAction(
        period=20.0,
        actions=[single_robot_setup],
    )

    return [
        rl_task,
        clock_bridge,
        delay_robot_control_stack,
    ]
 def generate_launch_description():
    """Declare all launch arguments and assemble the training launch description.

    Returns:
        A `LaunchDescription` holding, in order: the two OpenMP environment
        variables, every declared launch argument, and the `OpaqueFunction`
        that runs `launch_setup`.
    """
    def declare(name, default, description, choices=None):
        """Build a `DeclareLaunchArgument`; `choices` is passed only when given."""
        optional = {} if choices is None else {"choices": choices}
        return DeclareLaunchArgument(
            name, default_value=default, description=description, **optional
        )

    declared_arguments = [
        declare("robot_type", "rbs_arm", "Type of robot by name",
                choices=["rbs_arm", "ur3", "ur3e", "ur5", "ur5e", "ur10", "ur10e", "ur16e"]),
        # General arguments
        declare("controllers_file", "rbs_arm_controllers_gazebosim.yaml",
                "YAML file with the controllers configuration."),
        declare("description_package", "rbs_arm",
                "Description package with robot URDF/XACRO files. Usually the argument "
                "is not set, it enables use of a custom description."),
        declare("description_file", "rbs_arm_modular.xacro",
                "URDF/XACRO description file with the robot."),
        declare("robot_name", "arm0",
                "Name for robot, used to apply namespace for specific robot in multirobot setup"),
        declare("start_joint_controller", "false",
                "Enable headless mode for robot control"),
        declare("initial_joint_controller", "joint_trajectory_controller",
                "Robot controller to start."),
        declare("moveit_config_package", "rbs_arm",
                "MoveIt config package with robot SRDF/XACRO files. Usually the argument "
                "is not set, it enables use of a custom moveit config."),
        declare("moveit_config_file", "rbs_arm.srdf.xacro",
                "MoveIt SRDF/XACRO description file with the robot."),
        declare("use_sim_time", "true",
                "Make MoveIt to use simulation time. "
                "This is needed for the trajectory planing in simulation."),
        declare("gripper_name", "rbs_gripper",
                "choose gripper by name (leave empty if hasn't)",
                choices=["rbs_gripper", ""]),
        declare("with_gripper", "true", "With gripper or not?"),
        declare("sim_gazebo", "true", "Gazebo Simulation"),
        declare("env_manager", "false", "Launch env_manager?"),
        declare("launch_sim", "true", "Launch simulator (Gazebo)? Most general arg"),
        declare("launch_moveit", "false", "Launch moveit?"),
        declare("launch_perception", "false", "Launch perception?"),
        declare("launch_task_planner", "false", "Launch task_planner?"),
        declare("cartesian_controllers", "true", "Load cartesian controllers?"),
        declare("hardware", "gazebo", "Choose your harware_interface",
                choices=["gazebo", "mock"]),
        declare("launch_controllers", "true", "Launch controllers?"),
        declare("gazebo_gui", "true", "Launch gazebo with gui?"),
        # Training arguments
        declare("env", "Reach-Gazebo-v0", "Environment ID"),
        declare("env_kwargs", "",
                "Optional keyword argument to pass to the env constructor."),
        declare("vec_env", "dummy", "Type of VecEnv to use (dummy or subproc)."),
        # Algorithm and training
        declare("algo", "sac", "RL algorithm to use during the training."),
        declare("n_timesteps", "-1", "Overwrite the number of timesteps."),
        declare("hyperparams", "",
                "Optional RL hyperparameter overwrite (e.g. learning_rate:0.01 train_freq:10)."),
        declare("num_threads", "-1",
                "Number of threads for PyTorch (-1 to use default)."),
        # Continue training an already trained agent
        declare("trained_agent", "",
                "Path to a pretrained agent to continue training."),
        # Random seed
        declare("seed", "-1", "Random generator seed."),
        # Saving of model
        declare("save_freq", "10000",
                "Save the model every n steps (if negative, no checkpoint)."),
        declare("save_replay_buffer", "False",
                "Save the replay buffer too (when applicable)."),
        # Pre-load a replay buffer and start training on it
        declare("preload_replay_buffer", "",
                "Path to a replay buffer that should be preloaded before starting the training process."),
        # Logging
        declare("log_folder", "logs", "Path to the log directory."),
        declare("tensorboard_log", "tensorboard_logs", "Tensorboard log dir."),
        declare("log_interval", "-1",
                "Override log interval (default: -1, no change)."),
        declare("uuid", "False", "Ensure that the run has a unique ID."),
        # Evaluation
        declare("eval_freq", "-1",
                "Evaluate the agent every n steps (if negative, no evaluation)."),
        declare("eval_episodes", "5", "Number of episodes to use for evaluation."),
        # Verbosity
        declare("verbose", "1", "Verbose mode (0: no output, 1: INFO)."),
        # HER specifics
        declare("truncate_last_trajectory", "True",
                "When using HER with online sampling the last trajectory in the replay buffer "
                "will be truncated after reloading the replay buffer."),
        declare("log_level", "error",
                "The level of logging that is applied to all ROS 2 nodes launched by this script."),
        declare("track", "true",
                "Value passed to the `--track` option of train.py."),
    ]

    # `cpu_count()` may return None, and halving a single core gives 0;
    # always request at least one OpenMP thread.
    omp_threads = max(1, (cpu_count() or 1) // 2)
    env_variables = [
        SetEnvironmentVariable(name="OMP_DYNAMIC", value="TRUE"),
        SetEnvironmentVariable(name="OMP_NUM_THREADS", value=str(omp_threads)),
    ]

    # Actions run in list order: the environment variables are set before the
    # OpaqueFunction starts any process, so those processes inherit them.
    return LaunchDescription(
        env_variables + declared_arguments + [OpaqueFunction(function=launch_setup)]
    )
--- a/env_manager/rbs_gym/package.xml
+++ b/env_manager/rbs_gym/package.xml
@ -0,0 +1,18 @@
 <?xml version="1.0"?>
 <?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
 <package format="3">
  <name>rbs_gym</name>
  <version>0.0.0</version>
  <description>TODO: Package description</description>
  <maintainer email="ur.narmak@gmail.com">bill-finger</maintainer>
  <license>Apache-2.0</license>
  <buildtool_depend>ament_cmake</buildtool_depend>
  <test_depend>ament_lint_auto</test_depend>
  <test_depend>ament_lint_common</test_depend>
  <export>
    <build_type>ament_cmake</build_type>
  </export>
 </package>
--- a/env_manager/rbs_gym/rbs_gym/init.py
+++ b/env_manager/rbs_gym/rbs_gym/init.py
--- a/env_manager/rbs_gym/rbs_gym/envs/init.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/init.py
@ -0,0 +1,217 @@
 # Note: The `open3d` and `stable_baselines3` modules must be imported prior to `gym_gz`
 import open3d  # isort:skip
 import stable_baselines3  # isort:skip
 # Note: If installed, `tensorflow` module must be imported before `gym_gz`/`scenario`
 # Otherwise, protobuf version incompatibility will cause an error
 try:
    from importlib.util import find_spec
    if find_spec("tensorflow") is not None:
        import tensorflow
 except Exception:
    # Best effort only: a broken or missing tensorflow install must not stop
    # this package from importing. `Exception` (rather than a bare `except`)
    # lets KeyboardInterrupt and SystemExit propagate.
    pass
 from os import environ, path
 from typing import Any, Dict, Tuple
 import numpy as np
 from ament_index_python.packages import get_package_share_directory
 from gymnasium.envs.registration import register
 from rbs_gym.utils.utils import str2bool
 from . import tasks
 # --- Runtime entrypoint ---------------------------------------------------
 # "module:Class" entry point used when registering the task environments.
 RBS_ENVS_TASK_ENTRYPOINT: str = "gym_gz.runtimes.gazebo_runtime:GazeboRuntime"

 # --- Robot specifics ------------------------------------------------------
 # Robot model name placed into the default task kwargs.
 RBS_ENVS_ROBOT_MODEL: str = "rbs_arm"

 # --- Datasets and paths ---------------------------------------------------
 # `worlds` directory inside the installed share directory of `rbs_gym`.
 RBS_ENVS_WORLDS_DIR: str = path.join(get_package_share_directory("rbs_gym"), "worlds")

 # --- Presets --------------------------------------------------------------
 # Gravity vector and per-axis standard deviation used by the randomizer kwargs.
 GRAVITY_EARTH: Tuple[float, float, float] = (0.0, 0.0, -9.80665)
 GRAVITY_EARTH_STD: Tuple[float, float, float] = (0.0, 0.0, 0.0232)

 # --- Additional configuration ---------------------------------------------
 # Read from RBS_ENVS_BROADCAST_INTERACTIVE_GUI; when the variable is unset,
 # the bool `True` (not a string) is what `str2bool` receives.
 # NOTE(review): presumably `str2bool` accepts a bool as well - confirm.
 BROADCAST_GUI: bool = str2bool(
    environ.get("RBS_ENVS_BROADCAST_INTERACTIVE_GUI", default=True)
 )
 #########
 # Reach #
 #########
 # Episode step limit applied to every Reach environment registration.
 REACH_MAX_EPISODE_STEPS: int = 100
 # Keyword arguments shared by the Reach task registrations.
 REACH_KWARGS: Dict[str, Any] = {
    "agent_rate": 4.0,
    "robot_model": RBS_ENVS_ROBOT_MODEL,
    "workspace_frame_id": "world",
    "workspace_centre": (-0.45, 0.0, 0.35),
    "workspace_volume": (0.7, 0.7, 0.7),
    "ignore_new_actions_while_executing": False,
    "use_servo": True,
    "scaling_factor_translation": 8.0,
    "scaling_factor_rotation": 3.0,
    "restrict_position_goal_to_workspace": False,
    "enable_gripper": False,
    "sparse_reward": False,
    "collision_reward": -10.0,
    "act_quick_reward": -0.01,
    "required_accuracy": 0.05,
    "num_threads": 3,
 }
 # Simulation settings shared by the Gazebo-wrapped Reach registrations.
 REACH_KWARGS_SIM: Dict[str, Any] = {
    "physics_rate": 1000.0,
    # Largest finite float32 value.
    "real_time_factor": float(np.finfo(np.float32).max),
    "world": path.join(RBS_ENVS_WORLDS_DIR, "default.sdf"),
 }
 # "module:Class" entry point of the Gazebo-wrapped Reach environments.
 REACH_RANDOMIZER: str = "rbs_gym.envs.randomizers:ManipulationGazeboEnvRandomizer"
 # Randomizer settings shared by the Gazebo-wrapped Reach registrations.
 REACH_KWARGS_RANDOMIZER: Dict[str, Any] = {
    "gravity": GRAVITY_EARTH,
    "gravity_std": GRAVITY_EARTH_STD,
    "plugin_scene_broadcaster": BROADCAST_GUI,
    "plugin_user_commands": BROADCAST_GUI,
    "plugin_sensors_render_engine": "ogre2",
    "robot_random_pose": False,
    "robot_random_joint_positions": True, # FIXME: 
    "robot_random_joint_positions_std": 0.2,
    "robot_random_joint_positions_above_object_spawn": False,
    "robot_random_joint_positions_above_object_spawn_elevation": 0.0,
    "robot_random_joint_positions_above_object_spawn_xy_randomness": 0.2,
    "terrain_enable": True,
    "light_type": "sun",
    "light_direction": (0.5, 0.4, -0.2),
    "light_random_minmax_elevation": (-0.15, -0.5),
    "light_distance": 1000.0,
    "light_visual": False,
    "light_radius": 25.0,
    "light_model_rollouts_num": 1,
    "object_enable": True,
    "object_type": "sphere",
    "objects_relative_to": "base_link",
    "object_static": True,
    "object_collision": False,
    "object_visual": True,
    "object_color": (0.0, 0.0, 1.0, 1.0),
    "object_dimensions": [0.025, 0.025, 0.025],
    "object_count": 1,
    "object_spawn_position": (-0.4, 0, 0.3),
    "object_random_pose": True,
    "object_random_spawn_position_segments": [],
    "object_random_spawn_volume": (0.2, 0.2, 0.2),
    "object_models_rollouts_num": 0,
    "underworld_collision_plane": False,
 }
 # Camera settings added on top of the randomizer kwargs for the image-based variants.
 REACH_KWARGS_RANDOMIZER_CAMERA: Dict[str, Any] = {
    "camera_enable": True,
    "camera_width": 64,
    "camera_height": 64,
    # 1.2 times the agent rate defined in REACH_KWARGS.
    "camera_update_rate": 1.2 * REACH_KWARGS["agent_rate"],
    "camera_horizontal_fov": np.pi / 3.0,
    "camera_vertical_fov": np.pi / 3.0,
    "camera_noise_mean": 0.0,
    "camera_noise_stddev": 0.001,
    "camera_relative_to": "base_link",
    "camera_spawn_position": (0.85, -0.4, 0.45),
    "camera_spawn_quat_xyzw": (-0.0402991, -0.0166924, 0.9230002, 0.3823192),
    "camera_random_pose_rollouts_num": 0,
    "camera_random_pose_mode": "orbit",
    "camera_random_pose_orbit_distance": 1.0,
    "camera_random_pose_orbit_height_range": (0.1, 0.7),
    "camera_random_pose_orbit_ignore_arc_behind_robot": np.pi / 8,
    "camera_random_pose_select_position_options": [],
    "camera_random_pose_focal_point_z_offset": 0.0,
 }
 # Task environments: one registration per task class, all sharing REACH_KWARGS.
 for _env_id, _task_cls in (
    ("Reach-v0", tasks.Reach),
    ("Reach-ColorImage-v0", tasks.ReachColorImage),
    ("Reach-DepthImage-v0", tasks.ReachDepthImage),
 ):
    register(
        id=_env_id,
        entry_point=RBS_ENVS_TASK_ENTRYPOINT,
        max_episode_steps=REACH_MAX_EPISODE_STEPS,
        kwargs={"task_cls": _task_cls, **REACH_KWARGS},
    )

 # Gazebo wrappers: each one names the task environment it wraps and adds its
 # own extras after the shared simulation and randomizer kwargs.
 for _env_id, _wrapped_env, _extra_kwargs in (
    (
        "Reach-Gazebo-v0",
        "Reach-v0",
        {"camera_enable": False},
    ),
    (
        "Reach-ColorImage-Gazebo-v0",
        "Reach-ColorImage-v0",
        {
            **REACH_KWARGS_RANDOMIZER_CAMERA,
            "camera_type": "rgbd_camera",
            "camera_publish_color": True,
        },
    ),
    (
        "Reach-DepthImage-Gazebo-v0",
        "Reach-DepthImage-v0",
        {
            **REACH_KWARGS_RANDOMIZER_CAMERA,
            "camera_type": "depth_camera",
            "camera_publish_depth": True,
        },
    ),
 ):
    register(
        id=_env_id,
        entry_point=REACH_RANDOMIZER,
        max_episode_steps=REACH_MAX_EPISODE_STEPS,
        kwargs={
            "env": _wrapped_env,
            **REACH_KWARGS_SIM,
            **REACH_KWARGS_RANDOMIZER,
            **_extra_kwargs,
        },
    )

 # Keep the loop helpers out of the module namespace.
 del _env_id, _task_cls, _wrapped_env, _extra_kwargs
--- a/env_manager/rbs_gym/rbs_gym/envs/control/init.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/control/init.py
@ -0,0 +1,3 @@
 from .cartesian_force_controller import CartesianForceController
 from .grippper_controller import GripperController
 from .joint_effort_controller import JointEffortController
--- a/env_manager/rbs_gym/rbs_gym/envs/control/cartesian_force_controller.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/control/cartesian_force_controller.py
@ -0,0 +1,41 @@
 from typing import Optional
 from geometry_msgs.msg import WrenchStamped
 from rclpy.node import Node
 from rclpy.parameter import Parameter
 class CartesianForceController:
    """Publishes a stored `WrenchStamped` from a 0.1 s timer while enabled.

    The wrench goes to `<namespace>/cartesian_force_controller/target_wrench`.
    Nothing is sent until the `publish` attribute is set to a true value.
    """
    def __init__(self, node, namespace: Optional[str] = "") -> None:
        """Create the publisher and the timer on `node`.

        Args:
            node: Object providing `create_publisher` and `create_timer`.
            namespace: Prefix put in front of the topic name.
        """
        self.node = node
        wrench_topic = namespace + "/cartesian_force_controller/target_wrench"
        self.publisher = node.create_publisher(WrenchStamped, wrench_topic, 10)
        # Publishing starts disabled; the timer callback checks this flag.
        self.publish = False
        self._target_wrench = WrenchStamped()
        self.timer = node.create_timer(0.1, self.timer_callback)
    @property
    def target_wrench(self) -> WrenchStamped:
        """Wrench message sent by the timer callback."""
        return self._target_wrench
    @target_wrench.setter
    def target_wrench(self, wrench: WrenchStamped):
        self._target_wrench = wrench
    def timer_callback(self):
        """Send the stored wrench when publishing is enabled."""
        if not self.publish:
            return
        self.publisher.publish(self._target_wrench)
 class CartesianForceControllerStandalone(Node, CartesianForceController):
    """`CartesianForceController` that is its own ROS 2 node."""
    def __init__(self, node_name:str = "rbs_gym_controller", use_sim_time: bool = True):
        """Initialise rclpy if needed, create the node and attach the controller.

        Args:
            node_name: Name given to the node.
            use_sim_time: Value written to the node's `use_sim_time` parameter.

        Raises:
            SystemExit: If `rclpy.init()` fails and `rclpy.ok()` is false afterwards.
        """
        # Imported here because the module's top-level imports only bring in
        # `Node` and `Parameter`; without these names the constructor raised
        # NameError.
        import sys

        import rclpy

        try:
            rclpy.init()
        except Exception as e:
            # A failed init is tolerated as long as the context is usable
            # (e.g. it was already initialised elsewhere).
            if not rclpy.ok():
                sys.exit(f"ROS 2 context could not be initialised: {e}")
        Node.__init__(self, node_name)
        self.set_parameters(
            [Parameter("use_sim_time", type_=Parameter.Type.BOOL, value=use_sim_time)]
        )
        CartesianForceController.__init__(self, node=self)
--- a/env_manager/rbs_gym/rbs_gym/envs/control/cartesian_velocity_controller.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/control/cartesian_velocity_controller.py
@ -0,0 +1,121 @@
 import rclpy
 from rclpy.node import Node
 import numpy as np
 import quaternion
 from geometry_msgs.msg import Twist
 from geometry_msgs.msg import PoseStamped
 import tf2_ros
 import sys
 import time
 import threading
 import os
 class VelocityController:
    """Convert Twist messages to PoseStamped

    Use this helper to integrate twist messages into a moving target pose in
    Cartesian space.  An initial TF lookup assures that the target pose always
    starts at the robot's end-effector.

    The class is not a node itself: the publisher, subscription, timer, TF
    listener and clock all come from the `node` passed to the constructor.
    """
    def __init__(self, node: Node, topic_pose: str, topic_twist: str, base_frame: str, ee_frame: str):
        """Set up ROS interfaces on `node` and start the startup thread.

        Args:
            node: Node that owns the publisher, subscription, timer and TF listener.
            topic_pose: Topic the integrated `PoseStamped` is published on.
            topic_twist: Topic `Twist` commands are received on.
            base_frame: Frame the pose is expressed in (TF target frame).
            ee_frame: End-effector frame (TF source frame).
        """
        self.node = node
        self._frame_id = base_frame
        self._end_effector = ee_frame
        self.tf_buffer = tf2_ros.Buffer()
        # The listener needs the node (not this helper) to create its subscriptions.
        self.tf_listener = tf2_ros.TransformListener(self.tf_buffer, node)
        self.rot = np.quaternion(0, 0, 0, 1)
        self.pos = [0, 0, 0]
        self.pub = node.create_publisher(PoseStamped, topic_pose, 3)
        self.sub = node.create_subscription(Twist, topic_twist, self.twist_cb, 1)
        self.last = time.time()
        self.startup_done = False
        period = 1.0 / node.declare_parameter("publishing_rate", 100).value
        self.timer = node.create_timer(period, self.publish)
        # Publishing stays off until a caller sets this flag.
        self.publish_it = False
        self.thread = threading.Thread(target=self.startup, daemon=True)
        self.thread.start()
    def startup(self):
        """Make sure to start at the robot's current pose"""
        # Wait until we entered spinning in the main thread.
        time.sleep(1)
        try:
            start = self.tf_buffer.lookup_transform(
                target_frame=self._frame_id,
                source_frame=self._end_effector,
                time=rclpy.time.Time(),
            )
        except (
            tf2_ros.InvalidArgumentException,
            tf2_ros.LookupException,
            tf2_ros.ConnectivityException,
            tf2_ros.ExtrapolationException,
        ) as e:
            print(f"Startup failed: {e}")
            # Terminates the whole process from this background thread.
            os._exit(1)
        self.pos[0] = start.transform.translation.x
        self.pos[1] = start.transform.translation.y
        self.pos[2] = start.transform.translation.z
        self.rot.x = start.transform.rotation.x
        self.rot.y = start.transform.rotation.y
        self.rot.z = start.transform.rotation.z
        self.rot.w = start.transform.rotation.w
        self.startup_done = True
    def twist_cb(self, data):
        """Numerically integrate twist message into a pose

        Use `self._frame_id` as reference for the navigation commands.
        """
        now = time.time()
        dt = now - self.last
        self.last = now
        # Position update
        self.pos[0] += data.linear.x * dt
        self.pos[1] += data.linear.y * dt
        self.pos[2] += data.linear.z * dt
        # Orientation update
        wx = data.angular.x
        wy = data.angular.y
        wz = data.angular.z
        _, q = quaternion.integrate_angular_velocity(
            lambda _: (wx, wy, wz), 0, dt, self.rot
        )
        self.rot = q[-1]  # the last one is after dt passed
    def publish(self):
        """Timer callback: publish the current pose once startup finished and publishing is enabled."""
        if not self.startup_done:
            return
        if not self.publish_it:
            return
        # Built outside the try block so that programming errors surface
        # instead of being swallowed together with shutdown errors.
        msg = PoseStamped()
        msg.header.stamp = self.node.get_clock().now().to_msg()
        msg.header.frame_id = self._frame_id
        msg.pose.position.x = self.pos[0]
        msg.pose.position.y = self.pos[1]
        msg.pose.position.z = self.pos[2]
        msg.pose.orientation.x = self.rot.x
        msg.pose.orientation.y = self.rot.y
        msg.pose.orientation.z = self.rot.z
        msg.pose.orientation.w = self.rot.w
        try:
            self.pub.publish(msg)
        except Exception:
            # Swallow 'publish() to closed topic' error.
            # This rarely happens on killing this node.
            pass
--- a/env_manager/rbs_gym/rbs_gym/envs/control/grippper_controller.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/control/grippper_controller.py
@ -0,0 +1,46 @@
 from typing import Optional
 from control_msgs.action import GripperCommand
 from rclpy.action import ActionClient
 class GripperController:
    """Sends `GripperCommand` goals to `<namespace>/gripper_controller/gripper_cmd`."""
    def __init__(self, node,
                 open_position: Optional[float] = 0.0,
                 close_position: Optional[float] = 0.0,
                 max_effort: Optional[float] = 0.0,
                 namespace: Optional[str] = ""):
        """Create the action client on `node`.

        Args:
            node: Node used for the action client and for logging.
            open_position: Goal position sent by `open()`.
            close_position: Goal position sent by `close()`.
            max_effort: Maximum effort set on every goal.
            namespace: Prefix put in front of the action name.
        """
        # Kept for logging: this class is not a node and has no logger of its own.
        self._node = node
        self._action_client = ActionClient(node, GripperCommand,
                                           namespace + "/gripper_controller/gripper_cmd")
        self._open_position = open_position
        self._close_position = close_position
        self._max_effort = max_effort
    def open(self):
        """Send the configured open position as a goal."""
        self.send_goal(self._open_position)
    def close(self):
        """Send the configured close position as a goal."""
        self.send_goal(self._close_position)
    def send_goal(self, goal: float):
        """Wait for the action server, then send `goal` asynchronously.

        Args:
            goal: Gripper position to command.
        """
        goal_msg = GripperCommand.Goal()
        goal_msg.command.position = goal
        goal_msg.command.max_effort = self._max_effort
        # Blocks until the action server is available.
        self._action_client.wait_for_server()
        self._send_goal_future = self._action_client.send_goal_async(goal_msg)
        self._send_goal_future.add_done_callback(self.goal_response_callback)
    def goal_response_callback(self, future):
        """Log whether the goal was accepted and, if so, request its result."""
        goal_handle = future.result()
        if not goal_handle.accepted:
            self._node.get_logger().info('Goal rejected :(')
            return
        self._node.get_logger().info('Goal accepted :)')
        self._get_result_future = goal_handle.get_result_async()
        self._get_result_future.add_done_callback(self.get_result_callback)
    def get_result_callback(self, future):
        """Log the gripper position reported in the action result."""
        result = future.result().result
        self._node.get_logger().info(f"Gripper position: {result.position}")
--- a/env_manager/rbs_gym/rbs_gym/envs/control/joint_effort_controller.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/control/joint_effort_controller.py
@ -0,0 +1,25 @@
 from typing import Optional
 from std_msgs.msg import Float64MultiArray
 class JointEffortController:
    """Holds a publisher for joint effort commands and the latest target message.

    The publisher is created on ``namespace + "/joint_effort_controller/commands"``
    with a queue depth of 10. Nothing in this class publishes by itself; the
    timer-based publishing below is disabled.
    """

    def __init__(self, node, namespace: Optional[str] = "") -> None:
        self.node = node
        command_topic = namespace + "/joint_effort_controller/commands"
        self.publisher = node.create_publisher(Float64MultiArray, command_topic, 10)
        # Disabled periodic publishing:
        # self.timer = node.create_timer(0.1, self.timer_callback)
        # self.publish = True
        self._effort_array = Float64MultiArray()

    @property
    def target_effort(self) -> Float64MultiArray:
        """Most recently stored effort message."""
        return self._effort_array

    @target_effort.setter
    def target_effort(self, effort: Float64MultiArray):
        # Only stores the message; it is not published here.
        self._effort_array = effort

    # Disabled timer callback. NOTE(review): it refers to ``self._target_wrench``,
    # which this class never defines - confirm before re-enabling.
    # def timer_callback(self):
    #     if self.publish:
    #         self.publisher.publish(self._target_wrench)
--- a/env_manager/rbs_gym/rbs_gym/envs/models/init.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/models/init.py
@ -0,0 +1,5 @@
 from .lights import *
 from .objects import *
 from .robots import *
 from .sensors import *
 from .terrains import *
--- a/env_manager/rbs_gym/rbs_gym/envs/models/lights/init.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/models/lights/init.py
@ -0,0 +1,20 @@
 from gym_gz.scenario.model_wrapper import ModelWrapper
 from .random_sun import RandomSun
 from .sun import Sun
 def get_light_model_class(light_type: str) -> ModelWrapper:
    """Map a light type name to its model class.

    ``"sun"`` maps to ``Sun`` and ``"random_sun"`` to ``RandomSun``; any other
    value yields ``None``.
    """
    # TODO: Refactor into enum
    known_lights = (
        ("sun", Sun),
        ("random_sun", RandomSun),
    )
    for type_name, light_class in known_lights:
        if type_name == light_type:
            return light_class
    return None
 def is_light_type_randomizable(light_type: str) -> bool:
    """Tell whether ``light_type`` names a randomizable light (only ``"random_sun"``)."""
    return bool("random_sun" == light_type)
--- a/env_manager/rbs_gym/rbs_gym/envs/models/lights/random_sun.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/models/lights/random_sun.py
@ -0,0 +1,158 @@
 from typing import Optional, Tuple
 import numpy as np
 from gym_gz.scenario import model_wrapper
 from gym_gz.utils.scenario import get_unique_model_name
 from numpy.random import RandomState
 from scenario import core as scenario
 class RandomSun(model_wrapper.ModelWrapper):
    """Directional light whose direction, color and attenuation are sampled at random.

    The light is described by an SDF string (see :meth:`get_sdf`) and inserted
    into the given world as a static model under a unique name.
    """

    def __init__(
        self,
        world: scenario.World,
        name: str = "sun",
        minmax_elevation: Tuple[float, float] = (-0.15, -0.65),
        distance: float = 800.0,
        visual: bool = True,
        radius: float = 20.0,
        color_minmax_r: Tuple[float, float] = (1.0, 1.0),
        color_minmax_g: Tuple[float, float] = (1.0, 1.0),
        color_minmax_b: Tuple[float, float] = (1.0, 1.0),
        specular: float = 1.0,
        attenuation_minmax_range: Tuple[float, float] = (750.0, 15000.0),
        attenuation_minmax_constant: Tuple[float, float] = (0.5, 1.0),
        attenuation_minmax_linear: Tuple[float, float] = (0.001, 0.1),
        attenuation_minmax_quadratic: Tuple[float, float] = (0.0001, 0.01),
        np_random: Optional[RandomState] = None,
        **kwargs,
    ):
        """Sample a sun, insert it into ``world`` and wrap the resulting model.

        Raises:
            RuntimeError: If the world reports that the model was not inserted.
        """
        # Fall back to a fresh generator when the caller did not supply one
        rng = np.random.default_rng() if np_random is None else np_random

        unique_name = get_unique_model_name(world, name)

        # Random heading in the XY plane, scaled to unit length
        heading = rng.uniform(-1.0, 1.0, (2,))
        heading = heading / np.linalg.norm(heading)

        # Append a random Z component, then rescale the 3D vector to unit length
        elevation = rng.uniform(minmax_elevation[0], minmax_elevation[1])
        sun_direction = np.append(heading, elevation)
        sun_direction = sun_direction / np.linalg.norm(sun_direction)

        # The model sits `distance` away, opposite to the light direction
        start_pose = scenario.Pose(
            tuple(-sun_direction[axis] * distance for axis in range(3)),
            (1, 0, 0, 0),
        )

        sdf_text = self.get_sdf(
            model_name=unique_name,
            direction=sun_direction,
            visual=visual,
            radius=radius,
            color_minmax_r=color_minmax_r,
            color_minmax_g=color_minmax_g,
            color_minmax_b=color_minmax_b,
            attenuation_minmax_range=attenuation_minmax_range,
            attenuation_minmax_constant=attenuation_minmax_constant,
            attenuation_minmax_linear=attenuation_minmax_linear,
            attenuation_minmax_quadratic=attenuation_minmax_quadratic,
            specular=specular,
            np_random=rng,
        )

        inserted = world.to_gazebo().insert_model_from_string(
            sdf_text, start_pose, unique_name
        )
        if not inserted:
            raise RuntimeError("Failed to insert " + unique_name)

        # Hand the inserted model over to the wrapper base class
        model_wrapper.ModelWrapper.__init__(self, model=world.get_model(unique_name))

    @classmethod
    def get_sdf(
        cls,
        model_name: str,
        direction: Tuple[float, float, float],
        visual: bool,
        radius: float,
        color_minmax_r: Tuple[float, float],
        color_minmax_g: Tuple[float, float],
        color_minmax_b: Tuple[float, float],
        attenuation_minmax_range: Tuple[float, float],
        attenuation_minmax_constant: Tuple[float, float],
        attenuation_minmax_linear: Tuple[float, float],
        attenuation_minmax_quadratic: Tuple[float, float],
        specular: float,
        np_random: RandomState,
    ) -> str:
        """Build the SDF string of the light, sampling color and attenuation.

        Each ``*_minmax_*`` pair is passed to ``np_random.uniform`` as its two
        bounds. Values are drawn in the order red, green, blue, range,
        constant, linear, quadratic. The emissive sphere visual is included
        only when ``visual`` is true.
        """

        def draw(bounds):
            return np_random.uniform(bounds[0], bounds[1])

        # Keep this draw order: it determines the stream consumed from np_random
        red = draw(color_minmax_r)
        green = draw(color_minmax_g)
        blue = draw(color_minmax_b)
        light_range = draw(attenuation_minmax_range)
        constant_term = draw(attenuation_minmax_constant)
        linear_term = draw(attenuation_minmax_linear)
        quadratic_term = draw(attenuation_minmax_quadratic)

        visual_sdf = ""
        if visual:
            visual_sdf = f"""
                        <visual name="{model_name}_visual">
                            <geometry>
                                <sphere>
                                    <radius>{radius}</radius>
                                </sphere>
                            </geometry>
                            <material>
                                <emissive>{red} {green} {blue} 1</emissive>
                            </material>
                            <cast_shadows>false</cast_shadows>
                        </visual>
                        """

        return f'''<sdf version="1.9">
                <model name="{model_name}">
                    <static>true</static>
                    <link name="{model_name}_link">
                        <light type="directional" name="{model_name}_light">
                            <direction>{direction[0]} {direction[1]} {direction[2]}</direction>
                            <attenuation>
                                <range>{light_range}</range>
                                <constant>{constant_term}</constant>
                                <linear>{linear_term}</linear>
                                <quadratic>{quadratic_term}</quadratic>
                            </attenuation>
                            <diffuse>{red} {green} {blue} 1</diffuse>
                            <specular>{specular*red} {specular*green} {specular*blue} 1</specular>
                            <cast_shadows>true</cast_shadows>
                        </light>
                        {visual_sdf}
                    </link>
                </model>
            </sdf>'''
--- a/env_manager/rbs_gym/rbs_gym/envs/models/lights/sun.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/models/lights/sun.py
@ -0,0 +1,119 @@
 from typing import List, Tuple
 import numpy as np
 from gym_gz.scenario import model_wrapper
 from gym_gz.utils.scenario import get_unique_model_name
 from scenario import core as scenario
 class Sun(model_wrapper.ModelWrapper):
    """Directional light with fixed parameters, inserted into a world as a static model."""

    def __init__(
        self,
        world: scenario.World,
        name: str = "sun",
        direction: Tuple[float, float, float] = (0.5, -0.25, -0.75),
        color: List[float] = (1.0, 1.0, 1.0, 1.0),
        distance: float = 800.0,
        visual: bool = True,
        radius: float = 20.0,
        specular: float = 1.0,
        attenuation_range: float = 10000.0,
        attenuation_constant: float = 0.9,
        attenuation_linear: float = 0.01,
        attenuation_quadratic: float = 0.001,
        **kwargs,
    ):
        """Insert the sun into ``world`` and wrap the resulting model.

        Raises:
            RuntimeError: If the world reports that the model was not inserted.
        """
        unique_name = get_unique_model_name(world, name)

        # Scale the requested direction to unit length
        raw_direction = np.array(direction)
        unit_direction = raw_direction / np.linalg.norm(raw_direction)

        # The model sits `distance` away, opposite to the light direction
        start_pose = scenario.Pose(
            tuple(-unit_direction[axis] * distance for axis in range(3)),
            (1, 0, 0, 0),
        )

        sdf_text = self.get_sdf(
            model_name=unique_name,
            direction=unit_direction,
            color=color,
            visual=visual,
            radius=radius,
            specular=specular,
            attenuation_range=attenuation_range,
            attenuation_constant=attenuation_constant,
            attenuation_linear=attenuation_linear,
            attenuation_quadratic=attenuation_quadratic,
        )

        inserted = world.to_gazebo().insert_model_from_string(
            sdf_text, start_pose, unique_name
        )
        if not inserted:
            raise RuntimeError("Failed to insert " + unique_name)

        # Hand the inserted model over to the wrapper base class
        model_wrapper.ModelWrapper.__init__(self, model=world.get_model(unique_name))

    @classmethod
    def get_sdf(
        cls,
        model_name: str,
        direction: Tuple[float, float, float],
        color: Tuple[float, float, float, float],
        visual: bool,
        radius: float,
        specular: float,
        attenuation_range: float,
        attenuation_constant: float,
        attenuation_linear: float,
        attenuation_quadratic: float,
    ) -> str:
        """Build the SDF string of the light.

        Only the first three components of ``color`` are used; the fourth
        value written to the SDF is always ``1``. The emissive sphere visual
        is included only when ``visual`` is true.
        """
        red, green, blue = color[0], color[1], color[2]

        visual_sdf = ""
        if visual:
            visual_sdf = f"""
                        <visual name="{model_name}_visual">
                            <geometry>
                                <sphere>
                                    <radius>{radius}</radius>
                                </sphere>
                            </geometry>
                            <material>
                                <emissive>{red} {green} {blue} 1</emissive>
                            </material>
                            <cast_shadows>false</cast_shadows>
                        </visual>
                        """

        return f'''<sdf version="1.9">
                <model name="{model_name}">
                    <static>true</static>
                    <link name="{model_name}_link">
                        <light type="directional" name="{model_name}_light">
                            <direction>{direction[0]} {direction[1]} {direction[2]}</direction>
                            <attenuation>
                                <range>{attenuation_range}</range>
                                <constant>{attenuation_constant}</constant>
                                <linear>{attenuation_linear}</linear>
                                <quadratic>{attenuation_quadratic}</quadratic>
                            </attenuation>
                            <diffuse>{red} {green} {blue} 1</diffuse>
                            <specular>{specular*red} {specular*green} {specular*blue} 1</specular>
                            <cast_shadows>true</cast_shadows>
                        </light>
                        {visual_sdf}
                    </link>
                </model>
            </sdf>'''
--- a/env_manager/rbs_gym/rbs_gym/envs/models/objects/init.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/models/objects/init.py
@ -0,0 +1,35 @@
 from gym_gz.scenario.model_wrapper import ModelWrapper
 from .primitives import Box, Cylinder, Plane, Sphere
 from .random_lunar_rock import RandomLunarRock
 from .random_object import RandomObject
 from .random_primitive import RandomPrimitive
 from .rock import Rock
 def get_object_model_class(object_type: str) -> ModelWrapper:
    """Map an object type name to its model class.

    Recognized names are listed in the table below; any other value yields
    ``None``.
    """
    # TODO: Refactor into enum
    known_objects = (
        ("box", Box),
        ("sphere", Sphere),
        ("cylinder", Cylinder),
        ("random_primitive", RandomPrimitive),
        ("random_mesh", RandomObject),
        ("rock", Rock),
        ("random_lunar_rock", RandomLunarRock),
    )
    for type_name, object_class in known_objects:
        if type_name == object_type:
            return object_class
    return None
 def is_object_type_randomizable(object_type: str) -> bool:
    """Tell whether ``object_type`` names one of the randomizable object types."""
    randomizable_types = ("random_primitive", "random_mesh", "random_lunar_rock")
    return any(type_name == object_type for type_name in randomizable_types)
--- a/env_manager/rbs_gym/rbs_gym/envs/models/objects/primitives/init.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/models/objects/primitives/init.py
@ -0,0 +1,4 @@
 from .box import Box
 from .cylinder import Cylinder
 from .plane import Plane
 from .sphere import Sphere
--- a/env_manager/rbs_gym/rbs_gym/envs/models/objects/primitives/box.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/models/objects/primitives/box.py
@ -0,0 +1,129 @@
 from typing import List
 from gym_gz.scenario import model_wrapper
 from gym_gz.utils import misc
 from gym_gz.utils.scenario import get_unique_model_name
 from scenario import core as scenario
 class Box(model_wrapper.ModelWrapper):
    """Box primitive described by an SDF string and inserted into a world."""

    def __init__(
        self,
        world: scenario.World,
        name: str = "box",
        position: List[float] = (0, 0, 0),
        orientation: List[float] = (1, 0, 0, 0),
        size: List[float] = (0.05, 0.05, 0.05),
        mass: float = 0.1,
        static: bool = False,
        collision: bool = True,
        friction: float = 1.0,
        visual: bool = True,
        gui_only: bool = False,
        color: List[float] = (0.8, 0.8, 0.8, 1.0),
        **kwargs,
    ):
        """Insert the box into ``world`` and wrap the resulting model.

        Raises:
            RuntimeError: If the world reports that the model was not inserted.
        """
        unique_name = get_unique_model_name(world, name)
        start_pose = scenario.Pose(position, orientation)

        sdf_text = self.get_sdf(
            model_name=unique_name,
            size=size,
            mass=mass,
            static=static,
            collision=collision,
            friction=friction,
            visual=visual,
            gui_only=gui_only,
            color=color,
        )

        inserted = world.to_gazebo().insert_model_from_string(
            sdf_text, start_pose, unique_name
        )
        if not inserted:
            raise RuntimeError("Failed to insert " + unique_name)

        # Hand the inserted model over to the wrapper base class
        model_wrapper.ModelWrapper.__init__(self, model=world.get_model(unique_name))

    @classmethod
    def get_sdf(
        cls,
        model_name: str,
        size: List[float],
        mass: float,
        static: bool,
        collision: bool,
        friction: float,
        visual: bool,
        gui_only: bool,
        color: List[float],
    ) -> str:
        """Build the SDF string of the box.

        The collision and visual elements are emitted only when the matching
        flag is true. The inertial element is always emitted, with diagonal
        inertia terms computed from ``size`` and ``mass``.
        """
        size_x, size_y, size_z = size[0], size[1], size[2]

        collision_sdf = ""
        if collision:
            collision_sdf = f"""
                        <collision name="{model_name}_collision">
                            <geometry>
                                <box>
                                    <size>{size_x} {size_y} {size_z}</size>
                                </box>
                            </geometry>
                            <surface>
                                <friction>
                                    <ode>
                                        <mu>{friction}</mu>
                                        <mu2>{friction}</mu2>
                                        <fdir1>0 0 0</fdir1>
                                        <slip1>0.0</slip1>
                                        <slip2>0.0</slip2>
                                    </ode>
                                </friction>
                            </surface>
                        </collision>
                        """

        visual_sdf = ""
        if visual:
            rgba = f"{color[0]} {color[1]} {color[2]} {color[3]}"
            if gui_only:
                gui_only_tags = '<visibility_flags>1</visibility_flags> <cast_shadows>false</cast_shadows>'
            else:
                gui_only_tags = ''
            visual_sdf = f"""
                        <visual name="{model_name}_visual">
                            <geometry>
                                <box>
                                    <size>{size_x} {size_y} {size_z}</size>
                                </box>
                            </geometry>
                            <material>
                                <ambient>{rgba}</ambient>
                                <diffuse>{rgba}</diffuse>
                                <specular>{rgba}</specular>
                            </material>
                            <transparency>{1.0-color[3]}</transparency>
                            {gui_only_tags}
                        </visual>
                        """

        # Diagonal inertia terms of the box
        inertia_xx = (size_y**2 + size_z**2)*mass/12
        inertia_yy = (size_x**2 + size_z**2)*mass/12
        inertia_zz = (size_x**2 + size_y**2)*mass/12
        static_flag = "true" if static else "false"

        return f'''<sdf version="1.7">
                <model name="{model_name}">
                    <static>{static_flag}</static>
                    <link name="{model_name}_link">
                        {collision_sdf}
                        {visual_sdf}
                        <inertial>
                            <mass>{mass}</mass>
                            <inertia>
                                <ixx>{inertia_xx}</ixx>
                                <iyy>{inertia_yy}</iyy>
                                <izz>{inertia_zz}</izz>
                                <ixy>0.0</ixy>
                                <ixz>0.0</ixz>
                                <iyz>0.0</iyz>
                            </inertia>
                        </inertial>
                    </link>
                </model>
            </sdf>'''
--- a/env_manager/rbs_gym/rbs_gym/envs/models/objects/primitives/cylinder.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/models/objects/primitives/cylinder.py
@ -0,0 +1,137 @@
 from typing import List
 from gym_gz.scenario import model_wrapper
 from gym_gz.utils import misc
 from gym_gz.utils.scenario import get_unique_model_name
 from scenario import core as scenario
 class Cylinder(model_wrapper.ModelWrapper):
    """Cylinder primitive described by an SDF string and inserted into a world."""

    def __init__(
        self,
        world: scenario.World,
        name: str = "cylinder",
        position: List[float] = (0, 0, 0),
        orientation: List[float] = (1, 0, 0, 0),
        radius: float = 0.025,
        length: float = 0.1,
        mass: float = 0.1,
        static: bool = False,
        collision: bool = True,
        friction: float = 1.0,
        visual: bool = True,
        gui_only: bool = False,
        color: List[float] = (0.8, 0.8, 0.8, 1.0),
        **kwargs,
    ):
        """Insert the cylinder into ``world`` and wrap the resulting model.

        Raises:
            RuntimeError: If the world reports that the model was not inserted.
        """
        unique_name = get_unique_model_name(world, name)
        start_pose = scenario.Pose(position, orientation)

        sdf_text = self.get_sdf(
            model_name=unique_name,
            radius=radius,
            length=length,
            mass=mass,
            static=static,
            collision=collision,
            friction=friction,
            visual=visual,
            gui_only=gui_only,
            color=color,
        )

        inserted = world.to_gazebo().insert_model_from_string(
            sdf_text, start_pose, unique_name
        )
        if not inserted:
            raise RuntimeError("Failed to insert " + unique_name)

        # Hand the inserted model over to the wrapper base class
        model_wrapper.ModelWrapper.__init__(self, model=world.get_model(unique_name))

    @classmethod
    def get_sdf(
        cls,
        model_name: str,
        radius: float,
        length: float,
        mass: float,
        static: bool,
        collision: bool,
        friction: float,
        visual: bool,
        gui_only: bool,
        color: List[float],
    ) -> str:
        """Build the SDF string of the cylinder.

        The collision and visual elements are emitted only when the matching
        flag is true. The inertial element is always emitted, with diagonal
        inertia terms computed from ``radius``, ``length`` and ``mass``.
        """
        # xx and yy share one value, so it is computed a single time
        inertia_radial = (3 * radius**2 + length**2) * mass / 12
        inertia_axial = (mass*radius**2)/2

        collision_sdf = ""
        if collision:
            collision_sdf = f"""
                        <collision name="{model_name}_collision">
                            <geometry>
                                <cylinder>
                                    <radius>{radius}</radius>
                                    <length>{length}</length>
                                </cylinder>
                            </geometry>
                            <surface>
                                <friction>
                                    <ode>
                                        <mu>{friction}</mu>
                                        <mu2>{friction}</mu2>
                                        <fdir1>0 0 0</fdir1>
                                        <slip1>0.0</slip1>
                                        <slip2>0.0</slip2>
                                    </ode>
                                </friction>
                            </surface>
                        </collision>
                        """

        visual_sdf = ""
        if visual:
            rgba = f"{color[0]} {color[1]} {color[2]} {color[3]}"
            if gui_only:
                gui_only_tags = '<visibility_flags>1</visibility_flags> <cast_shadows>false</cast_shadows>'
            else:
                gui_only_tags = ''
            visual_sdf = f"""
                        <visual name="{model_name}_visual">
                            <geometry>
                                <cylinder>
                                    <radius>{radius}</radius>
                                    <length>{length}</length>
                                </cylinder>
                            </geometry>
                            <material>
                                <ambient>{rgba}</ambient>
                                <diffuse>{rgba}</diffuse>
                                <specular>{rgba}</specular>
                            </material>
                            <transparency>{1.0-color[3]}</transparency>
                            {gui_only_tags}
                        </visual>
                        """

        static_flag = "true" if static else "false"

        return f'''<sdf version="1.7">
                <model name="{model_name}">
                    <static>{static_flag}</static>
                    <link name="{model_name}_link">
                        {collision_sdf}
                        {visual_sdf}
                        <inertial>
                            <mass>{mass}</mass>
                            <inertia>
                                <ixx>{inertia_radial}</ixx>
                                <iyy>{inertia_radial}</iyy>
                                <izz>{inertia_axial}</izz>
                                <ixy>0.0</ixy>
                                <ixz>0.0</ixz>
                                <iyz>0.0</iyz>
                            </inertia>
                        </inertial>
                    </link>
                </model>
            </sdf>'''
--- a/env_manager/rbs_gym/rbs_gym/envs/models/objects/primitives/plane.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/models/objects/primitives/plane.py
@ -0,0 +1,90 @@
 from typing import List
 from gym_gz.scenario import model_wrapper
 from gym_gz.utils import misc
 from gym_gz.utils.scenario import get_unique_model_name
 from scenario import core as scenario
 class Plane(model_wrapper.ModelWrapper):
    """Static plane primitive described by an SDF string and inserted into a world."""

    def __init__(
        self,
        world: scenario.World,
        name: str = "plane",
        position: List[float] = (0, 0, 0),
        orientation: List[float] = (1, 0, 0, 0),
        size: List[float] = (1.0, 1.0),
        direction: List[float] = (0.0, 0.0, 1.0),
        collision: bool = True,
        friction: float = 1.0,
        visual: bool = True,
        **kwargs,
    ):
        """Insert the plane into ``world`` and wrap the resulting model.

        ``direction`` is written as the plane normal and ``size`` as its
        two-component size. The collision and visual elements are emitted only
        when the matching flag is true.

        Raises:
            RuntimeError: If the world reports that the model was not inserted.
        """
        model_name = get_unique_model_name(world, name)
        start_pose = scenario.Pose(position, orientation)

        collision_sdf = ""
        if collision:
            collision_sdf = f"""
                    <collision name="{model_name}_collision">
                        <geometry>
                            <plane>
                                <normal>{direction[0]} {direction[1]} {direction[2]}</normal>
                                <size>{size[0]} {size[1]}</size>
                            </plane>
                        </geometry>
                        <surface>
                            <friction>
                                <ode>
                                    <mu>{friction}</mu>
                                    <mu2>{friction}</mu2>
                                    <fdir1>0 0 0</fdir1>
                                    <slip1>0.0</slip1>
                                    <slip2>0.0</slip2>
                                </ode>
                            </friction>
                        </surface>
                    </collision>
                    """

        visual_sdf = ""
        if visual:
            visual_sdf = f"""
                    <visual name="{model_name}_visual">
                        <geometry>
                            <plane>
                                <normal>{direction[0]} {direction[1]} {direction[2]}</normal>
                                <size>{size[0]} {size[1]}</size>
                            </plane>
                        </geometry>
                        <material>
                            <ambient>0.8 0.8 0.8 1</ambient>
                            <diffuse>0.8 0.8 0.8 1</diffuse>
                            <specular>0.8 0.8 0.8 1</specular>
                        </material>
                    </visual>
                    """

        sdf = f'''<sdf version="1.7">
            <model name="{model_name}">
                <static>true</static>
                <link name="{model_name}_link">
                    {collision_sdf}
                    {visual_sdf}
                </link>
            </model>
        </sdf>'''

        inserted = world.to_gazebo().insert_model_from_string(
            sdf, start_pose, model_name
        )
        if not inserted:
            raise RuntimeError("Failed to insert " + model_name)

        # Hand the inserted model over to the wrapper base class
        model_wrapper.ModelWrapper.__init__(self, model=world.get_model(model_name))
--- a/env_manager/rbs_gym/rbs_gym/envs/models/objects/primitives/sphere.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/models/objects/primitives/sphere.py
@ -0,0 +1,132 @@
 from typing import List
 from gym_gz.scenario import model_wrapper
 from gym_gz.utils import misc
 from gym_gz.utils.scenario import get_unique_model_name
 from scenario import core as scenario
 class Sphere(model_wrapper.ModelWrapper):
    """Sphere primitive described by an SDF string and inserted into a world."""

    def __init__(
        self,
        world: scenario.World,
        name: str = "sphere",
        position: List[float] = (0, 0, 0),
        orientation: List[float] = (1, 0, 0, 0),
        radius: float = 0.025,
        mass: float = 0.1,
        static: bool = False,
        collision: bool = True,
        friction: float = 1.0,
        visual: bool = True,
        gui_only: bool = False,
        color: List[float] = (0.8, 0.8, 0.8, 1.0),
        **kwargs,
    ):
        """Insert the sphere into ``world`` and wrap the resulting model.

        Raises:
            RuntimeError: If the world reports that the model was not inserted.
        """
        unique_name = get_unique_model_name(world, name)
        start_pose = scenario.Pose(position, orientation)

        sdf_text = self.get_sdf(
            model_name=unique_name,
            radius=radius,
            mass=mass,
            static=static,
            collision=collision,
            friction=friction,
            visual=visual,
            gui_only=gui_only,
            color=color,
        )

        inserted = world.to_gazebo().insert_model_from_string(
            sdf_text, start_pose, unique_name
        )
        if not inserted:
            raise RuntimeError("Failed to insert " + unique_name)

        # Hand the inserted model over to the wrapper base class
        model_wrapper.ModelWrapper.__init__(self, model=world.get_model(unique_name))

    @classmethod
    def get_sdf(
        cls,
        model_name: str,
        radius: float,
        mass: float,
        static: bool,
        collision: bool,
        friction: float,
        visual: bool,
        gui_only: bool,
        color: List[float],
    ) -> str:
        """Build the SDF string of the sphere.

        The collision and visual elements are emitted only when the matching
        flag is true. The inertial element is always emitted, with one inertia
        value shared by all three diagonal terms.
        """
        # One value serves the xx, yy and zz terms
        inertia_diagonal = (mass * radius**2) * 2 / 5

        collision_sdf = ""
        if collision:
            collision_sdf = f"""
                        <collision name="{model_name}_collision">
                            <geometry>
                                <sphere>
                                    <radius>{radius}</radius>
                                </sphere>
                            </geometry>
                            <surface>
                                <friction>
                                    <ode>
                                        <mu>{friction}</mu>
                                        <mu2>{friction}</mu2>
                                        <fdir1>0 0 0</fdir1>
                                        <slip1>0.0</slip1>
                                        <slip2>0.0</slip2>
                                    </ode>
                                </friction>
                            </surface>
                        </collision>
                        """

        visual_sdf = ""
        if visual:
            rgba = f"{color[0]} {color[1]} {color[2]} {color[3]}"
            if gui_only:
                gui_only_tags = '<visibility_flags>1</visibility_flags> <cast_shadows>false</cast_shadows>'
            else:
                gui_only_tags = ''
            visual_sdf = f"""
                        <visual name="{model_name}_visual">
                            <geometry>
                                <sphere>
                                    <radius>{radius}</radius>
                                </sphere>
                            </geometry>
                            <material>
                                <ambient>{rgba}</ambient>
                                <diffuse>{rgba}</diffuse>
                                <specular>{rgba}</specular>
                            </material>
                            <transparency>{1.0-color[3]}</transparency>
                            {gui_only_tags}
                        </visual>
                        """

        static_flag = "true" if static else "false"

        return f'''<sdf version="1.7">
                <model name="{model_name}">
                    <static>{static_flag}</static>
                    <link name="{model_name}_link">
                        {collision_sdf}
                        {visual_sdf}
                        <inertial>
                            <mass>{mass}</mass>
                            <inertia>
                                <ixx>{inertia_diagonal}</ixx>
                                <iyy>{inertia_diagonal}</iyy>
                                <izz>{inertia_diagonal}</izz>
                                <ixy>0.0</ixy>
                                <ixz>0.0</ixz>
                                <iyz>0.0</iyz>
                            </inertia>
                        </inertial>
                    </link>
                </model>
            </sdf>'''
--- a/env_manager/rbs_gym/rbs_gym/envs/models/objects/random_lunar_rock.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/models/objects/random_lunar_rock.py
@ -0,0 +1,57 @@
 import os
 from typing import List, Optional
 import numpy as np
 from gym_gz.scenario import model_wrapper
 from gym_gz.utils.scenario import get_unique_model_name
 from numpy.random import RandomState
 from scenario import core as scenario
 class RandomLunarRock(model_wrapper.ModelWrapper):
    """Model wrapper that inserts one randomly selected lunar rock into a world.

    The rock is chosen among the entries of a directory of models. Each entry
    is expected to be a directory that contains a `model.sdf` file.
    """

    def __init__(
        self,
        world: scenario.World,
        name: str = "rock",
        position: List[float] = (0, 0, 0),
        orientation: List[float] = (1, 0, 0, 0),
        models_dir: Optional[str] = None,
        np_random: Optional[RandomState] = None,
        **kwargs,
    ):
        """Select a random rock model and insert it into `world`.

        Args:
            world: World into which the model is inserted.
            name: Base name of the model; it is made unique within `world`.
            position: Initial position passed to `scenario.Pose`.
            orientation: Initial orientation passed to `scenario.Pose`.
            models_dir: Directory that contains one sub-directory per rock
                model. When empty or `None`, the value of the
                `SDF_PATH_LUNAR_ROCK` environment variable is used instead.
            np_random: Random number generator used for the selection. A new
                `np.random.default_rng()` is created when `None`.
            **kwargs: Accepted for signature compatibility and ignored.

        Raises:
            ValueError: If the directory of models does not exist.
            RuntimeError: If the model could not be inserted into the world.
        """
        if np_random is None:
            np_random = np.random.default_rng()

        # Get a unique model name
        model_name = get_unique_model_name(world, name)

        # Initial pose
        initial_pose = scenario.Pose(position, orientation)

        # Get path to all lunar rock models
        if not models_dir:
            models_dir = os.environ.get("SDF_PATH_LUNAR_ROCK", default="")

        # Make sure the path exists
        if not os.path.exists(models_dir):
            raise ValueError(
                f"Invalid path '{models_dir}' pointed by 'SDF_PATH_LUNAR_ROCK' environment variable."
            )

        # Select a single model at random. The listing is sorted because
        # `os.listdir` returns entries in arbitrary order, which would make the
        # selection irreproducible even with a seeded generator.
        model_dir = np_random.choice(sorted(os.listdir(models_dir)))

        # `os.listdir` yields bare entry names, so the models directory must be
        # prepended to obtain a path that is valid regardless of the CWD.
        sdf_filepath = os.path.join(models_dir, model_dir, "model.sdf")

        # Insert the model
        ok_model = world.to_gazebo().insert_model_from_file(
            sdf_filepath, initial_pose, model_name
        )
        if not ok_model:
            raise RuntimeError("Failed to insert " + model_name)

        # Get the model
        model = world.get_model(model_name)

        # Initialize base class
        model_wrapper.ModelWrapper.__init__(self, model=model)
--- a/env_manager/rbs_gym/rbs_gym/envs/models/objects/random_object.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/models/objects/random_object.py
@ -0,0 +1,60 @@
 from typing import List, Optional
 from gym_gz.scenario import model_wrapper
 from gym_gz.utils.scenario import get_unique_model_name
 from numpy.random import RandomState
 from scenario import core as scenario
 from rbs_gym.envs.models.utils import ModelCollectionRandomizer
 class RandomObject(model_wrapper.ModelWrapper):
    """Model wrapper around a model drawn at random from a model collection.

    The selection itself is delegated to `ModelCollectionRandomizer`.
    """

    def __init__(
        self,
        world: scenario.World,
        name: str = "object",
        position: List[float] = (0, 0, 0),
        orientation: List[float] = (1, 0, 0, 0),
        model_paths: str = None,
        owner: str = "GoogleResearch",
        collection: str = "Google Scanned Objects",
        server: str = "https://fuel.ignitionrobotics.org",
        server_version: str = "1.0",
        unique_cache: bool = False,
        reset_collection: bool = False,
        np_random: Optional[RandomState] = None,
        **kwargs,
    ):
        """Draw a random model from the collection and insert it into `world`.

        Args:
            world: World into which the model is inserted.
            name: Base name of the model; it is made unique within `world`.
            position: Initial position passed to `scenario.Pose`.
            orientation: Initial orientation passed to `scenario.Pose`.
            model_paths, owner, collection, server, server_version,
            unique_cache, reset_collection, np_random: Forwarded unchanged to
                `ModelCollectionRandomizer`.
            **kwargs: Accepted for signature compatibility and ignored.

        Raises:
            RuntimeError: If the model could not be inserted into the world.
        """
        # Reserve a name that does not clash with models already in the world
        unique_name = get_unique_model_name(world, name)

        # Pose at which the model is spawned
        spawn_pose = scenario.Pose(position, orientation)

        # Randomizer configured for the requested collection
        randomizer = ModelCollectionRandomizer(
            model_paths=model_paths,
            owner=owner,
            collection=collection,
            server=server,
            server_version=server_version,
            unique_cache=unique_cache,
            reset_collection=reset_collection,
            np_random=np_random,
        )

        # Draw one model; `random_model()` is invoked with its default arguments
        sdf_file = randomizer.random_model()

        # Spawn the model and bail out if the simulator rejects it
        if not world.to_gazebo().insert_model(sdf_file, spawn_pose, unique_name):
            raise RuntimeError("Failed to insert " + unique_name)

        # Hand the inserted model over to the base class
        model_wrapper.ModelWrapper.__init__(self, model=world.get_model(unique_name))
--- a/env_manager/rbs_gym/rbs_gym/envs/models/objects/random_primitive.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/models/objects/random_primitive.py
@ -0,0 +1,124 @@
 from typing import List, Optional, Union
 import numpy as np
 from gym_gz.scenario import model_wrapper
 from gym_gz.utils import misc
 from gym_gz.utils.scenario import get_unique_model_name
 from numpy.random import RandomState
 from scenario import core as scenario
 from . import Box, Cylinder, Sphere
 class RandomPrimitive(model_wrapper.ModelWrapper):
    """Model wrapper for a primitive shape with randomized properties.

    The primitive is a box, a cylinder or a sphere whose dimensions, mass,
    friction and color are sampled by `get_sdf()`.
    """

    def __init__(
        self,
        world: scenario.World,
        name: str = "primitive",
        use_specific_primitive: Union[str, None] = None,
        position: List[float] = (0, 0, 0),
        orientation: List[float] = (1, 0, 0, 0),
        static: bool = False,
        collision: bool = True,
        visual: bool = True,
        gui_only: bool = False,
        np_random: Optional[RandomState] = None,
        **kwargs,
    ):
        """Generate a random primitive and insert it into `world`.

        Args:
            world: World into which the model is inserted.
            name: Base name of the model; it is made unique within `world`.
            use_specific_primitive: One of 'box', 'cylinder' or 'sphere' to
                force the shape, or `None` to select it at random.
            position: Initial position passed to `scenario.Pose`.
            orientation: Initial orientation passed to `scenario.Pose`.
            static, collision, visual, gui_only: Forwarded to the `get_sdf()`
                of the selected primitive.
            np_random: Random number generator used for sampling. A new
                `np.random.default_rng()` is created when `None`.
            **kwargs: Accepted for signature compatibility and ignored.

        Raises:
            TypeError: If `use_specific_primitive` is not a supported shape.
            RuntimeError: If the model could not be inserted into the world.
        """
        if np_random is None:
            np_random = np.random.default_rng()

        # Get a unique model name
        model_name = get_unique_model_name(world, name)

        # Initial pose
        initial_pose = scenario.Pose(position, orientation)

        # Create SDF string for the model
        sdf = self.get_sdf(
            model_name=model_name,
            use_specific_primitive=use_specific_primitive,
            static=static,
            collision=collision,
            visual=visual,
            gui_only=gui_only,
            np_random=np_random,
        )

        # Insert the model
        ok_model = world.to_gazebo().insert_model_from_string(
            sdf, initial_pose, model_name
        )
        if not ok_model:
            raise RuntimeError("Failed to insert " + model_name)

        # Get the model
        model = world.get_model(model_name)

        # Initialize base class
        model_wrapper.ModelWrapper.__init__(self, model=model)

    @classmethod
    def get_sdf(
        cls,
        model_name: str,
        use_specific_primitive: Union[str, None],
        static: bool,
        collision: bool,
        visual: bool,
        gui_only: bool,
        np_random: RandomState,
    ) -> str:
        """Return the SDF string of a primitive with randomized properties.

        Args:
            model_name: Name given to the model inside the SDF.
            use_specific_primitive: One of 'box', 'cylinder' or 'sphere', or
                `None` to select the shape at random.
            static, collision, visual, gui_only: Forwarded to the `get_sdf()`
                of the selected primitive.
            np_random: Random number generator used for all sampling.

        Returns:
            SDF description produced by `Box`, `Cylinder` or `Sphere`.

        Raises:
            TypeError: If `use_specific_primitive` is not a supported shape.
        """
        if use_specific_primitive is not None:
            primitive = use_specific_primitive
        else:
            primitive = np_random.choice(["box", "cylinder", "sphere"])

        # Properties shared by all shapes. They are sampled before the
        # shape-specific dimensions; keep this order so that seeded generators
        # keep producing the same models.
        mass = np_random.uniform(0.05, 0.25)
        friction = np_random.uniform(0.75, 1.5)
        # Random RGB with a fixed alpha of 1.0
        color = list(np_random.uniform(0.0, 1.0, (3,)))
        color.append(1.0)

        if "box" == primitive:
            return Box.get_sdf(
                model_name=model_name,
                size=list(np_random.uniform(0.04, 0.06, (3,))),
                mass=mass,
                static=static,
                collision=collision,
                friction=friction,
                visual=visual,
                gui_only=gui_only,
                color=color,
            )
        elif "cylinder" == primitive:
            return Cylinder.get_sdf(
                model_name=model_name,
                radius=np_random.uniform(0.01, 0.0375),
                length=np_random.uniform(0.025, 0.05),
                mass=mass,
                static=static,
                collision=collision,
                friction=friction,
                visual=visual,
                gui_only=gui_only,
                color=color,
            )
        elif "sphere" == primitive:
            return Sphere.get_sdf(
                model_name=model_name,
                radius=np_random.uniform(0.01, 0.0375),
                mass=mass,
                static=static,
                collision=collision,
                friction=friction,
                visual=visual,
                gui_only=gui_only,
                color=color,
            )
        else:
            raise TypeError(
                f"Type '{use_specific_primitive}' is not a supported primitive. Please use 'box', 'cylinder' or 'sphere'."
            )
--- a/env_manager/rbs_gym/rbs_gym/envs/models/objects/rock.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/models/objects/rock.py
@ -0,0 +1,52 @@
 from typing import List
 from gym_gz.scenario import model_with_file, model_wrapper
 from gym_gz.utils.scenario import get_unique_model_name
 from scenario import core as scenario
 from scenario import gazebo as scenario_gazebo
 class Rock(model_wrapper.ModelWrapper, model_with_file.ModelWithFile):
    """Model wrapper for a rock that is loaded from a model file."""

    def __init__(
        self,
        world: scenario.World,
        name: str = "rock",
        position: List[float] = (0, 0, 0),
        orientation: List[float] = (1, 0, 0, 0),
        model_file: str = None,
        use_fuel: bool = True,
        variant: int = 6,
        **kwargs,
    ):
        """Insert a rock model into `world`.

        Args:
            world: World into which the model is inserted.
            name: Base name of the model; it is made unique within `world`.
            position: Initial position passed to `scenario.Pose`.
            orientation: Initial orientation passed to `scenario.Pose`.
            model_file: Custom model file. When `None`, the file returned by
                `get_model_file()` is used.
            use_fuel: Forwarded to `get_model_file()` as `fuel`.
            variant: Forwarded to `get_model_file()`.
            **kwargs: Accepted for signature compatibility and ignored.

        Raises:
            RuntimeError: If the model could not be inserted into the world.
        """
        # A caller-supplied file takes precedence over the default one
        sdf_file = (
            model_file
            if model_file is not None
            else self.get_model_file(fuel=use_fuel, variant=variant)
        )

        # Name that does not clash with models already present in the world
        unique_name = get_unique_model_name(world, name)

        # Pose at which the rock is spawned
        spawn_pose = scenario.Pose(position, orientation)

        # Spawn the rock and fail loudly if the simulator rejects it
        if not world.to_gazebo().insert_model_from_file(
            sdf_file, spawn_pose, unique_name
        ):
            raise RuntimeError("Failed to insert " + unique_name)

        # Hand the inserted model over to the base classes
        super().__init__(model=world.get_model(unique_name))

    @classmethod
    def get_model_file(cls, fuel: bool = False, variant: int = 6) -> str:
        """Return the model file of the rock.

        With `fuel` enabled, the "Falling Rock {variant}" model is resolved via
        `scenario_gazebo.get_model_file_from_fuel`. Otherwise a local model
        name is returned.
        """
        if not fuel:
            # NOTE(review): "lunar_surface" looks like a leftover from another
            # model - confirm that this is the intended local rock model.
            return "lunar_surface"

        return scenario_gazebo.get_model_file_from_fuel(
            f"https://fuel.ignitionrobotics.org/1.0/OpenRobotics/models/Falling Rock {variant}"
        )
--- a/env_manager/rbs_gym/rbs_gym/envs/models/robots/init.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/models/robots/init.py
@ -0,0 +1,15 @@
 from gym_gz.scenario.model_wrapper import ModelWrapper
 from .rbs_arm import RbsArm
 # from .panda import Panda
 # TODO: When adding a new robot, create abstract classes to simplify the process
 def get_robot_model_class(robot_model: str) -> ModelWrapper:
    """Return the robot model class registered under the name `robot_model`.

    Args:
        robot_model: Name of the robot model; currently only 'rbs_arm'.

    Returns:
        The class (not an instance) that wraps the requested robot.

    Raises:
        ValueError: If `robot_model` does not name a supported robot. Raising
            here replaces an implicit `None` return, which would only fail
            later when the missing class is first used.
    """
    # TODO: Refactor into enum
    if "rbs_arm" == robot_model:
        return RbsArm
    # elif "panda" == robot_model:
    #     return Panda

    raise ValueError(
        f"Robot model '{robot_model}' is not supported. Supported models: 'rbs_arm'."
    )
--- a/env_manager/rbs_gym/rbs_gym/envs/models/robots/panda.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/models/robots/panda.py
@ -0,0 +1,348 @@
 from os import path
 from typing import Dict, List, Optional, Tuple
 from ament_index_python.packages import get_package_share_directory
 from gym_gz.scenario import model_with_file, model_wrapper
 from gym_gz.utils.scenario import get_unique_model_name
 from scenario import core as scenario
 from scenario import gazebo as scenario_gazebo
 from rbs_gym.envs.models.utils import xacro2sdf
 class Panda(model_wrapper.ModelWrapper, model_with_file.ModelWithFile):
    """Model wrapper for the `panda` robot (7-joint arm with a 2-finger gripper).

    By default the model is generated from the xacro file of the
    `panda_description` package. Besides inserting the model into a world, the
    class exposes the names of joints and links as well as the joint limits,
    both as prefix-parametrized class methods and as instance properties.
    """

    ROBOT_MODEL_NAME: str = "panda"
    DEFAULT_PREFIX: str = "panda_"

    __DESCRIPTION_PACKAGE = ROBOT_MODEL_NAME + "_description"
    __DEFAULT_XACRO_FILE = path.join(
        get_package_share_directory(__DESCRIPTION_PACKAGE),
        "urdf",
        ROBOT_MODEL_NAME + ".urdf.xacro",
    )
    # Default xacro arguments. Treated as read-only; `__init__` works on a copy.
    __DEFAULT_XACRO_MAPPINGS: Dict[str, any] = {
        "name": ROBOT_MODEL_NAME,
        "gripper": True,
        "collision_arm": False,
        "collision_gripper": True,
        "ros2_control": True,
        "ros2_control_plugin": "ign",
        "ros2_control_command_interface": "effort",
        "gazebo_preserve_fixed_joint": True,
    }
    __XACRO_MODEL_PATH_REMAP: Tuple[str, str] = (
        __DESCRIPTION_PACKAGE,
        ROBOT_MODEL_NAME,
    )

    DEFAULT_ARM_JOINT_POSITIONS: List[float] = (
        0.0,
        -0.7853981633974483,
        0.0,
        -2.356194490192345,
        0.0,
        1.5707963267948966,
        0.7853981633974483,
    )
    OPEN_GRIPPER_JOINT_POSITIONS: List[float] = (
        0.04,
        0.04,
    )
    CLOSED_GRIPPER_JOINT_POSITIONS: List[float] = (
        0.0,
        0.0,
    )
    DEFAULT_GRIPPER_JOINT_POSITIONS: List[float] = OPEN_GRIPPER_JOINT_POSITIONS

    BASE_LINK_Z_OFFSET: float = 0.0

    def __init__(
        self,
        world: scenario.World,
        name: str = ROBOT_MODEL_NAME,
        position: List[float] = (0, 0, 0),
        orientation: List[float] = (1, 0, 0, 0),
        model_file: str = None,
        use_fuel: bool = False,
        use_xacro: bool = True,
        xacro_file: str = __DEFAULT_XACRO_FILE,
        xacro_mappings: Dict[str, any] = __DEFAULT_XACRO_MAPPINGS,
        initial_arm_joint_positions: List[float] = DEFAULT_ARM_JOINT_POSITIONS,
        initial_gripper_joint_positions: List[float] = OPEN_GRIPPER_JOINT_POSITIONS,
        **kwargs,
    ):
        """Insert the robot into `world` and set its initial joint positions.

        Args:
            world: World into which the model is inserted.
            name: Base name of the model; it is made unique within `world`.
                It also determines the prefix (`f"{name}_"`) of joint and link
                names.
            position: Initial position passed to `scenario.Pose`.
            orientation: Initial orientation passed to `scenario.Pose`.
            model_file: Custom model. When `None`, the model is generated from
                xacro (`use_xacro`) or obtained from `get_model_file()`.
            use_fuel: Forwarded to `get_model_file()` when xacro is not used.
            use_xacro: Generate the model from `xacro_file`. Note that this
                flag also selects insertion from string rather than from file,
                even when `model_file` is supplied explicitly.
            xacro_file: Path to the xacro file of the robot.
            xacro_mappings: Xacro arguments. When passed explicitly, they take
                precedence over both the defaults and `kwargs`.
            initial_arm_joint_positions: Initial positions of the arm joints.
            initial_gripper_joint_positions: Initial positions of the gripper
                joints.
            **kwargs: Additional xacro arguments that override the defaults.

        Raises:
            RuntimeError: If the model could not be inserted or its initial
                joint positions could not be set.
        """
        # Store params that are needed internally
        self.__prefix = f"{name}_"
        self.__initial_arm_joint_positions = initial_arm_joint_positions
        self.__initial_gripper_joint_positions = initial_gripper_joint_positions

        # Allow passing of custom model file as an argument
        if model_file is None:
            if use_xacro:
                # Generate SDF from xacro. Start from a copy of the defaults so
                # that overrides of one instance never leak into the class-level
                # dictionary (and thereby into every later instance).
                mappings = dict(self.__DEFAULT_XACRO_MAPPINGS)
                mappings.update(kwargs)
                # The default value of `xacro_mappings` is the defaults
                # dictionary itself. Merging it again would undo the overrides
                # from `kwargs`, so only explicitly passed mappings are applied.
                if xacro_mappings is not self.__DEFAULT_XACRO_MAPPINGS:
                    mappings.update(xacro_mappings)
                model_file = xacro2sdf(
                    input_file_path=xacro_file,
                    mappings=mappings,
                    model_path_remap=self.__XACRO_MODEL_PATH_REMAP,
                )
            else:
                # Otherwise, use the default SDF file (local or fuel)
                model_file = self.get_model_file(fuel=use_fuel)

        # Get a unique model name
        model_name = get_unique_model_name(world, name)

        # Setup initial pose
        initial_pose = scenario.Pose(position, orientation)

        # Determine whether to insert from string or file
        if use_xacro:
            insert_fn = scenario_gazebo.World.insert_model_from_string
        else:
            insert_fn = scenario_gazebo.World.insert_model_from_file

        # Insert the model
        ok_model = insert_fn(world.to_gazebo(), model_file, initial_pose, model_name)
        if not ok_model:
            raise RuntimeError("Failed to insert " + model_name)

        # Get the model
        model = world.get_model(model_name)

        # Set initial joint configuration
        self.set_initial_joint_positions(model)

        # Initialize base class
        super().__init__(model=model)

    def set_initial_joint_positions(self, model):
        """Reset arm and gripper joints of `model` to their initial positions.

        Raises:
            RuntimeError: If resetting the joint positions fails.
        """
        model = model.to_gazebo()
        if not model.reset_joint_positions(
            self.initial_arm_joint_positions, self.arm_joint_names
        ):
            raise RuntimeError("Failed to set initial positions of arm's joints")
        if not model.reset_joint_positions(
            self.initial_gripper_joint_positions, self.gripper_joint_names
        ):
            raise RuntimeError("Failed to set initial positions of gripper's joints")

    @classmethod
    def get_model_file(cls, fuel: bool = False) -> str:
        """Return the local model name of the robot.

        Raises:
            NotImplementedError: If `fuel` is enabled; retrieving the model
                from Fuel is not supported yet.
        """
        if fuel:
            # Once supported, the model would be resolved via
            # `scenario_gazebo.get_model_file_from_fuel` from
            # "https://fuel.ignitionrobotics.org/1.0/AndrejOrsula/models/"
            # + cls.ROBOT_MODEL_NAME
            raise NotImplementedError
        else:
            return cls.ROBOT_MODEL_NAME

    # Meta information #
    @property
    def is_mobile(self) -> bool:
        """Whether the robot has a mobile base; always `False`."""
        return False

    # Prefix #
    @property
    def prefix(self) -> str:
        """Prefix of joint and link names, derived from the model name."""
        return self.__prefix

    # Joints #
    @property
    def joint_names(self) -> List[str]:
        """Names of all joints (mobile base followed by manipulator)."""
        return self.move_base_joint_names + self.manipulator_joint_names

    @property
    def move_base_joint_names(self) -> List[str]:
        """Names of mobile base joints; empty for this robot."""
        return []

    @property
    def manipulator_joint_names(self) -> List[str]:
        """Names of manipulator joints (arm followed by gripper)."""
        return self.arm_joint_names + self.gripper_joint_names

    @classmethod
    def get_arm_joint_names(cls, prefix: str = "") -> List[str]:
        """Return the names of the seven arm joints with `prefix` prepended."""
        return [
            prefix + "joint1",
            prefix + "joint2",
            prefix + "joint3",
            prefix + "joint4",
            prefix + "joint5",
            prefix + "joint6",
            prefix + "joint7",
        ]

    @property
    def arm_joint_names(self) -> List[str]:
        """Names of the arm joints of this instance."""
        return self.get_arm_joint_names(self.prefix)

    @classmethod
    def get_gripper_joint_names(cls, prefix: str = "") -> List[str]:
        """Return the names of the two finger joints with `prefix` prepended."""
        return [
            prefix + "finger_joint1",
            prefix + "finger_joint2",
        ]

    @property
    def gripper_joint_names(self) -> List[str]:
        """Names of the gripper joints of this instance."""
        return self.get_gripper_joint_names(self.prefix)

    @property
    def move_base_joint_limits(self) -> Optional[List[Tuple[float, float]]]:
        """Limits of mobile base joints; `None` because there is no base."""
        return None

    @property
    def arm_joint_limits(self) -> Optional[List[Tuple[float, float]]]:
        """(lower, upper) limits of the arm joints, ordered as the joint names."""
        return [
            (-2.897246558310587, 2.897246558310587),
            (-1.762782544514273, 1.762782544514273),
            (-2.897246558310587, 2.897246558310587),
            (-3.07177948351002, -0.06981317007977318),
            (-2.897246558310587, 2.897246558310587),
            (-0.0174532925199433, 3.752457891787809),
            (-2.897246558310587, 2.897246558310587),
        ]

    @property
    def gripper_joint_limits(self) -> Optional[List[Tuple[float, float]]]:
        """(lower, upper) limits of the gripper joints."""
        return [
            (0.0, 0.04),
            (0.0, 0.04),
        ]

    @property
    def gripper_joints_close_towards_positive(self) -> bool:
        """Whether the closed position is greater than the open position."""
        return (
            self.OPEN_GRIPPER_JOINT_POSITIONS[0]
            < self.CLOSED_GRIPPER_JOINT_POSITIONS[0]
        )

    @property
    def initial_arm_joint_positions(self) -> List[float]:
        """Initial arm joint positions passed to the constructor."""
        return self.__initial_arm_joint_positions

    @property
    def initial_gripper_joint_positions(self) -> List[float]:
        """Initial gripper joint positions passed to the constructor."""
        return self.__initial_gripper_joint_positions

    # Passive joints #
    @property
    def passive_joint_names(self) -> List[str]:
        """Names of all passive joints (manipulator followed by mobile base)."""
        return self.manipulator_passive_joint_names + self.move_base_passive_joint_names

    @property
    def move_base_passive_joint_names(self) -> List[str]:
        """Names of passive mobile base joints; empty for this robot."""
        return []

    @property
    def manipulator_passive_joint_names(self) -> List[str]:
        """Names of passive manipulator joints (arm followed by gripper)."""
        return self.arm_passive_joint_names + self.gripper_passive_joint_names

    @property
    def arm_passive_joint_names(self) -> List[str]:
        """Names of passive arm joints; empty for this robot."""
        return []

    @property
    def gripper_passive_joint_names(self) -> List[str]:
        """Names of passive gripper joints; empty for this robot."""
        return []

    # Links #
    @classmethod
    def get_robot_base_link_name(cls, prefix: str = "") -> str:
        """Return the name of the robot base link (same as the arm base link)."""
        return cls.get_arm_base_link_name(prefix)

    @property
    def robot_base_link_name(self) -> str:
        """Name of the robot base link of this instance."""
        return self.get_robot_base_link_name(self.prefix)

    @classmethod
    def get_arm_base_link_name(cls, prefix: str = "") -> str:
        """Return the name of the arm base link with `prefix` prepended."""
        # Same as `self.arm_link_names[0]`
        return prefix + "link0"

    @property
    def arm_base_link_name(self) -> str:
        """Name of the arm base link of this instance."""
        return self.get_arm_base_link_name(self.prefix)

    @classmethod
    def get_ee_link_name(cls, prefix: str = "") -> str:
        """Return the name of the end-effector link with `prefix` prepended."""
        return prefix + "hand_tcp"

    @property
    def ee_link_name(self) -> str:
        """Name of the end-effector link of this instance."""
        return self.get_ee_link_name(self.prefix)

    @classmethod
    def get_wheel_link_names(cls, prefix: str = "") -> List[str]:
        """Return the names of wheel links; empty for this robot."""
        return []

    @property
    def wheel_link_names(self) -> List[str]:
        """Names of the wheel links of this instance."""
        return self.get_wheel_link_names(self.prefix)

    @classmethod
    def get_arm_link_names(cls, prefix: str = "") -> List[str]:
        """Return the names of the arm links with `prefix` prepended."""
        return [
            prefix + "link0",
            prefix + "link1",
            prefix + "link2",
            prefix + "link3",
            prefix + "link4",
            prefix + "link5",
            prefix + "link6",
            prefix + "link7",
        ]

    @property
    def arm_link_names(self) -> List[str]:
        """Names of the arm links of this instance."""
        return self.get_arm_link_names(self.prefix)

    @classmethod
    def get_gripper_link_names(cls, prefix: str = "") -> List[str]:
        """Return the names of the finger links with `prefix` prepended."""
        return [
            prefix + "leftfinger",
            prefix + "rightfinger",
        ]

    @property
    def gripper_link_names(self) -> List[str]:
        """Names of the gripper links of this instance."""
        return self.get_gripper_link_names(self.prefix)
--- a/env_manager/rbs_gym/rbs_gym/envs/models/robots/rbs_arm.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/models/robots/rbs_arm.py
@ -0,0 +1,261 @@
 from typing import Dict, List, Optional, Tuple
 from gym_gz.scenario import model_with_file, model_wrapper
 from gym_gz.utils.scenario import get_unique_model_name
 from scenario import gazebo as sgaz
 import numpy as np
 from scenario import core as scenario
 from scenario import gazebo as scenario_gazebo
 class RbsArm(model_wrapper.ModelWrapper, model_with_file.ModelWithFile):
    """Model wrapper for the `rbs_arm` manipulator with a 2-finger gripper.

    Besides inserting the model into a world, the class exposes the names of
    joints and links as well as the joint limits. Unlike the `prefix` property
    suggests, the joint and link names of this robot are fixed; the `prefix`
    arguments of the `get_*` class methods are accepted but not applied.
    """

    DEFAULT_ARM_JOINT_POSITIONS: List[float] = (
        0.0, 0.5, 3.14159, 1.5, 0.0, 1.4, 0.0,
    )
    OPEN_GRIPPER_JOINT_POSITIONS: List[float] = (
        0.064,
        0.064,
    )
    CLOSED_GRIPPER_JOINT_POSITIONS: List[float] = (
        0.0,
        0.0,
    )
    DEFAULT_GRIPPER_JOINT_POSITIONS: List[float] = OPEN_GRIPPER_JOINT_POSITIONS

    BASE_LINK_Z_OFFSET: float = 0.0
    DEFAULT_PREFIX: str = ""
    ROBOT_MODEL_NAME: str = "rbs_arm"

    def __init__(
        self,
        world: scenario.World,
        name: str = "rbs_arm",
        position: List[float] = (0.0, 0.0, 0.0),
        orientation: List[float] = (1.0, 0, 0, 0),
        model_file: Optional[str] = None,
        use_fuel: bool = False,
        use_xacro: bool = False,
        xacro_file: str = "",
        xacro_mappings: Dict[str, any] = {},
        initial_arm_joint_positions: List[float] = DEFAULT_ARM_JOINT_POSITIONS,
        initial_gripper_joint_positions: List[float] = DEFAULT_GRIPPER_JOINT_POSITIONS,
        **kwargs
    ):
        """Insert the robot into `world` and set its initial joint positions.

        Args:
            world: World into which the model is inserted.
            name: Base name of the model; it is made unique within `world`.
            position: Initial position passed to `scenario.Pose`.
            orientation: Initial orientation passed to `scenario.Pose`.
            model_file: Custom model. When `None`, the file returned by
                `get_model_file()` is used.
            use_fuel: Currently unused.
            use_xacro: Selects insertion from string (`True`) rather than from
                file (`False`); no xacro processing is performed here.
            xacro_file: Currently unused.
            xacro_mappings: Currently unused (and therefore never mutated).
            initial_arm_joint_positions: Initial positions of the arm joints.
            initial_gripper_joint_positions: Initial positions of the gripper
                joints.
            **kwargs: Accepted for signature compatibility and ignored.

        Raises:
            RuntimeError: If the model could not be inserted or its initial
                joint positions could not be set.
        """
        # Store params that are needed internally
        self.__prefix = f"{name}_"
        self.__initial_arm_joint_positions = initial_arm_joint_positions
        self.__initial_gripper_joint_positions = initial_gripper_joint_positions

        # Respect a model supplied by the caller; only fall back to the default
        # file when none was given (previously the argument was overwritten).
        if model_file is None:
            model_file = self.get_model_file()

        # Get a unique model name
        model_name = get_unique_model_name(world, name)

        # Setup initial pose
        initial_pose = scenario.Pose(position, orientation)

        # Determine whether to insert from string or file
        if use_xacro:
            insert_fn = scenario_gazebo.World.insert_model_from_string
        else:
            insert_fn = scenario_gazebo.World.insert_model_from_file

        # Insert the model
        ok_model = insert_fn(world.to_gazebo(), model_file, initial_pose, model_name)
        if not ok_model:
            raise RuntimeError("Failed to insert " + model_name)

        # Get the model
        model = world.get_model(model_name)

        # Set initial joint configuration
        self.set_initial_joint_positions(model)

        # Initialize base class
        super().__init__(model=model)

    def set_initial_joint_positions(self, model):
        """Reset arm and gripper joints of `model` to their initial positions.

        Raises:
            RuntimeError: If resetting the joint positions fails.
        """
        model = model.to_gazebo()
        if not model.reset_joint_positions(
            self.initial_arm_joint_positions, self.arm_joint_names
        ):
            raise RuntimeError("Failed to set initial positions of arm's joints")
        if not model.reset_joint_positions(
            self.initial_gripper_joint_positions, self.gripper_joint_names
        ):
            raise RuntimeError("Failed to set initial positions of gripper's joints")

    # Meta information #
    @property
    def is_mobile(self) -> bool:
        """Whether the robot has a mobile base; always `False`."""
        return False

    # Prefix #
    @property
    def prefix(self) -> str:
        """Prefix derived from the model name (`f"{name}_"`)."""
        return self.__prefix

    # Joints #
    @property
    def joint_names(self) -> List[str]:
        """Names of all joints (mobile base followed by manipulator)."""
        return self.move_base_joint_names + self.manipulator_joint_names

    @property
    def move_base_joint_names(self) -> List[str]:
        """Names of mobile base joints; empty for this robot."""
        return []

    @property
    def manipulator_joint_names(self) -> List[str]:
        """Names of manipulator joints (arm followed by gripper)."""
        return self.arm_joint_names + self.gripper_joint_names

    @classmethod
    def get_arm_joint_names(cls, prefix: str = "") -> List[str]:
        """Return the names of the seven arm joints (`prefix` is not applied)."""
        return [
            "fork0_link_joint",
            "main0_link_joint",
            "fork1_link_joint",
            "main1_link_joint",
            "fork2_link_joint",
            "ee_link_joint",
            "rbs_gripper_rot_base_joint",
        ]

    @property
    def arm_joint_names(self) -> List[str]:
        """Names of the arm joints of this instance."""
        return self.get_arm_joint_names(self.prefix)

    @classmethod
    def get_gripper_joint_names(cls, prefix: str = "") -> List[str]:
        """Return the names of the two finger joints (`prefix` is not applied)."""
        return [
            "rbs_gripper_r_finger_joint",
            "rbs_gripper_l_finger_joint"
        ]

    @property
    def gripper_joint_names(self) -> List[str]:
        """Names of the gripper joints of this instance."""
        return self.get_gripper_joint_names(self.prefix)

    @property
    def move_base_joint_limits(self) -> Optional[List[Tuple[float, float]]]:
        """Limits of mobile base joints; `None` because there is no base."""
        return None

    @property
    def arm_joint_limits(self) -> Optional[List[Tuple[float, float]]]:
        """(lower, upper) limits of the arm joints, ordered as the joint names."""
        return [
            (-3.14159, 3.14159),
            (-1.5708, 3.14159),
            (-3.14159, 3.14159),
            (-1.5708, 3.14159),
            (-3.14159, 3.14159),
            (-1.5708, 3.14159),
            (-3.14159, 3.14159),
        ]

    @property
    def gripper_joint_limits(self) -> Optional[List[Tuple[float, float]]]:
        """(lower, upper) limits of the gripper joints."""
        return [
            (0.0, 0.064),
            (0.0, 0.064),
        ]

    @property
    def gripper_joints_close_towards_positive(self) -> bool:
        """Whether the closed position is greater than the open position."""
        return (
            self.OPEN_GRIPPER_JOINT_POSITIONS[0]
            < self.CLOSED_GRIPPER_JOINT_POSITIONS[0]
        )

    @property
    def initial_arm_joint_positions(self) -> List[float]:
        """Initial arm joint positions passed to the constructor."""
        return self.__initial_arm_joint_positions

    @property
    def initial_gripper_joint_positions(self) -> List[float]:
        """Initial gripper joint positions passed to the constructor."""
        return self.__initial_gripper_joint_positions

    # Passive joints #
    @property
    def passive_joint_names(self) -> List[str]:
        """Names of all passive joints (manipulator followed by mobile base)."""
        return self.manipulator_passive_joint_names + self.move_base_passive_joint_names

    @property
    def move_base_passive_joint_names(self) -> List[str]:
        """Names of passive mobile base joints; empty for this robot."""
        return []

    @property
    def manipulator_passive_joint_names(self) -> List[str]:
        """Names of passive manipulator joints (arm followed by gripper)."""
        return self.arm_passive_joint_names + self.gripper_passive_joint_names

    @property
    def arm_passive_joint_names(self) -> List[str]:
        """Names of passive arm joints; empty for this robot."""
        return []

    @property
    def gripper_passive_joint_names(self) -> List[str]:
        """Names of passive gripper joints; empty for this robot."""
        return []

    # Links #
    @classmethod
    def get_robot_base_link_name(cls, prefix: str = "") -> str:
        """Return the name of the robot base link (same as the arm base link)."""
        return cls.get_arm_base_link_name(prefix)

    @property
    def robot_base_link_name(self) -> str:
        """Name of the robot base link of this instance."""
        return self.get_robot_base_link_name(self.prefix)

    @classmethod
    def get_arm_base_link_name(cls, prefix: str = "") -> str:
        """Return the name of the arm base link (`prefix` is not applied)."""
        # Note: the base link is not part of `get_arm_link_names()`
        return "base_link"

    @property
    def arm_base_link_name(self) -> str:
        """Name of the arm base link of this instance."""
        return self.get_arm_base_link_name(self.prefix)

    @classmethod
    def get_ee_link_name(cls, prefix: str = "") -> str:
        """Return the name of the end-effector link (`prefix` is not applied)."""
        return "gripper_grasp_point"

    @property
    def ee_link_name(self) -> str:
        """Name of the end-effector link of this instance."""
        return self.get_ee_link_name(self.prefix)

    @classmethod
    def get_wheel_link_names(cls, prefix: str = "") -> List[str]:
        """Return the names of wheel links; empty for this robot."""
        return []

    @property
    def wheel_link_names(self) -> List[str]:
        """Names of the wheel links of this instance."""
        return self.get_wheel_link_names(self.prefix)

    @classmethod
    def get_arm_link_names(cls, prefix: str = "") -> List[str]:
        """Return the names of the arm links (`prefix` is not applied)."""
        return ["fork0_link",
                "main0_link",
                "fork1_link",
                "main1_link",
                "fork2_link",
                "tool0",
                "ee_link",
                "rbs_gripper_rot_base_link"]

    @property
    def arm_link_names(self) -> List[str]:
        """Names of the arm links of this instance."""
        return self.get_arm_link_names(self.prefix)

    @classmethod
    def get_gripper_link_names(cls, prefix: str = "") -> List[str]:
        """Return the names of the finger links (`prefix` is not applied)."""
        return [
            "rbs_gripper_l_finger_link",
            "rbs_gripper_r_finger_link"
        ]

    @property
    def gripper_link_names(self) -> List[str]:
        """Names of the gripper links of this instance."""
        return self.get_gripper_link_names(self.prefix)

    @classmethod
    def get_model_file(cls) -> str:
        """Return the path of the default model file of the robot."""
        # NOTE(review): absolute path into a developer's home directory - it
        # only exists on that machine. Pass `model_file` to the constructor or
        # replace this with a path resolved from an installed package.
        return "/home/bill-finger/rbs_ws/current.urdf"
--- a/env_manager/rbs_gym/rbs_gym/envs/models/sensors/init.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/models/sensors/init.py
@ -0,0 +1 @@
 from .camera import Camera
--- a/env_manager/rbs_gym/rbs_gym/envs/models/sensors/camera.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/models/sensors/camera.py
@ -0,0 +1,324 @@
 import os
 from threading import Thread
 from typing import List, Optional, Union
 from gym_gz.scenario import model_wrapper
 from gym_gz.utils.scenario import get_unique_model_name
 from scenario import core as scenario
 from rbs_gym.envs.models.utils import ModelCollectionRandomizer
 class Camera(model_wrapper.ModelWrapper):
    """Camera sensor model that is generated as an SDF string and inserted into a world.

    Topic, frame and link names are all derived from the unique model name.
    When any of the `ros2_bridge_*` flags is set, one `parameter_bridge`
    command per requested topic is launched from a daemon thread.
    """

    def __init__(
        self,
        world: scenario.World,
        name: Union[str, None] = None,
        position: List[float] = (0, 0, 0),
        orientation: List[float] = (1, 0, 0, 0),
        static: bool = True,
        camera_type: str = "rgbd_camera",
        width: int = 212,
        height: int = 120,
        image_format: str = "R8G8B8",
        update_rate: int = 15,
        horizontal_fov: float = 1.567821,
        vertical_fov: float = 1.022238,
        clip_color: List[float] = (0.02, 1000.0),
        clip_depth: List[float] = (0.02, 10.0),
        noise_mean: Optional[float] = None,
        noise_stddev: Optional[float] = None,
        ros2_bridge_color: bool = False,
        ros2_bridge_depth: bool = False,
        ros2_bridge_points: bool = False,
        visibility_mask: int = 0,
        visual: Optional[str] = None,
        # visual: Optional[str] = "intel_realsense_d435",
    ):
        """Build the camera SDF, insert it into `world` and optionally start ROS 2 bridges.

        Args:
            world: World the model is inserted into.
            name: Requested model name; when None, `camera_type` is used as the
                base for the unique model name.
            position: Initial position of the model.
            orientation: Initial orientation of the model.
            static: Value written into the `<static>` element.
            camera_type: Sensor type written into the SDF. The `<depth_camera>`
                clip element is emitted only when it contains "rgbd".
            width, height, image_format: Image settings written into the SDF.
            update_rate: Sensor update rate written into the SDF.
            horizontal_fov, vertical_fov: Field-of-view values written into the SDF.
            clip_color: (near, far) clip of the camera.
            clip_depth: (near, far) clip of the depth camera.
            noise_mean, noise_stddev: Gaussian noise parameters; the `<noise>`
                element is emitted only when both are given.
            ros2_bridge_color, ros2_bridge_depth, ros2_bridge_points: Start a
                bridge for the corresponding topic.
            visibility_mask: Value written into `<visibility_mask>`.
            visual: Falsy for no visual, "intel_realsense_d435" for the mesh
                visual, any other truthy value for a primitive visual.

        Raises:
            ValueError: The RealSense mesh or one of its textures is missing.
            RuntimeError: The model could not be inserted into the world.
        """
        # Get a unique model name
        if name is not None:
            model_name = get_unique_model_name(world, name)
        else:
            model_name = get_unique_model_name(world, camera_type)
        self._model_name = model_name

        # Initial pose
        initial_pose = scenario.Pose(position, orientation)

        # Get resources for visual (if enabled)
        if visual:
            use_mesh: bool = False
            if "intel_realsense_d435" == visual:
                use_mesh = True
                # Get path to the model and the important directories
                model_path = ModelCollectionRandomizer.get_collection_paths(
                    owner="OpenRobotics",
                    collection="",
                    model_name="Intel RealSense D435",
                )[0]
                mesh_dir = os.path.join(model_path, "meshes")
                texture_dir = os.path.join(model_path, "materials", "textures")
                # Get path to the mesh
                mesh_path_visual = os.path.join(mesh_dir, "realsense.dae")
                # Make sure that it exists
                if not os.path.exists(mesh_path_visual):
                    raise ValueError(
                        f"Visual mesh '{mesh_path_visual}' for Camera model is not a valid file."
                    )
                # Find PBR textures
                albedo_map = None
                normal_map = None
                roughness_map = None
                metalness_map = None
                if texture_dir:
                    # List all files
                    texture_files = os.listdir(texture_dir)
                    # Extract the appropriate files
                    for texture in texture_files:
                        texture_lower = texture.lower()
                        if "basecolor" in texture_lower or "albedo" in texture_lower:
                            albedo_map = os.path.join(texture_dir, texture)
                        elif "normal" in texture_lower:
                            normal_map = os.path.join(texture_dir, texture)
                        elif "roughness" in texture_lower:
                            roughness_map = os.path.join(texture_dir, texture)
                        elif (
                            "specular" in texture_lower or "metalness" in texture_lower
                        ):
                            metalness_map = os.path.join(texture_dir, texture)
                if not (albedo_map and normal_map and roughness_map and metalness_map):
                    raise ValueError("Not all textures for Camera model were found.")

        # Create SDF string for the model.
        # The depth clip is selected by the sensor type rather than by the model
        # name, so a custom `name` does not silently drop `clip_depth`.
        sdf = f'''<sdf version="1.9">
            <model name="{model_name}">
                <static>{static}</static>
                <link name="{self.link_name}">
                    <sensor name="camera" type="{camera_type}">
                        <topic>{model_name}</topic>
                        <always_on>true</always_on>
                        <update_rate>{update_rate}</update_rate>
                        <camera name="{model_name}_camera">
                            <image>
                                <width>{width}</width>
                                <height>{height}</height>
                                <format>{image_format}</format>
                            </image>
                            <horizontal_fov>{horizontal_fov}</horizontal_fov>
                            <vertical_fov>{vertical_fov}</vertical_fov>
                            <clip>
                                <near>{clip_color[0]}</near>
                                <far>{clip_color[1]}</far>
                            </clip>
                            {
                            f"""<depth_camera>
                                <clip>
                                    <near>{clip_depth[0]}</near>
                                    <far>{clip_depth[1]}</far>
                                </clip>
                            </depth_camera>""" if "rgbd" in camera_type else ""
                            }
                            {
                            f"""<noise>
                                <type>gaussian</type>
                                <mean>{noise_mean}</mean>
                                <stddev>{noise_stddev}</stddev>
                            </noise>""" if noise_mean is not None and noise_stddev is not None else ""
                            }
                            <visibility_mask>{visibility_mask}</visibility_mask>
                        </camera>
                        <visualize>true</visualize>
                    </sensor>
                    {
                        f"""
                        <visual name="{model_name}_visual_lens">
                            <pose>-0.01 0 0 0 1.5707963 0</pose>
                            <geometry>
                                <cylinder>
                                    <radius>0.02</radius>
                                    <length>0.02</length>
                                </cylinder>
                            </geometry>
                            <material>
                                <ambient>0.0 0.8 0.0</ambient>
                                <diffuse>0.0 0.8 0.0</diffuse>
                                <specular>0.0 0.8 0.0</specular>
                            </material>
                        </visual>
                        <visual name="{model_name}_visual_body">
                            <pose>-0.05 0 0 0 0 0</pose>
                            <geometry>
                                <box>
                                    <size>0.06 0.05 0.05</size>
                                </box>
                            </geometry>
                            <material>
                                <ambient>0.0 0.8 0.0</ambient>
                                <diffuse>0.0 0.8 0.0</diffuse>
                                <specular>0.0 0.8 0.0</specular>
                            </material>
                        </visual>
                        """ if visual and not use_mesh else ""
                        }
                        {
                        f"""
                        <inertial>
                            <mass>0.0615752</mass>
                            <inertia>
                                <ixx>9.108e-05</ixx>
                                <ixy>0.0</ixy>
                                <ixz>0.0</ixz>
                                <iyy>2.51e-06</iyy>
                                <iyz>0.0</iyz>
                                <izz>8.931e-05</izz>
                            </inertia>
                        </inertial>
                        <visual name="{model_name}_visual">
                            <pose>0 0 0 0 0 1.5707963</pose>
                            <geometry>
                                <mesh>
                                    <uri>{mesh_path_visual}</uri>
                                    <submesh>
                                        <name>RealSense</name>
                                        <center>false</center>
                                    </submesh>
                                </mesh>
                            </geometry>
                            <material>
                                <diffuse>1 1 1 1</diffuse>
                                <specular>1 1 1 1</specular>
                                <pbr>
                                    <metal>
                                        <albedo_map>{albedo_map}</albedo_map>
                                        <normal_map>{normal_map}</normal_map>
                                        <roughness_map>{roughness_map}</roughness_map>
                                        <metalness_map>{metalness_map}</metalness_map>
                                    </metal>
                                </pbr>
                            </material>
                        </visual>
                        """ if visual and use_mesh else ""
                        }
                </link>
            </model>
        </sdf>'''

        # Insert the model
        ok_model = world.to_gazebo().insert_model_from_string(
            sdf, initial_pose, model_name
        )
        if not ok_model:
            raise RuntimeError("Failed to insert " + model_name)

        # Get the model
        model = world.get_model(model_name)

        # Initialize base class
        model_wrapper.ModelWrapper.__init__(self, model=model)

        # Collect (topic, ROS 2 message type, Ignition message type) for every
        # requested bridge, keeping the color -> depth -> points order.
        bridge_specs = []
        if ros2_bridge_color:
            bridge_specs.append(
                (self.color_topic, "sensor_msgs/msg/Image", "ignition.msgs.Image")
            )
        if ros2_bridge_depth:
            bridge_specs.append(
                (self.depth_topic, "sensor_msgs/msg/Image", "ignition.msgs.Image")
            )
        if ros2_bridge_points:
            bridge_specs.append(
                (
                    self.points_topic,
                    "sensor_msgs/msg/PointCloud2",
                    "ignition.msgs.PointCloudPacked",
                )
            )

        # The attribute is only created when at least one bridge was requested
        if bridge_specs:
            self.__threads = [
                Thread(target=self.construct_ros2_bridge, args=spec, daemon=True)
                for spec in bridge_specs
            ]
            for thread in self.__threads:
                thread.start()

    def __del__(self):
        # NOTE(review): inside the class body `self.__threads` is name-mangled to
        # `_Camera__threads`, while the string passed to `hasattr` is not, so this
        # check never succeeds and the threads are never joined. It is left as-is
        # on purpose: each thread blocks in `os.system` for as long as its bridge
        # command runs, so joining here could presumably block object
        # destruction. Decide on an explicit shutdown path before changing this.
        if hasattr(self, "__threads"):
            for thread in self.__threads:
                thread.join()

    @classmethod
    def construct_ros2_bridge(cls, topic: str, ros_msg: str, ign_msg: str):
        """Run a `ros_ign_bridge` `parameter_bridge` for one topic.

        The call blocks until the spawned command returns. The node name is
        derived from the topic by replacing every "/" with "_".

        Args:
            topic: Topic to bridge.
            ros_msg: ROS 2 message type.
            ign_msg: Ignition message type.
        """
        node_name = "parameter_bridge" + topic.replace("/", "_")
        command = (
            f"ros2 run ros_ign_bridge parameter_bridge {topic}@{ros_msg}[{ign_msg} "
            + f"--ros-args --remap __node:={node_name} --ros-args -p use_sim_time:=true"
        )
        os.system(command)

    @classmethod
    def get_frame_id(cls, model_name: str) -> str:
        """Return the sensor frame id for a camera model called `model_name`."""
        return f"{model_name}/{model_name}_link/camera"

    @property
    def frame_id(self) -> str:
        """Sensor frame id of this camera."""
        return self.get_frame_id(self._model_name)

    @classmethod
    def get_color_topic(cls, model_name: str) -> str:
        """Return the color image topic for a camera model called `model_name`.

        NOTE(review): the "rgbd" test is applied to the model name, not to the
        sensor type, so an RGB-D camera created with a custom name that lacks
        "rgbd" gets the plain `/{model_name}` topic - confirm against callers.
        """
        return f"/{model_name}/image" if "rgbd" in model_name else f"/{model_name}"

    @property
    def color_topic(self) -> str:
        """Color image topic of this camera."""
        return self.get_color_topic(self._model_name)

    @classmethod
    def get_depth_topic(cls, model_name: str) -> str:
        """Return the depth image topic for a camera model called `model_name`.

        NOTE(review): like `get_color_topic`, this keys on the model name
        rather than on the sensor type.
        """
        return (
            f"/{model_name}/depth_image" if "rgbd" in model_name else f"/{model_name}"
        )

    @property
    def depth_topic(self) -> str:
        """Depth image topic of this camera."""
        return self.get_depth_topic(self._model_name)

    @classmethod
    def get_points_topic(cls, model_name: str) -> str:
        """Return the point cloud topic for a camera model called `model_name`."""
        return f"/{model_name}/points"

    @property
    def points_topic(self) -> str:
        """Point cloud topic of this camera."""
        return self.get_points_topic(self._model_name)

    @classmethod
    def get_link_name(cls, model_name: str) -> str:
        """Return the name of the single link of a camera model called `model_name`."""
        return f"{model_name}_link"

    @property
    def link_name(self) -> str:
        """Name of this camera's link."""
        return self.get_link_name(self._model_name)
--- a/env_manager/rbs_gym/rbs_gym/envs/models/terrains/init.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/models/terrains/init.py
@ -0,0 +1,27 @@
 from gym_gz.scenario.model_wrapper import ModelWrapper
 from .ground import Ground
 from .lunar_heightmap import LunarHeightmap
 from .lunar_surface import LunarSurface
 from .random_ground import RandomGround
 from .random_lunar_surface import RandomLunarSurface
 def get_terrain_model_class(terrain_type: str) -> ModelWrapper:
    """Map a terrain type name to the terrain model class that implements it.

    Returns the class itself (not an instance). An unrecognised `terrain_type`
    yields None rather than an error.
    """
    # TODO: Refactor into enum
    terrain_classes = {
        "flat": Ground,
        "random_flat": RandomGround,
        "lunar_heightmap": LunarHeightmap,
        "lunar_surface": LunarSurface,
        "random_lunar_surface": RandomLunarSurface,
    }
    return terrain_classes.get(terrain_type)
 def is_terrain_type_randomizable(terrain_type: str) -> bool:
    """Tell whether `terrain_type` names one of the randomizable terrains."""
    randomizable_types = ("random_flat", "random_lunar_surface")
    return terrain_type in randomizable_types
--- a/env_manager/rbs_gym/rbs_gym/envs/models/terrains/ground.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/models/terrains/ground.py
@ -0,0 +1,80 @@
 from typing import List
 from gym_gz.scenario import model_wrapper
 from gym_gz.utils import misc
 from gym_gz.utils.scenario import get_unique_model_name
 from scenario import core as scenario
 class Ground(model_wrapper.ModelWrapper):
    """Static, uniformly coloured ground plane generated from an SDF string."""

    def __init__(
        self,
        world: scenario.World,
        name: str = "ground",
        position: List[float] = (0, 0, 0),
        orientation: List[float] = (1, 0, 0, 0),
        size: List[float] = (1.5, 1.5),
        collision_thickness=0.05,
        friction: float = 5.0,
        **kwargs,
    ):
        """Insert a ground plane into `world`.

        Args:
            world: World the model is inserted into.
            name: Base for the unique model name.
            position: Initial position of the plane.
            orientation: Initial orientation of the plane.
            size: Extent of the plane along x and y.
            collision_thickness: Accepted but not used by this implementation.
            friction: Value used for both `mu` and `mu2` of the collision surface.
            **kwargs: Ignored.

        Raises:
            RuntimeError: The model could not be inserted into the world.
        """
        # Unique name and spawn pose of the new model
        unique_name = get_unique_model_name(world, name)
        spawn_pose = scenario.Pose(position, orientation)

        # The same plane size is used by the collision and the visual geometry
        plane_size = f"{size[0]} {size[1]}"

        # SDF description of the plane
        ground_sdf = f"""<sdf version="1.9">
            <model name="{unique_name}">
                <static>true</static>
                <link name="{unique_name}_link">
                    <collision name="{unique_name}_collision">
                        <geometry>
                            <plane>
                                <normal>0 0 1</normal>
                                <size>{plane_size}</size>
                            </plane>
                        </geometry>
                        <surface>
                            <friction>
                                <ode>
                                    <mu>{friction}</mu>
                                    <mu2>{friction}</mu2>
                                    <fdir1>0 0 0</fdir1>
                                    <slip1>0.0</slip1>
                                    <slip2>0.0</slip2>
                                </ode>
                            </friction>
                        </surface>
                    </collision>
                    <visual name="{unique_name}_visual">
                        <geometry>
                            <plane>
                                <normal>0 0 1</normal>
                                <size>{plane_size}</size>
                            </plane>
                        </geometry>
                        <material>
                            <ambient>0.8 0.8 0.8 1</ambient>
                            <diffuse>0.8 0.8 0.8 1</diffuse>
                            <specular>0.8 0.8 0.8 1</specular>
                        </material>
                    </visual>
                </link>
            </model>
        </sdf>"""

        # Spawn the model and bail out when the simulator rejects it
        inserted = world.to_gazebo().insert_model_from_string(
            ground_sdf, spawn_pose, unique_name
        )
        if not inserted:
            raise RuntimeError("Failed to insert " + unique_name)

        # Wrap the freshly inserted model
        model_wrapper.ModelWrapper.__init__(self, model=world.get_model(unique_name))
--- a/env_manager/rbs_gym/rbs_gym/envs/models/terrains/lunar_heightmap.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/models/terrains/lunar_heightmap.py
@ -0,0 +1,52 @@
 from typing import List
 from gym_gz.scenario import model_with_file, model_wrapper
 from gym_gz.utils.scenario import get_unique_model_name
 from scenario import core as scenario
 from scenario import gazebo as scenario_gazebo
 class LunarHeightmap(model_wrapper.ModelWrapper, model_with_file.ModelWithFile):
    """Lunar heightmap terrain inserted into a world from a model file."""

    def __init__(
        self,
        world: scenario.World,
        name: str = "lunar_heightmap",
        position: List[float] = (0, 0, 0),
        orientation: List[float] = (1, 0, 0, 0),
        model_file: str = None,
        use_fuel: bool = False,
        **kwargs,
    ):
        """Insert the heightmap model into `world`.

        Args:
            world: World the model is inserted into.
            name: Base for the unique model name.
            position: Initial position of the model.
            orientation: Initial orientation of the model.
            model_file: Custom model file; when None, `get_model_file` is used.
            use_fuel: Forwarded to `get_model_file` as `fuel`.
            **kwargs: Ignored.

        Raises:
            NotImplementedError: `use_fuel` is set and no `model_file` is given.
            RuntimeError: The model could not be inserted into the world.
        """
        # Allow passing of custom model file as an argument
        if model_file is None:
            model_file = self.get_model_file(fuel=use_fuel)

        # Get a unique model name
        model_name = get_unique_model_name(world, name)

        # Setup initial pose
        initial_pose = scenario.Pose(position, orientation)

        # Insert the model
        ok_model = world.to_gazebo().insert_model_from_file(
            model_file, initial_pose, model_name
        )
        if not ok_model:
            raise RuntimeError("Failed to insert " + model_name)

        # Get the model
        model = world.get_model(model_name)

        # Initialize base class
        super().__init__(model=model)

    @classmethod
    def get_model_file(cls, fuel: bool = False) -> str:
        """Return the model file (here: the model name) of the heightmap.

        Args:
            fuel: Request the model from Fuel; this is not supported.

        Raises:
            NotImplementedError: `fuel` is True.
        """
        if fuel:
            # The Fuel variant was never reachable. The model it referred to is
            # https://fuel.ignitionrobotics.org/1.0/AndrejOrsula/models/lunar_heightmap
            # and would be fetched with `scenario_gazebo.get_model_file_from_fuel`.
            raise NotImplementedError
        return "lunar_heightmap"
--- a/env_manager/rbs_gym/rbs_gym/envs/models/terrains/lunar_surface.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/models/terrains/lunar_surface.py
@ -0,0 +1,53 @@
 from typing import List
 from gym_gz.scenario import model_with_file, model_wrapper
 from gym_gz.utils.scenario import get_unique_model_name
 from scenario import core as scenario
 from scenario import gazebo as scenario_gazebo
 class LunarSurface(model_wrapper.ModelWrapper, model_with_file.ModelWithFile):
    """Lunar surface terrain inserted into a world from a model file."""

    def __init__(
        self,
        world: scenario.World,
        name: str = "lunar_surface",
        position: List[float] = (0, 0, 0),
        orientation: List[float] = (1, 0, 0, 0),
        model_file: str = None,
        use_fuel: bool = False,
        variant: str = "tycho",
        **kwargs,
    ):
        """Insert the lunar surface model into `world`.

        Args:
            world: World the model is inserted into.
            name: Base for the unique model name.
            position: Initial position of the model.
            orientation: Initial orientation of the model.
            model_file: Custom model file; when None, `get_model_file` is used.
            use_fuel: Forwarded to `get_model_file` as `fuel`.
            variant: Surface variant, forwarded to `get_model_file`.
            **kwargs: Ignored.

        Raises:
            NotImplementedError: `use_fuel` is set and no `model_file` is given.
            RuntimeError: The model could not be inserted into the world.
        """
        # Allow passing of custom model file as an argument
        if model_file is None:
            model_file = self.get_model_file(fuel=use_fuel, variant=variant)

        # Get a unique model name
        model_name = get_unique_model_name(world, name)

        # Setup initial pose
        initial_pose = scenario.Pose(position, orientation)

        # Insert the model
        ok_model = world.to_gazebo().insert_model_from_file(
            model_file, initial_pose, model_name
        )
        if not ok_model:
            raise RuntimeError("Failed to insert " + model_name)

        # Get the model
        model = world.get_model(model_name)

        # Initialize base class
        super().__init__(model=model)

    @classmethod
    def get_model_file(cls, fuel: bool = False, variant: str = "tycho") -> str:
        """Return the model file (here: the model name) of a surface variant.

        Args:
            fuel: Request the model from Fuel; this is not supported.
            variant: Suffix appended to "lunar_surface_".

        Raises:
            NotImplementedError: `fuel` is True.
        """
        if fuel:
            # The Fuel variant was never reachable. The model it referred to is
            # https://fuel.ignitionrobotics.org/1.0/AndrejOrsula/models/lunar_surface_<variant>
            # and would be fetched with `scenario_gazebo.get_model_file_from_fuel`.
            raise NotImplementedError
        return f"lunar_surface_{variant}"
--- a/env_manager/rbs_gym/rbs_gym/envs/models/terrains/random_ground.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/models/terrains/random_ground.py
@ -0,0 +1,135 @@
 import os
 from typing import List, Optional
 import numpy as np
 from gym_gz.scenario import model_wrapper
 from gym_gz.utils.scenario import get_unique_model_name
 from numpy.random import RandomState
 from scenario import core as scenario
 class RandomGround(model_wrapper.ModelWrapper):
    """Static ground plane that is textured with a randomly chosen PBR texture set."""

    def __init__(
        self,
        world: scenario.World,
        name: str = "random_ground",
        position: List[float] = (0, 0, 0),
        orientation: List[float] = (1, 0, 0, 0),
        size: List[float] = (1.0, 1.0),
        collision_thickness: float = 0.05,
        friction: float = 5.0,
        texture_dir: Optional[str] = None,
        np_random: Optional[RandomState] = None,
        **kwargs,
    ):
        """Insert a ground plane with a random texture into `world`.

        Args:
            world: World the model is inserted into.
            name: Base for the unique model name.
            position: Initial position of the plane.
            orientation: Initial orientation of the plane.
            size: Extent of the plane along x and y.
            collision_thickness: Accepted but not used by this implementation.
            friction: Value used for both `mu` and `mu2` of the collision surface.
            texture_dir: One directory, or several separated by ":", whose
                sub-directories each hold one texture set. Falls back to the
                `TEXTURE_DIRS` environment variable when not given.
            np_random: Random generator used to pick the texture set; a fresh
                `np.random.default_rng()` is created when None.
            **kwargs: Ignored.

        Raises:
            RuntimeError: The model could not be inserted into the world.
        """
        if np_random is None:
            np_random = np.random.default_rng()

        # Get a unique model name
        model_name = get_unique_model_name(world, name)

        # Initial pose
        initial_pose = scenario.Pose(position, orientation)

        # Get textures from ENV variable if not directly specified
        if not texture_dir:
            texture_dir = os.environ.get("TEXTURE_DIRS", default="")

        # Find random PBR texture
        albedo_map = None
        normal_map = None
        roughness_map = None
        metalness_map = None
        if texture_dir:
            # Collect the texture-set directories as paths that include their
            # parent directory, for a single directory and for a ":"-separated
            # list alike. Bare entry names would be resolved against the current
            # working directory when they are listed below.
            texture_sets = []
            for directory in texture_dir.split(":"):
                if not directory:
                    # Ignore empty segments such as a trailing ":"
                    continue
                for entry in os.listdir(directory):
                    candidate = os.path.join(directory, entry)
                    if os.path.isdir(candidate):
                        texture_sets.append(candidate)

            # Without any texture set the plane is left untextured
            if texture_sets:
                # Choose a random texture from these
                random_texture_dir = str(np_random.choice(texture_sets))

                # Extract the appropriate files
                for texture in os.listdir(random_texture_dir):
                    texture_lower = texture.lower()
                    if "color" in texture_lower or "albedo" in texture_lower:
                        albedo_map = os.path.join(random_texture_dir, texture)
                    elif "normal" in texture_lower:
                        normal_map = os.path.join(random_texture_dir, texture)
                    elif "roughness" in texture_lower:
                        roughness_map = os.path.join(random_texture_dir, texture)
                    elif "specular" in texture_lower or "metalness" in texture_lower:
                        metalness_map = os.path.join(random_texture_dir, texture)

        # Create SDF string for the model
        sdf = f"""<sdf version="1.9">
            <model name="{model_name}">
                <static>true</static>
                <link name="{model_name}_link">
                    <collision name="{model_name}_collision">
                        <geometry>
                            <plane>
                                <normal>0 0 1</normal>
                                <size>{size[0]} {size[1]}</size>
                            </plane>
                        </geometry>
                        <surface>
                            <friction>
                                <ode>
                                    <mu>{friction}</mu>
                                    <mu2>{friction}</mu2>
                                    <fdir1>0 0 0</fdir1>
                                    <slip1>0.0</slip1>
                                    <slip2>0.0</slip2>
                                </ode>
                            </friction>
                        </surface>
                    </collision>
                    <visual name="{model_name}_visual">
                        <geometry>
                            <plane>
                                <normal>0 0 1</normal>
                                <size>{size[0]} {size[1]}</size>
                            </plane>
                        </geometry>
                        <material>
                            <ambient>1 1 1 1</ambient>
                            <diffuse>1 1 1 1</diffuse>
                            <specular>1 1 1 1</specular>
                            <pbr>
                                <metal>
                                    {"<albedo_map>%s</albedo_map>"
                                        % albedo_map if albedo_map is not None else ""}
                                    {"<normal_map>%s</normal_map>"
                                        % normal_map if normal_map is not None else ""}
                                    {"<roughness_map>%s</roughness_map>"
                                        % roughness_map if roughness_map is not None else ""}
                                    {"<metalness_map>%s</metalness_map>"
                                        % metalness_map if metalness_map is not None else ""}
                                </metal>
                            </pbr>
                        </material>
                    </visual>
                </link>
            </model>
        </sdf>"""

        # Insert the model
        ok_model = world.to_gazebo().insert_model_from_string(
            sdf, initial_pose, model_name
        )
        if not ok_model:
            raise RuntimeError("Failed to insert " + model_name)

        # Get the model
        model = world.get_model(model_name)

        # Initialize base class
        model_wrapper.ModelWrapper.__init__(self, model=model)
--- a/env_manager/rbs_gym/rbs_gym/envs/models/terrains/random_lunar_surface.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/models/terrains/random_lunar_surface.py
@ -0,0 +1,57 @@
 import os
 from typing import List, Optional, Tuple
 import numpy as np
 from gym_gz.scenario import model_wrapper
 from gym_gz.utils.scenario import get_unique_model_name
 from numpy.random import RandomState
 from scenario import core as scenario
 class RandomLunarSurface(model_wrapper.ModelWrapper):
    """Lunar surface terrain whose model is picked at random from a directory."""

    def __init__(
        self,
        world: scenario.World,
        name: str = "lunar_surface",
        position: List[float] = (0, 0, 0),
        orientation: List[float] = (1, 0, 0, 0),
        models_dir: Optional[str] = None,
        np_random: Optional[RandomState] = None,
        **kwargs,
    ):
        """Insert a randomly selected lunar surface model into `world`.

        Args:
            world: World the model is inserted into.
            name: Base for the unique model name.
            position: Initial position of the model.
            orientation: Initial orientation of the model.
            models_dir: Directory whose entries are model directories that each
                contain a `model.sdf`. Falls back to the
                `SDF_PATH_LUNAR_SURFACE` environment variable when not given.
            np_random: Random generator used to pick the model; a fresh
                `np.random.default_rng()` is created when None.
            **kwargs: Ignored.

        Raises:
            ValueError: `models_dir` does not exist or has no entries.
            RuntimeError: The model could not be inserted into the world.
        """
        if np_random is None:
            np_random = np.random.default_rng()

        # Get a unique model name
        model_name = get_unique_model_name(world, name)

        # Setup initial pose
        initial_pose = scenario.Pose(position, orientation)

        # Get path to all lunar surface models
        if not models_dir:
            models_dir = os.environ.get("SDF_PATH_LUNAR_SURFACE", default="")

        # Make sure the path exists
        if not os.path.exists(models_dir):
            raise ValueError(
                f"Invalid path '{models_dir}' pointed by 'SDF_PATH_LUNAR_SURFACE' environment variable."
            )

        # Select a single model at random
        available_models = os.listdir(models_dir)
        if not available_models:
            raise ValueError(f"No lunar surface models found in '{models_dir}'.")
        # `os.listdir` yields bare entry names, so the chosen one is joined with
        # `models_dir`; otherwise the SDF path would be resolved relative to the
        # current working directory.
        model_dir = os.path.join(models_dir, str(np_random.choice(available_models)))
        sdf_filepath = os.path.join(model_dir, "model.sdf")

        # Insert the model
        ok_model = world.to_gazebo().insert_model_from_file(
            sdf_filepath, initial_pose, model_name
        )
        if not ok_model:
            raise RuntimeError("Failed to insert " + model_name)

        # Get the model
        model = world.get_model(model_name)

        # Initialize base class
        model_wrapper.ModelWrapper.__init__(self, model=model)
--- a/env_manager/rbs_gym/rbs_gym/envs/models/utils/init.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/models/utils/init.py
@ -0,0 +1,2 @@
 from .model_collection_randomizer import ModelCollectionRandomizer
 from .xacro2sdf import xacro2sdf
--- a/env_manager/rbs_gym/rbs_gym/envs/models/utils/model_collection_randomizer.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/models/utils/model_collection_randomizer.py
@ -0,0 +1,745 @@
 import glob
 import os
 from typing import List, Optional, Tuple
 import numpy as np
 import trimesh
 from gym_gz.utils import logger
 from numpy.random import RandomState
 from pcg_gazebo.parsers import parse_sdf
 from pcg_gazebo.parsers.sdf import create_sdf_element
 from scenario import gazebo as scenario_gazebo
 # Note: only models with mesh geometry are supported
 class ModelCollectionRandomizer:
    # Model paths used by instances that are created with `unique_cache` disabled
    _class_model_paths = None
    # Base name of the SDF file that is read from a model directory
    __sdf_base_name = "model.sdf"
    # Base name of the processed ("configured") SDF file written into a model directory
    __configured_sdf_base_name = "model_modified.sdf"
    # Base name of the marker file whose presence means the model is blacklisted
    __blacklisted_base_name = "BLACKLISTED"
    # Directory, relative to the model directory, that receives generated collision meshes
    __collision_mesh_dir = "meshes/collision/"
    # File type (and extension) used when exporting collision meshes
    __collision_mesh_file_type = "stl"
    # Base name of the text file that stores the original scale of the mesh
    __original_scale_base_name = "original_scale.txt"
    def __init__(
        self,
        model_paths=None,
        owner="GoogleResearch",
        collection="Google Scanned Objects",
        server="https://fuel.ignitionrobotics.org",
        server_version="1.0",
        unique_cache=False,
        reset_collection=False,
        enable_blacklisting=True,
        np_random: Optional[RandomState] = None,
    ):
        # If enabled, the newly created objects of this class will use its own individual cache
        # for model paths and must discover/download them on its own
        self._unique_cache = unique_cache
        # Flag that determines if models that cannot be used are blacklisted
        self._enable_blacklisting = enable_blacklisting
        # If enabled, the cache of the class used to store model paths among instances will be reset
        if reset_collection and not self._unique_cache:
            self._class_model_paths = None
        # Get file path to all models from
        # a) `model_paths` arg
        # b) local cache owner (if `owner` has some models, i.e `collection` is already downloaded)
        # c) Fuel collection (if `owner` has no models in local cache)
        if model_paths is not None:
            # Use arg
            if self._unique_cache:
                self._model_paths = model_paths
            else:
                self._class_model_paths = model_paths
        else:
            # Use local cache or Fuel
            if self._unique_cache:
                self._model_paths = self.get_collection_paths(
                    owner=owner,
                    collection=collection,
                    server=server,
                    server_version=server_version,
                )
            elif self._class_model_paths is None:
                # Executed only once, unless the paths are reset with `reset_collection` arg
                self._class_model_paths = self.get_collection_paths(
                    owner=owner,
                    collection=collection,
                    server=server,
                    server_version=server_version,
                )
        # Initialise rng with (with seed is desired)
        if np_random is not None:
            self.np_random = np_random
        else:
            self.np_random = np.random.default_rng()
    @classmethod
    def get_collection_paths(
        cls,
        owner="GoogleResearch",
        collection="Google Scanned Objects",
        server="https://fuel.ignitionrobotics.org",
        server_version="1.0",
        model_name: str = "",
    ) -> List[str]:
        # First check the local cache (for performance)
        # Note: This unfortunately does not check if models belong to the specified collection
        # TODO: Make sure models belong to the collection if sampled from local cache
        model_paths = scenario_gazebo.get_local_cache_model_paths(
            owner=owner, name=model_name
        )
        if len(model_paths) > 0:
            return model_paths
        # Else download the models from Fuel and then try again
        if collection:
            download_uri = "%s/%s/%s/collections/%s" % (
                server,
                server_version,
                owner,
                collection,
            )
        elif model_name:
            download_uri = "%s/%s/%s/models/%s" % (
                server,
                server_version,
                owner,
                model_name,
            )
        download_command = 'ign fuel download -v 3 -t model -j %s -u "%s"' % (
            os.cpu_count(),
            download_uri,
        )
        os.system(download_command)
        model_paths = scenario_gazebo.get_local_cache_model_paths(
            owner=owner, name=model_name
        )
        if 0 == len(model_paths):
            logger.error(
                'URI "%s" is not valid and does not contain any models that are \
                          owned by the owner of the collection'
                % download_uri
            )
            pass
        return model_paths
    def random_model(
        self,
        min_scale=0.125,
        max_scale=0.175,
        min_mass=0.05,
        max_mass=0.25,
        min_friction=0.75,
        max_friction=1.5,
        decimation_fraction_of_visual=0.25,
        decimation_min_faces=40,
        decimation_max_faces=200,
        max_faces=40000,
        max_vertices=None,
        component_min_faces_fraction=0.1,
        component_max_volume_fraction=0.35,
        fix_mtl_texture_paths=True,
        skip_blacklisted=True,
        return_sdf_path=True,
    ) -> str:
        # Loop until a model is found, checked for validity, configured and returned
        # If any of these steps fail, sample another model and try again
        # Note: Due to this behaviour, the function could stall if all models are invalid
        # TODO: Add a simple timeout to random sampling of valid model (# of attempts or time-based)
        while True:
            # Get path to a random model from the collection
            model_path = self.get_random_model_path()
            # Check if the model is already blacklisted and skip if desired
            if skip_blacklisted and self.is_blacklisted(model_path):
                continue
            # Check is the model is already configured
            if self.is_configured(model_path):
                # If so, break the loop
                break
            # Process the model and break loop only if it is valid
            if self.process_model(
                model_path,
                decimation_fraction_of_visual=decimation_fraction_of_visual,
                decimation_min_faces=decimation_min_faces,
                decimation_max_faces=decimation_max_faces,
                max_faces=max_faces,
                max_vertices=max_vertices,
                component_min_faces_fraction=component_min_faces_fraction,
                component_max_volume_fraction=component_max_volume_fraction,
                fix_mtl_texture_paths=fix_mtl_texture_paths,
            ):
                break
        # Apply randomization
        self.randomize_configured_model(
            model_path,
            min_scale=min_scale,
            max_scale=max_scale,
            min_friction=min_friction,
            max_friction=max_friction,
            min_mass=min_mass,
            max_mass=max_mass,
        )
        if return_sdf_path:
            # Return path to the configured SDF file
            return self.get_configured_sdf_path(model_path)
        else:
            # Return path to the model directory
            return model_path
    def process_all_models(
        self,
        decimation_fraction_of_visual=0.025,
        decimation_min_faces=8,
        decimation_max_faces=400,
        max_faces=40000,
        max_vertices=None,
        component_min_faces_fraction=0.1,
        component_max_volume_fraction=0.35,
        fix_mtl_texture_paths=True,
    ):
        if self._unique_cache:
            model_paths = self._model_paths
        else:
            model_paths = self._class_model_paths
        blacklist_model_counter = 0
        for i in range(len(model_paths)):
            if not self.process_model(
                model_paths[i],
                decimation_fraction_of_visual=decimation_fraction_of_visual,
                decimation_min_faces=decimation_min_faces,
                decimation_max_faces=decimation_max_faces,
                max_faces=max_faces,
                max_vertices=max_vertices,
                component_min_faces_fraction=component_min_faces_fraction,
                component_max_volume_fraction=component_max_volume_fraction,
                fix_mtl_texture_paths=fix_mtl_texture_paths,
            ):
                blacklist_model_counter += 1
            print('Processed model %i/%i "%s"' % (i, len(model_paths), model_paths[i]))
        print("Number of blacklisted models: %i" % blacklist_model_counter)
    def process_model(
        self,
        model_path,
        decimation_fraction_of_visual=0.25,
        decimation_min_faces=40,
        decimation_max_faces=200,
        max_faces=40000,
        max_vertices=None,
        component_min_faces_fraction=0.1,
        component_max_volume_fraction=0.35,
        fix_mtl_texture_paths=True,
    ) -> bool:
        """Create the configured SDF of a model from its original SDF.

        For every link of every model in the SDF the existing collisions are
        removed and each visual mesh is loaded, validated, used to accumulate
        the inertial properties of the link and turned into a decimated
        collision mesh. The accumulated inertial properties are written to
        the link and the result is exported next to the original SDF.

        Args:
            model_path: Directory of the model.
            decimation_fraction_of_visual, decimation_min_faces,
            decimation_max_faces: Forwarded to `add_collision()`.
            max_faces, max_vertices: Forwarded to `check_excessive_geometry()`.
            component_min_faces_fraction, component_max_volume_fraction:
                Forwarded to `check_disconnected_components()`.
            fix_mtl_texture_paths: If True, `fix_mtl_texture_paths()` is run
                on each visual mesh before it is loaded.

        Returns:
            False as soon as one of the checks fails (nothing is exported in
            that case), True once the configured SDF has been written.
        """
        # Parse the SDF of the model
        sdf = parse_sdf(self.get_sdf_path(model_path))
        # Process the model(s) contained in the SDF
        for model in sdf.models:
            # Process the link(s) of each model
            for link in model.links:
                # Get rid of the existing collisions prior to simplifying it
                link.collisions.clear()
                # Values for the total inertial properties of current link
                # These values will be updated for each body that the link contains
                total_mass = 0.0
                total_inertia = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
                common_centre_of_mass = [0.0, 0.0, 0.0]
                # Go through the visuals and process them
                for visual in link.visuals:
                    # Get path to the mesh of the link's visual
                    mesh_path = self.get_mesh_path(model_path, visual)
                    # If desired, fix texture path in 'mtl' files for '.obj' mesh format
                    if fix_mtl_texture_paths:
                        self.fix_mtl_texture_paths(
                            model_path, mesh_path, model.attributes["name"]
                        )
                    # Load the mesh (without materials)
                    mesh = trimesh.load(mesh_path, force="mesh", skip_materials=True)
                    # Check if model has too much geometry (blacklist if needed)
                    if not self.check_excessive_geometry(
                        mesh, model_path, max_faces=max_faces, max_vertices=max_vertices
                    ):
                        return False
                    # Check if model has disconnected geometry/components (blacklist if needed)
                    if not self.check_disconnected_components(
                        mesh,
                        model_path,
                        component_min_faces_fraction=component_min_faces_fraction,
                        component_max_volume_fraction=component_max_volume_fraction,
                    ):
                        return False
                    # Compute inertial properties for this mesh
                    (
                        total_mass,
                        total_inertia,
                        common_centre_of_mass,
                    ) = self.sum_inertial_properties(
                        mesh, total_mass, total_inertia, common_centre_of_mass
                    )
                    # Add decimated collision geometry to the SDF
                    self.add_collision(
                        mesh,
                        link,
                        model_path,
                        fraction_of_visual=decimation_fraction_of_visual,
                        min_faces=decimation_min_faces,
                        max_faces=decimation_max_faces,
                    )
                    # Write original scale (size) into the SDF
                    # This is used for later reference during randomization (for scale limits)
                    # Note: the file is rewritten for every visual, so it ends up
                    # holding the scale of the last processed mesh
                    self.write_original_scale(mesh, model_path)
                # Make sure the link has valid inertial properties (blacklist if needed)
                if not self.check_inertial_properties(
                    model_path, total_mass, total_inertia
                ):
                    return False
                # Write inertial properties to the SDF of the link
                self.write_inertial_properties(
                    link, total_mass, total_inertia, common_centre_of_mass
                )
        # Write the configured SDF into a file
        sdf.export_xml(self.get_configured_sdf_path(model_path))
        return True
    def add_collision(
        self,
        mesh,
        link,
        model_path,
        fraction_of_visual=0.05,
        min_faces=8,
        max_faces=750,
        friction=1.0,
    ):
        """Attach a decimated copy of `mesh` to `link` as collision geometry.

        The mesh is simplified to `fraction_of_visual` of its faces, clamped
        to the range [`min_faces`, `max_faces`], exported into the collision
        mesh directory of the model and referenced from a new collision
        element whose friction coefficients are both set to `friction`.
        """
        # The name is derived from the link and the number of collisions it already has
        index_suffix = str(len(link.collisions))
        mesh_stem = link.attributes["name"] + "_collision_" + index_suffix
        mesh_file = self.get_collision_mesh_path(model_path, mesh_stem)

        # Clamp the number of faces to keep after the decimation
        target_faces = max(fraction_of_visual * len(mesh.faces), min_faces)
        target_faces = min(target_faces, max_faces)

        # Decimate the mesh and store the result at the appropriate location
        simplified = mesh.simplify_quadratic_decimation(target_faces)
        os.makedirs(os.path.dirname(mesh_file), exist_ok=True)
        simplified.export(mesh_file, file_type=self.__collision_mesh_file_type)

        # Build the collision element that refers to the exported mesh
        collision = create_sdf_element("collision")
        collision.geometry.mesh = create_sdf_element("mesh")
        collision.geometry.mesh.uri = os.path.relpath(mesh_file, start=model_path)

        # Surface friction (defaults to 1 and is randomized later)
        collision.surface = create_sdf_element("surface")
        collision.surface.friction = create_sdf_element("friction", "surface")
        collision.surface.friction.ode = create_sdf_element("ode", "collision")
        collision.surface.friction.ode.mu = friction
        collision.surface.friction.ode.mu2 = friction

        # Register the collision under the base name of the exported file
        registered_name = os.path.basename(mesh_file).split(".")[0]
        link.add_collision(registered_name, collision)
    def sum_inertial_properties(
        self, mesh, total_mass, total_inertia, common_centre_of_mass, density=1.0
    ) -> Tuple[float, float, float]:
        # Arbitrary density is used here
        # The mass will be randomized once it is fully computed for a link
        mesh.density = density
        # Tmp variable to store the mass of all previous geometry, used to determine centre of mass
        mass_of_others = total_mass
        # For each additional mesh, simply add the mass and inertia
        total_mass += mesh.mass
        total_inertia += mesh.moment_inertia
        # Compute a common centre of mass between all previous geometry and the new mesh
        common_centre_of_mass = [
            mass_of_others * common_centre_of_mass[0] + mesh.mass * mesh.center_mass[0],
            mass_of_others * common_centre_of_mass[1] + mesh.mass * mesh.center_mass[1],
            mass_of_others * common_centre_of_mass[2] + mesh.mass * mesh.center_mass[2],
        ] / total_mass
        return total_mass, total_inertia, common_centre_of_mass
    def randomize_configured_model(
        self,
        model_path,
        min_scale=0.05,
        max_scale=0.25,
        min_mass=0.1,
        max_mass=3.0,
        min_friction=0.75,
        max_friction=1.5,
    ):
        """Randomize scale, inertial properties and friction of a configured model.

        The configured SDF of `model_path` is parsed, every link of every
        model in it is randomized within the given limits, and the file is
        overwritten with the result.
        """
        sdf_file = self.get_configured_sdf_path(model_path)
        document = parse_sdf(sdf_file)

        # Visit the links of all models in the order they appear in the SDF
        all_links = (link for model in document.models for link in model.links)
        for link in all_links:
            # Scale first, then inertial properties, then friction
            self.randomize_scale(
                model_path, link, min_scale=min_scale, max_scale=max_scale
            )
            self.randomize_inertial(link, min_mass=min_mass, max_mass=max_mass)
            self.randomize_friction(
                link, min_friction=min_friction, max_friction=max_friction
            )

        # Overwrite the configured SDF file with randomized values
        document.export_xml(sdf_file)
    def randomize_scale(self, model_path, link, min_scale=0.05, max_scale=0.25):
        """Rescale the mesh of `link` to a random size and adapt its inertial.

        A target size is drawn uniformly between `min_scale` and `max_scale`
        and divided by the original scale stored for the model, which gives
        the scale factor written to the first visual and first collision of
        the link. Pose, mass and inertia of the link are then rescaled by the
        ratio between the new and the current scale factor.

        Returns:
            False, without modifying the link, if it has more than one
            visual; otherwise nothing is returned.
        """
        # Note: This function currently supports only scaling of links with single mesh geometry
        # NOTE(review): a link without any visual or collision is not guarded
        # against here; index 0 is accessed below - confirm callers ensure it exists
        if len(link.visuals) > 1:
            return False
        # Get a random scale for the size of mesh
        random_scale = self.np_random.uniform(min_scale, max_scale)
        # Determine a scale factor that will result in such scale for the size of mesh
        original_mesh_scale = self.read_original_scale(model_path)
        scale_factor = random_scale / original_mesh_scale
        # Determine scale factor for inertial properties based on random scale and current scale
        current_scale = link.visuals[0].geometry.mesh.scale.value[0]
        inertial_scale_factor = scale_factor / current_scale
        # Write scale factor into SDF for visual and collision geometry
        link.visuals[0].geometry.mesh.scale = [scale_factor] * 3
        link.collisions[0].geometry.mesh.scale = [scale_factor] * 3
        # Recompute inertial properties according to the scale
        link.inertial.pose.x *= inertial_scale_factor
        link.inertial.pose.y *= inertial_scale_factor
        link.inertial.pose.z *= inertial_scale_factor
        # Mass is scaled n^3
        link.mass = link.mass.value * inertial_scale_factor**3
        # Inertia is scaled n^5
        inertial_scale_factor_n5 = inertial_scale_factor**5
        link.inertia.ixx = link.inertia.ixx.value * inertial_scale_factor_n5
        link.inertia.iyy = link.inertia.iyy.value * inertial_scale_factor_n5
        link.inertia.izz = link.inertia.izz.value * inertial_scale_factor_n5
        link.inertia.ixy = link.inertia.ixy.value * inertial_scale_factor_n5
        link.inertia.ixz = link.inertia.ixz.value * inertial_scale_factor_n5
        link.inertia.iyz = link.inertia.iyz.value * inertial_scale_factor_n5
    def randomize_inertial(
        self, link, min_mass=0.1, max_mass=3.0
    ) -> Tuple[float, float]:
        random_mass = self.np_random.uniform(min_mass, max_mass)
        mass_scale_factor = random_mass / link.mass.value
        link.mass = random_mass
        link.inertia.ixx = link.inertia.ixx.value * mass_scale_factor
        link.inertia.iyy = link.inertia.iyy.value * mass_scale_factor
        link.inertia.izz = link.inertia.izz.value * mass_scale_factor
        link.inertia.ixy = link.inertia.ixy.value * mass_scale_factor
        link.inertia.ixz = link.inertia.ixz.value * mass_scale_factor
        link.inertia.iyz = link.inertia.iyz.value * mass_scale_factor
    def randomize_friction(self, link, min_friction=0.75, max_friction=1.5):
        for collision in link.collisions:
            random_friction = self.np_random.uniform(min_friction, max_friction)
            collision.surface.friction.ode.mu = random_friction
            collision.surface.friction.ode.mu2 = random_friction
    def write_inertial_properties(self, link, mass, inertia, centre_of_mass):
        link.mass = mass
        link.inertia.ixx = inertia[0][0]
        link.inertia.iyy = inertia[1][1]
        link.inertia.izz = inertia[2][2]
        link.inertia.ixy = inertia[0][1]
        link.inertia.ixz = inertia[0][2]
        link.inertia.iyz = inertia[1][2]
        link.inertial.pose = [
            centre_of_mass[0],
            centre_of_mass[1],
            centre_of_mass[2],
            0.0,
            0.0,
            0.0,
        ]
    def write_original_scale(self, mesh, model_path):
        file = open(self.get_original_scale_path(model_path), "w")
        file.write(str(mesh.scale))
        file.close()
    def read_original_scale(self, model_path) -> float:
        file = open(self.get_original_scale_path(model_path), "r")
        original_scale = file.read()
        file.close()
        return float(original_scale)
    def check_excessive_geometry(
        self, mesh, model_path, max_faces=40000, max_vertices=None
    ) -> bool:
        if max_faces is not None:
            num_faces = len(mesh.faces)
            if num_faces > max_faces:
                self.blacklist_model(
                    model_path, reason="Excessive geometry (%d faces)" % num_faces
                )
                return False
        if max_vertices is not None:
            num_vertices = len(mesh.vertices)
            if num_vertices > max_vertices:
                self.blacklist_model(
                    model_path, reason="Excessive geometry (%d vertices)" % num_vertices
                )
                return False
        return True
    def check_disconnected_components(
        self,
        mesh,
        model_path,
        component_min_faces_fraction=0.05,
        component_max_volume_fraction=0.1,
    ) -> bool:
        # Get a list of all connected componends inside the mesh
        # Consider components only with `component_min_faces_fraction` percent faces
        min_faces = round(component_min_faces_fraction * len(mesh.faces))
        connected_components = trimesh.graph.connected_components(
            mesh.face_adjacency, min_len=min_faces
        )
        # If more than 1 objects were detected, consider also relative volume of the meshes
        if len(connected_components) > 1:
            total_volume = mesh.volume
            large_component_counter = 0
            for component in connected_components:
                submesh = mesh.copy()
                mask = np.zeros(len(mesh.faces), dtype=np.bool)
                mask[component] = True
                submesh.update_faces(mask)
                volume_fraction = submesh.volume / total_volume
                if volume_fraction > component_max_volume_fraction:
                    large_component_counter += 1
                if large_component_counter > 1:
                    self.blacklist_model(
                        model_path,
                        reason="Disconnected components (%d instances)"
                        % len(connected_components),
                    )
                    return False
        return True
    def check_inertial_properties(self, model_path, mass, inertia) -> bool:
        if (
            mass < 1e-10
            or inertia[0][0] < 1e-10
            or inertia[1][1] < 1e-10
            or inertia[2][2] < 1e-10
        ):
            self.blacklist_model(model_path, reason="Invalid inertial properties")
            return False
        return True
    def get_random_model_path(self) -> str:
        if self._unique_cache:
            return self.np_random.choice(self._model_paths)
        else:
            return self.np_random.choice(self._class_model_paths)
    def get_collision_mesh_path(self, model_path, collision_name) -> str:
        return os.path.join(
            model_path,
            self.__collision_mesh_dir,
            collision_name + "." + self.__collision_mesh_file_type,
        )
    def get_sdf_path(self, model_path) -> str:
        return os.path.join(model_path, self.__sdf_base_name)
    def get_configured_sdf_path(self, model_path) -> str:
        return os.path.join(model_path, self.__configured_sdf_base_name)
    def get_blacklisted_path(self, model_path) -> str:
        return os.path.join(model_path, self.__blacklisted_base_name)
    def get_mesh_path(self, model_path, visual_or_collision) -> str:
        # TODO: This might need fixing for certain collections/models
        mesh_uri = visual_or_collision.geometry.mesh.uri.value
        return os.path.join(model_path, mesh_uri)
    def get_original_scale_path(self, model_path) -> str:
        return os.path.join(model_path, self.__original_scale_base_name)
    def blacklist_model(self, model_path, reason="Unknown"):
        if self._enable_blacklisting:
            bl_file = open(self.get_blacklisted_path(model_path), "w")
            bl_file.write(reason)
            bl_file.close()
        logger.warn(
            '%s model "%s". Reason: %s.'
            % (
                "Blacklisting" if self._enable_blacklisting else "Skipping",
                model_path,
                reason,
            )
        )
    def is_blacklisted(self, model_path) -> bool:
        return os.path.isfile(self.get_blacklisted_path(model_path))
    def is_configured(self, model_path) -> bool:
        return os.path.isfile(self.get_configured_sdf_path(model_path))
    def fix_mtl_texture_paths(self, model_path, mesh_path, model_name):
        # The `.obj` files use mtl
        if mesh_path.endswith(".obj"):
            # Find all textures located in the model path, used later to relative linking
            texture_files = glob.glob(os.path.join(model_path, "**", "textures", "*.*"))
            # Find location of mtl file, if any
            mtllib_file = None
            with open(mesh_path, "r") as file:
                for line in file:
                    if "mtllib" in line:
                        mtllib_file = line.split(" ")[-1].strip()
                        break
            if mtllib_file is not None:
                mtllib_file = os.path.join(os.path.dirname(mesh_path), mtllib_file)
                fin = open(mtllib_file, "r")
                data = fin.read()
                for line in data.splitlines():
                    if "map_" in line:
                        # Find the name of the texture/map in the mtl
                        map_file = line.split(" ")[-1].strip()
                        # Find the first match of the texture/map file
                        for texture_file in texture_files:
                            if os.path.basename(
                                texture_file
                            ) == map_file or os.path.basename(
                                texture_file
                            ) == os.path.basename(
                                map_file
                            ):
                                # Make the file unique to the model (unless it already is)
                                if model_name in texture_file:
                                    new_texture_file_name = texture_file
                                else:
                                    new_texture_file_name = texture_file.replace(
                                        map_file, model_name + "_" + map_file
                                    )
                                os.rename(texture_file, new_texture_file_name)
                                # Apply the correct relative path
                                data = data.replace(
                                    map_file,
                                    os.path.relpath(
                                        new_texture_file_name,
                                        start=os.path.dirname(mesh_path),
                                    ),
                                )
                                break
                fin.close()
                # Write in the correct data
                fout = open(mtllib_file, "w")
                fout.write(data)
                fout.close()
--- a/env_manager/rbs_gym/rbs_gym/envs/models/utils/xacro2sdf.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/models/utils/xacro2sdf.py
@ -0,0 +1,37 @@
 import subprocess
 import tempfile
 from typing import Dict, Optional, Tuple
 import xacro
 def xacro2sdf(
    input_file_path: str, mappings: Dict, model_path_remap: Optional[Tuple[str, str]]
 ) -> str:
    """Convert xacro (URDF variant) with given arguments to SDF and return as a string.

    Args:
        input_file_path: Path of the xacro file.
        mappings: Xacro arguments; the values are passed on as strings. The
            dictionary itself is not modified.
        model_path_remap: Optional `(old, new)` pair; every occurrence of
            `old` in the resulting SDF is replaced by `new`.

    Returns:
        The standard output of `ign sdf -p`, decoded as UTF-8 and remapped.
    """
    # Stringify into a copy so that the caller's dictionary stays untouched
    string_mappings = {key: str(value) for key, value in mappings.items()}
    # Convert xacro to URDF
    urdf_xml = xacro.process(input_file_name=input_file_path, mappings=string_mappings)
    # Create temporary file for URDF (`ign sdf -p` accepts only files)
    with tempfile.NamedTemporaryFile(mode="w") as tmp_urdf:
        tmp_urdf.write(urdf_xml)
        # Flush so that the converter sees the complete document
        tmp_urdf.flush()
        # Convert to SDF
        result = subprocess.run(
            ["ign", "sdf", "-p", tmp_urdf.name], stdout=subprocess.PIPE
        )
    sdf_xml = result.stdout.decode("utf-8")
    # Remap package name to model name, such that meshes can be located by Ignition
    if model_path_remap is not None:
        sdf_xml = sdf_xml.replace(model_path_remap[0], model_path_remap[1])
    # Return as string
    return sdf_xml
--- a/env_manager/rbs_gym/rbs_gym/envs/observation/init.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/observation/init.py
@ -0,0 +1,4 @@
 from .camera_subscriber import CameraSubscriber, CameraSubscriberStandalone
 from .twist_subscriber import TwistSubscriber
 from .joint_states import JointStates
 # from .octree import OctreeCreator
--- a/env_manager/rbs_gym/rbs_gym/envs/observation/camera_subscriber.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/observation/camera_subscriber.py
@ -0,0 +1,118 @@
 import sys
 from threading import Lock, Thread
 from typing import Optional, Union
 import rclpy
 from rclpy.callback_groups import CallbackGroup
 from rclpy.executors import SingleThreadedExecutor
 from rclpy.node import Node
 from rclpy.parameter import Parameter
 from rclpy.qos import (
    QoSDurabilityPolicy,
    QoSHistoryPolicy,
    QoSProfile,
    QoSReliabilityPolicy,
 )
 from sensor_msgs.msg import Image, PointCloud2
 class CameraSubscriber:
    """Keeps the most recent camera message received on a topic.

    Depending on `is_point_cloud` the subscription is created for
    `PointCloud2` or `Image` messages. The latest message and the
    "new observation" flag are protected by a lock because the callback may
    run in a different thread than the readers.
    """

    def __init__(
        self,
        node: Node,
        topic: str,
        is_point_cloud: bool,
        callback_group: Optional[CallbackGroup] = None,
    ):
        self._node = node
        # Select the message type of the subscriber
        if is_point_cloud:
            camera_msg_type = PointCloud2
        else:
            camera_msg_type = Image
        # Create all state used by the callback before subscribing, so that a
        # message delivered by an already spinning node finds it in place
        self.__observation_mutex = Lock()
        self.__new_observation_available = False
        self.__observation = camera_msg_type()
        self._node.create_subscription(
            msg_type=camera_msg_type,
            topic=topic,
            callback=self.observation_callback,
            qos_profile=QoSProfile(
                reliability=QoSReliabilityPolicy.RELIABLE,
                durability=QoSDurabilityPolicy.VOLATILE,
                history=QoSHistoryPolicy.KEEP_LAST,
                depth=1,
            ),
            callback_group=callback_group,
        )

    def observation_callback(self, msg):
        """
        Callback for getting observation.
        """
        # `with` releases the lock even if logging raises
        with self.__observation_mutex:
            self.__observation = msg
            self.__new_observation_available = True
            self._node.get_logger().debug("New observation received.")

    def get_observation(self) -> Union[PointCloud2, Image]:
        """
        Get the last received observation.
        """
        with self.__observation_mutex:
            return self.__observation

    def reset_new_observation_checker(self):
        """
        Reset checker of new observations, i.e. `self.new_observation_available()`
        """
        with self.__observation_mutex:
            self.__new_observation_available = False

    @property
    def new_observation_available(self):
        """
        Check if new observation is available since `self.reset_new_observation_checker()` was called
        """
        return self.__new_observation_available
 class CameraSubscriberStandalone(Node, CameraSubscriber):
    """Camera subscriber that owns its node and spins it in a background thread."""

    def __init__(
        self,
        topic: str,
        is_point_cloud: bool,
        node_name: str = "rbs_gym_camera_sub",
        use_sim_time: bool = True,
    ):
        # Initialise the ROS 2 context; a failure is tolerated as long as a
        # usable context is reported by `rclpy.ok()`
        try:
            rclpy.init()
        except Exception as init_error:
            if not rclpy.ok():
                sys.exit(f"ROS 2 context could not be initialised: {init_error}")

        Node.__init__(self, node_name)
        sim_time_parameter = Parameter(
            "use_sim_time", type_=Parameter.Type.BOOL, value=use_sim_time
        )
        self.set_parameters([sim_time_parameter])

        CameraSubscriber.__init__(
            self, node=self, topic=topic, is_point_cloud=is_point_cloud
        )

        # Spin the node in a separate daemon thread
        self._executor = SingleThreadedExecutor()
        self._executor.add_node(self)
        self._executor_thread = Thread(
            target=self._executor.spin, daemon=True, args=()
        )
        self._executor_thread.start()
--- a/env_manager/rbs_gym/rbs_gym/envs/observation/joint_states.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/observation/joint_states.py
@ -0,0 +1,107 @@
 from array import array
 from threading import Lock
 from typing import Optional
 from rclpy.callback_groups import CallbackGroup
 from rclpy.node import Node
 from rclpy.qos import (
    QoSDurabilityPolicy,
    QoSHistoryPolicy,
    QoSProfile,
    QoSReliabilityPolicy,
 )
 from sensor_msgs.msg import JointState
 class JointStates:
    """
    Subscriber that caches the most recent `JointState` message of a topic.

    All accessors of the cached message take an internal mutex, so they can be
    used from a different thread than the one executing the subscription callback.
    """

    def __init__(
        self,
        node: Node,
        topic: str,
        callback_group: Optional[CallbackGroup] = None,
    ):
        """
        Args:
            node: Node on which the subscription is created (also used for logging).
            topic: Name of the `JointState` topic.
            callback_group: Optional callback group passed to the subscription.
        """

        self._node = node

        # Everything the callback touches must exist before the subscription is
        # created, otherwise a message delivered right after subscribing would
        # hit a partially initialised object (missing mutex/flag).
        self.__observation_mutex = Lock()
        self.__observation = JointState()
        self.__new_observation_available = False

        self._node.create_subscription(
            msg_type=JointState,
            topic=topic,
            callback=self.observation_callback,
            qos_profile=QoSProfile(
                reliability=QoSReliabilityPolicy.RELIABLE,
                durability=QoSDurabilityPolicy.VOLATILE,
                history=QoSHistoryPolicy.KEEP_LAST,
                depth=1,
            ),
            callback_group=callback_group,
        )

    def observation_callback(self, msg):
        """
        Callback for getting observation.

        Stores `msg` as the latest observation and raises the "new observation" flag.
        """

        with self.__observation_mutex:
            self.__observation = msg
            self.__new_observation_available = True
            self._node.get_logger().debug("New observation received.")

    def get_observation(self) -> JointState:
        """
        Get the last received observation.
        """

        with self.__observation_mutex:
            return self.__observation

    def get_positions(self) -> array:
        """
        Get the last recorded observation position
        """

        with self.__observation_mutex:
            return self.__observation.position

    def get_velocities(self) -> array:
        """
        Get the last recorded observation velocity
        """

        with self.__observation_mutex:
            return self.__observation.velocity

    def get_efforts(self) -> array:
        """
        Get the last recorded observation effort
        """

        with self.__observation_mutex:
            return self.__observation.effort

    def reset_new_observation_checker(self):
        """
        Reset checker of new observations, i.e. `self.new_observation_available()`
        """

        with self.__observation_mutex:
            self.__new_observation_available = False

    @property
    def new_observation_available(self):
        """
        Check if new observation is available since `self.reset_new_observation_checker()` was called
        """

        return self.__new_observation_available
--- a/env_manager/rbs_gym/rbs_gym/envs/observation/octree.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/observation/octree.py
@ -0,0 +1,211 @@
 from typing import List, Tuple
 import numpy as np
 import ocnn
 import open3d
 import torch
 from rclpy.node import Node
 from sensor_msgs.msg import PointCloud2
 from rbs_gym.envs.utils import Tf2Listener, conversions
 class OctreeCreator:
    """
    Callable that converts a ROS 2 `PointCloud2` message into an octree.

    The point cloud is converted to Open3D, transformed into the reference
    frame, cropped to the configured bounds, given estimated normals and
    finally passed to `ocnn.Points2Octree`.
    """

    def __init__(
        self,
        node: Node,
        tf2_listener: Tf2Listener,
        reference_frame_id: str,
        min_bound: Tuple[float, float, float] = (-1.0, -1.0, -1.0),
        max_bound: Tuple[float, float, float] = (1.0, 1.0, 1.0),
        include_color: bool = False,
        # Note: For efficiency, the first channel of RGB is used for intensity
        include_intensity: bool = False,
        depth: int = 4,
        full_depth: int = 2,
        adaptive: bool = False,
        adp_depth: int = 4,
        normals_radius: float = 0.05,
        normals_max_nn: int = 10,
        node_dis: bool = True,
        node_feature: bool = False,
        split_label: bool = False,
        th_normal: float = 0.1,
        th_distance: float = 2.0,
        extrapolate: bool = False,
        save_pts: bool = False,
        key2xyz: bool = False,
        debug_draw: bool = False,
        debug_write_octree: bool = False,
    ):
        """
        Args:
            node: Node used for logging.
            tf2_listener: Shared listener used to look up the camera transform.
            reference_frame_id: Frame into which point clouds are transformed.
            min_bound: Lower corner of the crop volume (also passed as `bb_min`).
            max_bound: Upper corner of the crop volume (also passed as `bb_max`).
            include_color: Forwarded to the point cloud conversions.
            include_intensity: Forwarded to the point cloud conversions.
            normals_radius: Search radius used for normal estimation.
            normals_max_nn: Maximum number of neighbours used for normal estimation.
            debug_draw: If set, every pre-processed cloud is visualised with Open3D.
            debug_write_octree: If set, every octree is written to "octree.octree".

        The remaining arguments are forwarded unchanged to `ocnn.Points2Octree`.
        """

        self._node = node

        # Listener of tf2 transforms is shared with the owner
        self.__tf2_listener = tf2_listener

        # Parameters
        self._reference_frame_id = reference_frame_id
        self._min_bound = min_bound
        self._max_bound = max_bound
        self._include_color = include_color
        self._include_intensity = include_intensity
        self._normals_radius = normals_radius
        self._normals_max_nn = normals_max_nn
        self._debug_draw = debug_draw
        self._debug_write_octree = debug_write_octree

        # Create a converter between points and octree
        self._points_to_octree = ocnn.Points2Octree(
            depth=depth,
            full_depth=full_depth,
            node_dis=node_dis,
            node_feature=node_feature,
            split_label=split_label,
            adaptive=adaptive,
            adp_depth=adp_depth,
            th_normal=th_normal,
            th_distance=th_distance,
            extrapolate=extrapolate,
            save_pts=save_pts,
            key2xyz=key2xyz,
            bb_min=min_bound,
            bb_max=max_bound,
        )

    def __call__(self, ros_point_cloud2: PointCloud2) -> torch.Tensor:
        """
        Create an octree from `ros_point_cloud2` and return it.
        """

        # Convert to Open3D PointCloud
        open3d_point_cloud = conversions.pointcloud2_to_open3d(
            ros_point_cloud2=ros_point_cloud2,
            include_color=self._include_color,
            include_intensity=self._include_intensity,
        )

        # Preprocess point cloud (transform to robot frame, crop to workspace and estimate normals)
        open3d_point_cloud = self.preprocess_point_cloud(
            open3d_point_cloud=open3d_point_cloud,
            camera_frame_id=ros_point_cloud2.header.frame_id,
            reference_frame_id=self._reference_frame_id,
            min_bound=self._min_bound,
            max_bound=self._max_bound,
            normals_radius=self._normals_radius,
            normals_max_nn=self._normals_max_nn,
        )

        # Draw if needed
        if self._debug_draw:
            open3d.visualization.draw_geometries(
                [
                    open3d_point_cloud,
                    open3d.geometry.TriangleMesh.create_coordinate_frame(
                        size=0.2, origin=[0.0, 0.0, 0.0]
                    ),
                ],
                point_show_normal=True,
            )

        # Construct octree from such point cloud
        octree = self.construct_octree(
            open3d_point_cloud,
            include_color=self._include_color,
            include_intensity=self._include_intensity,
        )

        # Write if needed
        if self._debug_write_octree:
            ocnn.write_octree(octree, "octree.octree")

        return octree

    def preprocess_point_cloud(
        self,
        open3d_point_cloud: open3d.geometry.PointCloud,
        camera_frame_id: str,
        reference_frame_id: str,
        min_bound: List[float],
        max_bound: List[float],
        normals_radius: float,
        normals_max_nn: int,
    ) -> open3d.geometry.PointCloud:
        """
        Transform the cloud into `reference_frame_id`, crop it to the given
        bounds and estimate normals oriented towards the camera.

        A cloud without points (before or after cropping) is returned as is,
        after logging a warning.
        """

        # Check if point cloud has any points
        if not open3d_point_cloud.has_points():
            self._node.get_logger().warn(
                "Point cloud has no points. Pre-processing skipped."
            )
            return open3d_point_cloud

        # If the cloud is already expressed in the reference frame, no transform
        # is looked up and the camera sits at the origin of that frame
        camera_location = np.zeros(3)

        # Get transformation from camera to robot and use it to transform point
        # cloud into robot's base coordinate frame
        if camera_frame_id != reference_frame_id:
            transform = self.__tf2_listener.lookup_transform_sync(
                target_frame=reference_frame_id, source_frame=camera_frame_id
            )
            transform_mat = conversions.transform_to_matrix(transform=transform)
            open3d_point_cloud = open3d_point_cloud.transform(transform_mat)
            # Translation part of the transform, used as the camera position
            camera_location = transform_mat[0:3, 3]

        # Crop point cloud to include only the workspace
        open3d_point_cloud = open3d_point_cloud.crop(
            bounding_box=open3d.geometry.AxisAlignedBoundingBox(
                min_bound=min_bound, max_bound=max_bound
            )
        )

        # Check if any points remain in the area after cropping
        if not open3d_point_cloud.has_points():
            self._node.get_logger().warn(
                "Point cloud has no points after cropping it to the workspace volume."
            )
            return open3d_point_cloud

        # Estimate normal vector for each cloud point and orient these towards the camera
        open3d_point_cloud.estimate_normals(
            search_param=open3d.geometry.KDTreeSearchParamHybrid(
                radius=normals_radius, max_nn=normals_max_nn
            ),
            fast_normal_computation=True,
        )
        open3d_point_cloud.orient_normals_towards_camera_location(
            camera_location=camera_location
        )

        return open3d_point_cloud

    def construct_octree(
        self,
        open3d_point_cloud: open3d.geometry.PointCloud,
        include_color: bool,
        include_intensity: bool,
    ) -> torch.Tensor:
        """
        Convert a pre-processed Open3D point cloud into an octree.

        An empty cloud is replaced by a single point in the centre of the
        configured bounds before the conversion.
        """

        # In case the point cloud has no points, add a single point
        # This is a workaround because I was not able to create an empty octree without getting a segfault
        # TODO: Figure out a better way of making an empty octree (it does not occur if setup correctly, so probably not worth it)
        if not open3d_point_cloud.has_points():
            open3d_point_cloud.points.append(
                (
                    (self._min_bound[0] + self._max_bound[0]) / 2,
                    (self._min_bound[1] + self._max_bound[1]) / 2,
                    (self._min_bound[2] + self._max_bound[2]) / 2,
                )
            )
            open3d_point_cloud.normals.append((0.0, 0.0, 0.0))
            if include_color or include_intensity:
                open3d_point_cloud.colors.append((0.0, 0.0, 0.0))

        # Convert open3d point cloud into octree points
        octree_points = conversions.open3d_point_cloud_to_octree_points(
            open3d_point_cloud=open3d_point_cloud,
            include_color=include_color,
            include_intensity=include_intensity,
        )

        # Convert octree points into 1D Tensor (via ndarray)
        # Note: Copy of points here is necessary as ndarray would otherwise be immutable
        octree_points_ndarray = np.frombuffer(np.copy(octree_points.buffer()), np.uint8)
        octree_points_tensor = torch.from_numpy(octree_points_ndarray)

        # Finally, create an octree from the points
        return self._points_to_octree(octree_points_tensor)
--- a/env_manager/rbs_gym/rbs_gym/envs/observation/twist_subscriber.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/observation/twist_subscriber.py
@ -0,0 +1,81 @@
 import sys
 from threading import Lock, Thread
 from typing import Optional, Union
 import rclpy
 from rclpy.callback_groups import CallbackGroup
 from rclpy.executors import SingleThreadedExecutor
 from rclpy.node import Node
 from rclpy.parameter import Parameter
 from rclpy.qos import (
    QoSDurabilityPolicy,
    QoSHistoryPolicy,
    QoSProfile,
    QoSReliabilityPolicy,
 )
 from geometry_msgs.msg import TwistStamped
 class TwistSubscriber:
    """
    Subscriber that caches the most recent `TwistStamped` message of a topic.

    All accessors of the cached message take an internal mutex, so they can be
    used from a different thread than the one executing the subscription callback.
    """

    def __init__(
        self,
        node: Node,
        topic: str,
        callback_group: Optional[CallbackGroup] = None,
    ):
        """
        Args:
            node: Node on which the subscription is created (also used for logging).
            topic: Name of the `TwistStamped` topic.
            callback_group: Optional callback group passed to the subscription.
        """

        self._node = node

        # Everything the callback touches must exist before the subscription is
        # created, otherwise a message delivered right after subscribing would
        # hit a partially initialised object (missing mutex/flag).
        self.__observation_mutex = Lock()
        self.__observation = TwistStamped()
        self.__new_observation_available = False

        self._node.create_subscription(
            msg_type=TwistStamped,
            topic=topic,
            callback=self.observation_callback,
            qos_profile=QoSProfile(
                reliability=QoSReliabilityPolicy.RELIABLE,
                durability=QoSDurabilityPolicy.VOLATILE,
                history=QoSHistoryPolicy.KEEP_LAST,
                depth=1,
            ),
            callback_group=callback_group,
        )

    def observation_callback(self, msg):
        """
        Callback for getting observation.

        Stores `msg` as the latest observation and raises the "new observation" flag.
        """

        with self.__observation_mutex:
            self.__observation = msg
            self.__new_observation_available = True
            self._node.get_logger().debug("New observation received.")

    def get_observation(self) -> TwistStamped:
        """
        Get the last received observation.
        """

        with self.__observation_mutex:
            return self.__observation

    def reset_new_observation_checker(self):
        """
        Reset checker of new observations, i.e. `self.new_observation_available()`
        """

        with self.__observation_mutex:
            self.__new_observation_available = False

    @property
    def new_observation_available(self):
        """
        Check if new observation is available since `self.reset_new_observation_checker()` was called
        """

        return self.__new_observation_available
--- a/env_manager/rbs_gym/rbs_gym/envs/randomizers/init.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/randomizers/init.py
@ -0,0 +1 @@
 from .manipulation import ManipulationGazeboEnvRandomizer
--- a/env_manager/rbs_gym/rbs_gym/envs/randomizers/manipulation.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/randomizers/manipulation.py
--- a/env_manager/rbs_gym/rbs_gym/envs/tasks/init.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/tasks/init.py
@ -0,0 +1,4 @@
 from .curriculums import *
 # from .grasp import *
 # from .grasp_planetary import *
 from .reach import *
--- a/env_manager/rbs_gym/rbs_gym/envs/tasks/curriculums/init.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/tasks/curriculums/init.py
@ -0,0 +1 @@
 from .grasp import GraspCurriculum
--- a/env_manager/rbs_gym/rbs_gym/envs/tasks/curriculums/common.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/tasks/curriculums/common.py
@ -0,0 +1,700 @@
 from __future__ import annotations
 import enum
 import itertools
 import math
 from collections import deque
 from typing import Callable, Deque, Dict, Optional, Tuple, Type
 import numpy as np
 from gym_gz.base.task import Task
 from gym_gz.utils.typing import Reward
 from tf2_ros.buffer_interface import TypeException
 # Marker substrings for keys of the `info` dictionaries. `INFO_MEAN_EPISODE_KEY` is
 # embedded into keys built by the `get_info()` methods of the curriculum classes in this module.
 # NOTE(review): presumably the consumer of `info` uses these markers to decide whether a
 # value is averaged per step or per episode - confirm against the logging callback.
 INFO_MEAN_STEP_KEY: str = "__mean_step__"
 INFO_MEAN_EPISODE_KEY: str = "__mean_episode__"
@enum.unique
class CurriculumStage(enum.Enum):
    """
    Ordered enum that represents stages of a curriculum for RL task.

    Subclasses are expected to use consecutive integer values starting at 1,
    because `first()` and `last()` look members up by the values `1` and `len(cls)`.
    """

    @classmethod
    def first(cls) -> CurriculumStage:
        """
        Return the member with value 1.
        """

        return cls(1)

    @classmethod
    def last(cls) -> CurriculumStage:
        """
        Return the member whose value equals the number of members.
        """

        return cls(len(cls))

    def next(self) -> Optional[CurriculumStage]:
        """
        Return the stage that follows this one, or `None` for the last stage.
        """

        next_value = self.value + 1
        if next_value > self.last().value:
            return None
        # Members are not callable, the lookup by value must go through the enum class
        return type(self)(next_value)

    def previous(self) -> Optional[CurriculumStage]:
        """
        Return the stage that precedes this one, or `None` for the first stage.
        """

        previous_value = self.value - 1
        if previous_value < self.first().value:
            return None
        # Members are not callable, the lookup by value must go through the enum class
        return type(self)(previous_value)
 class StageRewardCurriculum:
    """
    Curriculum that begins to compute rewards for a stage once all previous stages are complete.

    For every member `X` of the stage enum, the (sub)class must provide a method
    `get_reward_X(**kwargs)`. These methods are looked up by name on construction.
    Stage completion is tracked in the public dict `stages_completed_this_episode`.
    """

    PERSISTENT_ID: str = "PERSISTENT"
    INFO_CURRICULUM_PREFIX: str = "curriculum/"

    def __init__(
        self,
        curriculum_stage: Type[CurriculumStage],
        stage_reward_multiplier: float,
        dense_reward: bool = False,
        **kwargs,
    ):
        """
        Args:
            curriculum_stage: Enum class that lists the stages (must not be empty).
            stage_reward_multiplier: Base of the per-stage reward multiplier; the
                reward of a stage is scaled by `multiplier ** (stage.value - 1)`.
            dense_reward: Must be False, dense reward is not implemented.

        Raises:
            TypeException: If `curriculum_stage` has no members.
            ValueError: If `dense_reward` is requested.
        """

        if 0 == len(curriculum_stage):
            raise TypeException(f"{curriculum_stage} has length of 0")

        self.__use_dense_reward = dense_reward
        if self.__use_dense_reward:
            raise ValueError(
                "Dense reward is currently not implemented for any curriculum"
            )

        # Setup internals
        self._stage_type = curriculum_stage
        self._stage_reward_functions: Dict[curriculum_stage, Callable] = {
            stage: getattr(self, f"get_reward_{stage.name}")
            for stage in curriculum_stage
        }
        self.__stage_reward_multipliers: Dict[curriculum_stage, float] = {
            stage: stage_reward_multiplier ** (stage.value - 1)
            for stage in curriculum_stage
        }

        self.stages_completed_this_episode: Dict[curriculum_stage, bool] = {
            stage: False for stage in curriculum_stage
        }
        self.__stages_rewards_this_episode: Dict[curriculum_stage, float] = {
            stage: 0.0 for stage in curriculum_stage
        }
        self.__stages_rewards_this_episode[self.PERSISTENT_ID] = 0.0

        self.__episode_succeeded: bool = False
        self.__episode_failed: bool = False

    def get_reward(self, only_last_stage: bool = False, **kwargs) -> Reward:
        """
        Compute the reward of the current step.

        Args:
            only_last_stage: If set, only the last stage is evaluated.
            **kwargs: Forwarded to the stage reward functions and to
                `get_persistent_reward()`.

        Returns:
            Sum of the processed stage rewards plus the persistent reward. The
            persistent reward is skipped once the last stage is complete.
        """

        reward = 0.0

        # Determine the stage at which to start computing reward [performance - done stages give no reward]
        if only_last_stage:
            first_stage_to_process = self._stage_type.last()
        else:
            # `None` means that every stage is already complete, in which case
            # there is nothing left to process
            first_stage_to_process = next(
                (
                    stage
                    for stage in self._stage_type
                    if not self.stages_completed_this_episode[stage]
                ),
                None,
            )

        if first_stage_to_process is not None:
            # Iterate over all stages that might need to be processed
            for stage_value in range(
                first_stage_to_process.value, len(self._stage_type) + 1
            ):
                stage = self._stage_type(stage_value)

                # Compute reward for the current stage
                stage_reward = self._stage_reward_functions[stage](**kwargs)
                # Multiply by the reward multiplier
                stage_reward *= self.__stage_reward_multipliers[stage]
                # Add to the total step reward
                reward += stage_reward
                # Add reward to the list for info
                self.__stages_rewards_this_episode[stage] += stage_reward

                # Break if stage is not yet completed [performance - next stages won't give any reward]
                if not self.stages_completed_this_episode[stage]:
                    break

        # If the last stage is complete, the episode has succeeded
        self.__episode_succeeded = self.stages_completed_this_episode[
            self._stage_type.last()
        ]
        if self.__episode_succeeded:
            return reward

        # Add persistent reward that is added regardless of the episode (unless task already succeeded)
        persistent_reward = self.get_persistent_reward(**kwargs)
        # Add to the total step reward
        reward += persistent_reward
        # Add reward to the list for info
        self.__stages_rewards_this_episode[self.PERSISTENT_ID] += persistent_reward

        return reward

    def is_done(self) -> bool:
        """
        Tell whether the episode ended, invoking the matching `on_episode_*` hook.
        """

        if self.__episode_succeeded:
            # The episode ended with success
            self.on_episode_success()
            return True
        elif self.__episode_failed:
            # The episode ended due to failure
            self.on_episode_failure()
            return True
        else:
            # Otherwise, the episode is not yet done
            return False

    def get_info(self) -> Dict:
        """
        Return success flag, reached stage and accumulated rewards of this episode.
        """

        # Whether the episode succeeded
        info = {
            "is_success": self.__episode_succeeded,
        }

        # What stage was reached during this episode so far
        # (first incomplete stage, or the last stage if all are complete)
        for stage in iter(self._stage_type):
            reached_stage = stage
            if not self.stages_completed_this_episode[stage]:
                break
        info.update(
            {
                f"{self.INFO_CURRICULUM_PREFIX}{INFO_MEAN_EPISODE_KEY}ep_reached_stage_mean": reached_stage.value,
            }
        )

        # Rewards for the individual stages
        info.update(
            {
                f"{self.INFO_CURRICULUM_PREFIX}{INFO_MEAN_EPISODE_KEY}ep_rew_mean_{stage.value}_{stage.name.lower()}": self.__stages_rewards_this_episode[
                    stage
                ]
                for stage in iter(self._stage_type)
            }
        )
        info.update(
            {
                f"{self.INFO_CURRICULUM_PREFIX}{INFO_MEAN_EPISODE_KEY}ep_rew_mean_{self.PERSISTENT_ID.lower()}": self.__stages_rewards_this_episode[
                    self.PERSISTENT_ID
                ]
            }
        )

        return info

    def reset_task(self):
        """
        Reset the per-episode state; calls `on_episode_timeout()` first if the
        episode neither succeeded nor failed.
        """

        if not (self.__episode_succeeded or self.__episode_failed):
            # The episode ended due to timeout
            self.on_episode_timeout()

        # Reset internals
        self.stages_completed_this_episode = dict.fromkeys(
            self.stages_completed_this_episode, False
        )
        self.__stages_rewards_this_episode = dict.fromkeys(
            self.__stages_rewards_this_episode, 0.0
        )
        self.__stages_rewards_this_episode[self.PERSISTENT_ID] = 0.0
        self.__episode_succeeded = False
        self.__episode_failed = False

    @property
    def episode_succeeded(self) -> bool:
        return self.__episode_succeeded

    @episode_succeeded.setter
    def episode_succeeded(self, value: bool):
        self.__episode_succeeded = value

    @property
    def episode_failed(self) -> bool:
        return self.__episode_failed

    @episode_failed.setter
    def episode_failed(self, value: bool):
        self.__episode_failed = value

    @property
    def use_dense_reward(self) -> bool:
        return self.__use_dense_reward

    def get_persistent_reward(self, **kwargs) -> float:
        """
        Virtual method.
        """

        reward = 0.0
        return reward

    def on_episode_success(self):
        """
        Virtual method.
        """

        pass

    def on_episode_failure(self):
        """
        Virtual method.
        """

        pass

    def on_episode_timeout(self):
        """
        Virtual method.
        """

        pass
 class SuccessRateImpl:
    """
    Tracks the success rate of episodes as a moving average over (at most) the last N episodes.
    """

    INFO_CURRICULUM_PREFIX: str = "curriculum/"

    def __init__(
        self,
        initial_success_rate: float = 0.0,
        rolling_average_n: int = 100,
        **kwargs,
    ):
        # Bookkeeping of the running average
        self.__collected_samples: int = 0
        self.__previous_success_rate_weight: int = 0

        # Configuration
        self.__rolling_average_n = rolling_average_n
        self.__success_rate = initial_success_rate

    def get_info(self) -> Dict:
        """
        Return the current success rate under a curriculum-prefixed key.
        """

        return {f"{self.INFO_CURRICULUM_PREFIX}_success_rate": self.__success_rate}

    def update_success_rate(self, is_success: bool):
        """
        Fold the outcome of one more episode into the success rate.
        """

        # While fewer than `rolling_average_n` samples were collected, the previous
        # rate is weighted by the number of samples collected so far
        window_not_full = self.__collected_samples < self.__rolling_average_n
        if window_not_full:
            self.__previous_success_rate_weight = self.__collected_samples
            self.__collected_samples += 1

        weighted_history = self.__previous_success_rate_weight * self.__success_rate
        self.__success_rate = (
            weighted_history + float(is_success)
        ) / self.__collected_samples

    @property
    def success_rate(self) -> float:
        return self.__success_rate
 class WorkspaceScaleCurriculum:
    """
    Curriculum that scales the workspace with the success rate: the higher the
    success rate, the larger the workspace (up to the maximum volume).
    """

    INFO_CURRICULUM_PREFIX: str = "curriculum/"

    def __init__(
        self,
        task: Task,
        success_rate_impl: SuccessRateImpl,
        min_workspace_scale: float,
        max_workspace_volume: Tuple[float, float, float],
        max_workspace_scale_success_rate_threshold: float,
        **kwargs,
    ):
        self.__task = task
        self.__success_rate_impl = success_rate_impl
        self.__min_workspace_scale = min_workspace_scale
        self.__max_workspace_volume = max_workspace_volume
        self.__max_workspace_scale_success_rate_threshold = (
            max_workspace_scale_success_rate_threshold
        )

    def get_info(self) -> Dict:
        """
        Return the workspace scale computed by the last `reset_task()`.
        """

        key = f"{self.INFO_CURRICULUM_PREFIX}{INFO_MEAN_EPISODE_KEY}workspace_scale"
        return {key: self.__workspace_scale}

    def reset_task(self):
        """
        Recompute the workspace size for the next episode.
        """

        self.__update_workspace_size()

    def __update_workspace_size(self):
        # Scale grows linearly with the success rate and is clamped to
        # [min_workspace_scale, 1.0] (the upper bound takes precedence)
        progress = (
            self.__success_rate_impl.success_rate
            / self.__max_workspace_scale_success_rate_threshold
        )
        self.__workspace_scale = min(1.0, max(self.__min_workspace_scale, progress))

        max_volume = self.__max_workspace_volume
        scaled_volume = (
            self.__workspace_scale * max_volume[0],
            self.__workspace_scale * max_volume[1],
            # Z workspace is currently kept the same on purpose
            max_volume[2],
        )

        # Bounds are centred around the workspace centre of the task
        half_extent = (
            scaled_volume[0] / 2,
            scaled_volume[1] / 2,
            scaled_volume[2] / 2,
        )
        lower_bound = tuple(
            self.__task.workspace_centre[axis] - half_extent[axis]
            for axis in range(3)
        )
        upper_bound = tuple(
            self.__task.workspace_centre[axis] + half_extent[axis]
            for axis in range(3)
        )

        overrides = {
            "workspace_volume": scaled_volume,
            "workspace_min_bound": lower_bound,
            "workspace_max_bound": upper_bound,
        }
        self.__task.add_task_parameter_overrides(overrides)
 class ObjectSpawnVolumeScaleCurriculum:
    """
    Curriculum that scales the object spawn volume with the success rate: the
    higher the success rate, the larger the volume (up to the maximum volume).
    """

    INFO_CURRICULUM_PREFIX: str = "curriculum/"

    def __init__(
        self,
        task: Task,
        success_rate_impl: SuccessRateImpl,
        min_object_spawn_volume_scale: float,
        max_object_spawn_volume: Tuple[float, float, float],
        max_object_spawn_volume_scale_success_rate_threshold: float,
        **kwargs,
    ):
        self.__task = task
        self.__success_rate_impl = success_rate_impl
        self.__min_object_spawn_volume_scale = min_object_spawn_volume_scale
        self.__max_object_spawn_volume = max_object_spawn_volume
        self.__max_object_spawn_volume_scale_success_rate_threshold = (
            max_object_spawn_volume_scale_success_rate_threshold
        )

    def get_info(self) -> Dict:
        """
        Return the spawn volume scale computed by the last `reset_task()`.
        """

        key = f"{self.INFO_CURRICULUM_PREFIX}{INFO_MEAN_EPISODE_KEY}object_spawn_volume_scale"
        return {key: self.__object_spawn_volume_scale}

    def reset_task(self):
        """
        Recompute the object spawn volume for the next episode.
        """

        self.__update_object_spawn_volume_size()

    def __update_object_spawn_volume_size(self):
        # Scale grows linearly with the success rate and is clamped to
        # [min_object_spawn_volume_scale, 1.0] (the upper bound takes precedence)
        progress = (
            self.__success_rate_impl.success_rate
            / self.__max_object_spawn_volume_scale_success_rate_threshold
        )
        self.__object_spawn_volume_scale = min(
            1.0, max(self.__min_object_spawn_volume_scale, progress)
        )

        # All three axes are scaled by the same factor
        scaled_volume = tuple(
            self.__object_spawn_volume_scale * self.__max_object_spawn_volume[axis]
            for axis in range(3)
        )

        self.__task.add_randomizer_parameter_overrides(
            {"object_random_spawn_volume": scaled_volume}
        )
 class ObjectCountCurriculum:
    """
    Curriculum that raises the number of objects with the success rate: the
    higher the success rate, the more objects (up to `object_count_max`).
    """

    INFO_CURRICULUM_PREFIX: str = "curriculum/"

    def __init__(
        self,
        task: Task,
        success_rate_impl: SuccessRateImpl,
        object_count_min: int,
        object_count_max: int,
        max_object_count_success_rate_threshold: float,
        **kwargs,
    ):
        self.__task = task
        self.__success_rate_impl = success_rate_impl
        self.__object_count_min = object_count_min
        self.__object_count_max = object_count_max
        self.__max_object_count_success_rate_threshold = (
            max_object_count_success_rate_threshold
        )

        # The span between both limits is cached; a negative span is a configuration error
        count_span = object_count_max - object_count_min
        self.__object_count_min_max_diff = count_span
        if count_span < 0:
            raise Exception(
                "'object_count_min' cannot be larger than 'object_count_max'"
            )

    def get_info(self) -> Dict:
        """
        Return the object count computed by the last `reset_task()`.
        """

        return {f"{self.INFO_CURRICULUM_PREFIX}object_count": self.__object_count}

    def reset_task(self):
        """
        Recompute the object count for the next episode.
        """

        self.__update_object_count()

    def __update_object_count(self):
        # Count grows linearly with the success rate, is rounded down and
        # capped at `object_count_max`
        progress = (
            self.__success_rate_impl.success_rate
            / self.__max_object_count_success_rate_threshold
        )
        uncapped_count = math.floor(
            self.__object_count_min + progress * self.__object_count_min_max_diff
        )
        self.__object_count = min(self.__object_count_max, uncapped_count)

        self.__task.add_randomizer_parameter_overrides(
            {"object_count": self.__object_count}
        )
 class ArmStuckChecker:
    """
    Checker for arm getting stuck.

    The arm is considered stuck if its joint positions differ by less than a
    threshold (norm of the difference) from every entry of a history that
    holds the last `arm_stuck_n_steps` readings.
    """

    INFO_CURRICULUM_PREFIX: str = "curriculum/"

    def __init__(
        self,
        task: Task,
        arm_stuck_n_steps: int,
        arm_stuck_min_joint_difference_norm: float,
        **kwargs,
    ):
        """
        Args:
            task: Task that provides `moveit2.joint_state` and `robot_arm_joint_names`.
            arm_stuck_n_steps: Length of the joint position history.
            arm_stuck_min_joint_difference_norm: Difference norm above which
                the arm is considered to be moving.
        """

        self.__task = task
        self.__arm_stuck_min_joint_difference_norm = arm_stuck_min_joint_difference_norm

        # List of previous join positions (used to compute difference norm with an older previous reading)
        self.__previous_joint_positions: Deque[np.ndarray] = deque(
            [], maxlen=arm_stuck_n_steps
        )

        # Counter of how many time the robot got stuck
        self.__robot_stuck_total_counter: int = 0

        # Initialize list of indices for the arm.
        # It is assumed that these indices do not change during the operation
        self.__arm_joint_indices = None

    def get_info(self) -> Dict:
        """
        Return the total number of times the robot was detected as stuck.
        """

        info = {
            f"{self.INFO_CURRICULUM_PREFIX}robot_stuck_count": self.__robot_stuck_total_counter,
        }
        return info

    def reset_task(self):
        """
        Clear the history and seed it with the current joint positions (if available).
        """

        self.__previous_joint_positions.clear()

        joint_positions = self.__get_arm_joint_positions()
        if joint_positions is not None:
            self.__previous_joint_positions.append(joint_positions)

    def is_robot_stuck(self) -> bool:
        """
        Record the current joint positions and tell whether the arm is stuck.

        Returns False if no joint state is available or if the history is not
        yet full.
        """

        # Get current position and append to the list of previous ones
        current_joint_positions = self.__get_arm_joint_positions()
        if current_joint_positions is None:
            # Without a reading there is nothing to compare against the history
            return False
        self.__previous_joint_positions.append(current_joint_positions)

        # Stop checking if there is not yet enough entries in the list
        if (
            len(self.__previous_joint_positions)
            < self.__previous_joint_positions.maxlen
        ):
            return False

        # Make sure the length of joint position matches
        if len(current_joint_positions) != len(self.__previous_joint_positions[0]):
            return False

        # Compute joint difference norm only with the `t - arm_stuck_n_steps` entry first (performance reason)
        joint_difference_norm = np.linalg.norm(
            current_joint_positions - self.__previous_joint_positions[0]
        )

        # If the difference is large enough, the arm does not appear to be stuck, so skip computing all other entries
        if joint_difference_norm > self.__arm_stuck_min_joint_difference_norm:
            return False

        # If it is too small, consider all other entries as well
        joint_difference_norms = np.linalg.norm(
            current_joint_positions
            - list(itertools.islice(self.__previous_joint_positions, 1, None)),
            axis=1,
        )

        # Return true (stuck) if all joint difference entries are too small
        is_stuck = all(
            joint_difference_norms < self.__arm_stuck_min_joint_difference_norm
        )
        self.__robot_stuck_total_counter += int(is_stuck)
        return is_stuck

    def __get_arm_joint_positions(self) -> Optional[np.ndarray[float]]:
        # Positions of the arm joints, or None if no joint state is available yet
        joint_state = self.__task.moveit2.joint_state

        if joint_state is None:
            return None

        # Indices of the arm joints are resolved once and then reused
        if self.__arm_joint_indices is None:
            self.__arm_joint_indices = [
                i
                for i, joint_name in enumerate(joint_state.name)
                if joint_name in self.__task.robot_arm_joint_names
            ]

        return np.take(joint_state.position, self.__arm_joint_indices)
 class AttributeCurriculum:
    """
    Curriculum that increases the value of an attribute (e.g. requirement) as the success rate increases.
    Currently support only attributes that are increasing.

    The value is interpolated linearly from `initial_value` (success rate of 0)
    to `target_value` (success rate of at least `target_value_threshold`).
    """

    INFO_CURRICULUM_PREFIX: str = "curriculum/"

    def __init__(
        self,
        success_rate_impl: SuccessRateImpl,
        attribute_owner: Type,
        attribute_name: str,
        initial_value: float,
        target_value: float,
        target_value_threshold: float,
        **kwargs,
    ):
        """
        Args:
            success_rate_impl: Provider of the current `success_rate`.
            attribute_owner: Object whose attribute is updated.
            attribute_name: Name of the attribute; `_name` and `__name` are
                tried as fallbacks if `name` does not exist on the owner.
            initial_value: Value of the attribute at a success rate of 0.
            target_value: Value of the attribute once the threshold is reached.
            target_value_threshold: Success rate at which `target_value` is reached.
        """

        self.__success_rate_impl = success_rate_impl
        self.__attribute_owner = attribute_owner
        self.__attribute_name = attribute_name
        self.__initial_value = initial_value
        self.__target_value_threshold = target_value_threshold

        # Initialise current value of the attribute
        self.__current_value = initial_value

        # Store difference for faster computations
        self.__value_diff = target_value - initial_value

    def get_info(self) -> Dict:
        """
        Return the current value of the attribute under a curriculum-prefixed key.
        """

        info = {
            f"{self.INFO_CURRICULUM_PREFIX}{self.__attribute_name}": self.__current_value,
        }
        return info

    def reset_task(self):
        """
        Recompute the attribute value and write it to the owner.

        Raises:
            Exception: If the owner has no matching attribute.
        """

        # Update the attribute
        self.__update_attribute()

    def __update_attribute(self):
        # Interpolation factor in [0.0, 1.0]. The lower bound must be 0.0 (and not
        # the initial value of the attribute), otherwise the attribute would not
        # start at `initial_value`.
        scale = min(
            1.0,
            max(
                0.0,
                self.__success_rate_impl.success_rate / self.__target_value_threshold,
            ),
        )

        self.__current_value = self.__initial_value + (scale * self.__value_diff)

        # Try the public name first, then the conventional private spellings.
        # Note that `__name` is matched literally, i.e. name-mangled attributes
        # (`_Class__name`) are not found by this lookup.
        candidate_names = (
            self.__attribute_name,
            f"_{self.__attribute_name}",
            f"__{self.__attribute_name}",
        )
        for candidate_name in candidate_names:
            if hasattr(self.__attribute_owner, candidate_name):
                setattr(self.__attribute_owner, candidate_name, self.__current_value)
                return

        raise Exception(
            f"Attribute owner '{self.__attribute_owner}' does not have any attribute named {self.__attribute_name}."
        )
--- a/env_manager/rbs_gym/rbs_gym/envs/tasks/curriculums/grasp.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/tasks/curriculums/grasp.py
@ -0,0 +1,341 @@
 from typing import Dict, List, Tuple
 from gym_gz.base.task import Task
 from rbs_gym.envs.tasks.curriculums.common import *
 from rbs_gym.envs.utils.math import distance_to_nearest_point
 class GraspStage(CurriculumStage):
    """
    Ordered enum that represents stages of a curriculum for Grasp (and GraspPlanetary) task.

    The stages are numbered in the order in which they are expected to be completed.
    `GraspCurriculum` implements one `get_reward_<STAGE>` method per member.
    """
    # End effector is closer to an object than the required reach distance
    REACH = 1
    # At least one object is being touched
    TOUCH = 2
    # At least one object is grasped
    GRASP = 3
    # A grasped object is above the required lift height
    LIFT = 4
 class GraspCurriculum(
    StageRewardCurriculum,
    SuccessRateImpl,
    WorkspaceScaleCurriculum,
    ObjectSpawnVolumeScaleCurriculum,
    ObjectCountCurriculum,
    ArmStuckChecker,
 ):
    """
    Curriculum learning implementation for grasp task that provides termination (success/fail) and reward for each stage of the task.

    In addition to the rewards for the individual stages (REACH, TOUCH, GRASP, LIFT), persistent
    rewards (penalties) are applied in every step. Optional curriculums for the workspace scale,
    object spawn volume scale, object count, reach distance and lift height are driven by the
    success rate that is tracked by this class.
    """
    def __init__(
        self,
        task: Task,
        stages_base_reward: float,
        reach_required_distance: float,
        lift_required_height: float,
        persistent_reward_each_step: float,
        persistent_reward_terrain_collision: float,
        persistent_reward_all_objects_outside_workspace: float,
        persistent_reward_arm_stuck: float,
        enable_stage_reward_curriculum: bool,
        enable_workspace_scale_curriculum: bool,
        enable_object_spawn_volume_scale_curriculum: bool,
        enable_object_count_curriculum: bool,
        reach_required_distance_min: Optional[float] = None,
        reach_required_distance_max: Optional[float] = None,
        reach_required_distance_max_threshold: Optional[float] = None,
        lift_required_height_min: Optional[float] = None,
        lift_required_height_max: Optional[float] = None,
        lift_required_height_max_threshold: Optional[float] = None,
        **kwargs,
    ):
        """
        Args:
            task: Task that is queried for the state of the scene (positions, contacts, logger).
            stages_base_reward: Reward returned by a stage when its condition is fulfilled.
            reach_required_distance: Distance under which the REACH stage is completed.
            lift_required_height: Height above which the LIFT stage is completed. It is offset
                by `BASE_LINK_Z_OFFSET` of the robot model.
            persistent_reward_each_step: Reward applied in every step (forced to be non-positive).
            persistent_reward_terrain_collision: Reward applied on collision with the terrain
                (forced to be non-positive, zero disables the check).
            persistent_reward_all_objects_outside_workspace: Reward applied when all objects are
                outside of the workspace (forced to be non-positive, zero disables the check).
            persistent_reward_arm_stuck: Reward applied when the arm is stuck
                (forced to be non-positive, zero disables the check).
            enable_stage_reward_curriculum: If False, only the last stage is rewarded.
            enable_workspace_scale_curriculum: Enables `WorkspaceScaleCurriculum`.
            enable_object_spawn_volume_scale_curriculum: Enables `ObjectSpawnVolumeScaleCurriculum`.
            enable_object_count_curriculum: Enables `ObjectCountCurriculum`.
            reach_required_distance_min: Initial value of the reach distance curriculum.
            reach_required_distance_max: Target value of the reach distance curriculum.
            reach_required_distance_max_threshold: Success rate at which the target reach
                distance is reached (defaults to 0.5).
            lift_required_height_min: Initial value of the lift height curriculum.
            lift_required_height_max: Target value of the lift height curriculum.
            lift_required_height_max_threshold: Success rate at which the target lift height
                is reached (defaults to 0.5).
            **kwargs: Forwarded to the constructors of all base classes.
        """
        StageRewardCurriculum.__init__(self, curriculum_stage=GraspStage, **kwargs)
        SuccessRateImpl.__init__(self, **kwargs)
        WorkspaceScaleCurriculum.__init__(
            self, task=task, success_rate_impl=self, **kwargs
        )
        ObjectSpawnVolumeScaleCurriculum.__init__(
            self, task=task, success_rate_impl=self, **kwargs
        )
        ObjectCountCurriculum.__init__(
            self, task=task, success_rate_impl=self, **kwargs
        )
        ArmStuckChecker.__init__(self, task=task, **kwargs)
        # Grasp task/environment that will be used to extract information from the scene
        self.__task = task
        # Parameters
        self.__stages_base_reward = stages_base_reward
        # Public on purpose: these two attributes are updated by `AttributeCurriculum` by name
        self.reach_required_distance = reach_required_distance
        self.lift_required_height = lift_required_height
        self.__persistent_reward_each_step = persistent_reward_each_step
        self.__persistent_reward_terrain_collision = persistent_reward_terrain_collision
        self.__persistent_reward_all_objects_outside_workspace = (
            persistent_reward_all_objects_outside_workspace
        )
        self.__persistent_reward_arm_stuck = persistent_reward_arm_stuck
        self.__enable_stage_reward_curriculum = enable_stage_reward_curriculum
        self.__enable_workspace_scale_curriculum = enable_workspace_scale_curriculum
        self.__enable_object_spawn_volume_scale_curriculum = (
            enable_object_spawn_volume_scale_curriculum
        )
        self.__enable_object_count_curriculum = enable_object_count_curriculum
        # Make sure that the persistent rewards for each step are negative
        if self.__persistent_reward_each_step > 0.0:
            self.__persistent_reward_each_step *= -1.0
        if self.__persistent_reward_terrain_collision > 0.0:
            self.__persistent_reward_terrain_collision *= -1.0
        if self.__persistent_reward_all_objects_outside_workspace > 0.0:
            self.__persistent_reward_all_objects_outside_workspace *= -1.0
        if self.__persistent_reward_arm_stuck > 0.0:
            self.__persistent_reward_arm_stuck *= -1.0
        # Setup curriculum for Reach distance requirement (if enabled)
        reach_required_distance_min = (
            reach_required_distance_min
            if reach_required_distance_min is not None
            else reach_required_distance
        )
        reach_required_distance_max = (
            reach_required_distance_max
            if reach_required_distance_max is not None
            else reach_required_distance
        )
        reach_required_distance_max_threshold = (
            reach_required_distance_max_threshold
            if reach_required_distance_max_threshold is not None
            else 0.5
        )
        # The curriculum is needed only if the value is supposed to change
        self.__reach_required_distance_curriculum_enabled = (
            reach_required_distance_min != reach_required_distance_max
        )
        if self.__reach_required_distance_curriculum_enabled:
            self.__reach_required_distance_curriculum = AttributeCurriculum(
                success_rate_impl=self,
                attribute_owner=self,
                attribute_name="reach_required_distance",
                initial_value=reach_required_distance_min,
                target_value=reach_required_distance_max,
                target_value_threshold=reach_required_distance_max_threshold,
            )
        # Setup curriculum for Lift height requirement (if enabled)
        lift_required_height_min = (
            lift_required_height_min
            if lift_required_height_min is not None
            else lift_required_height
        )
        lift_required_height_max = (
            lift_required_height_max
            if lift_required_height_max is not None
            else lift_required_height
        )
        lift_required_height_max_threshold = (
            lift_required_height_max_threshold
            if lift_required_height_max_threshold is not None
            else 0.5
        )
        # Offset Lift height requirement by the robot base offset.
        # The threshold is a success rate and not a height, so it must not be offset.
        base_link_z_offset = task.robot_model_class.BASE_LINK_Z_OFFSET
        lift_required_height += base_link_z_offset
        lift_required_height_min += base_link_z_offset
        lift_required_height_max += base_link_z_offset
        # Store the offset height, otherwise the offset would be lost if the curriculum
        # for the Lift height is disabled
        self.lift_required_height = lift_required_height
        self.__lift_required_height_curriculum_enabled = (
            lift_required_height_min != lift_required_height_max
        )
        if self.__lift_required_height_curriculum_enabled:
            self.__lift_required_height_curriculum = AttributeCurriculum(
                success_rate_impl=self,
                attribute_owner=self,
                attribute_name="lift_required_height",
                initial_value=lift_required_height_min,
                target_value=lift_required_height_max,
                target_value_threshold=lift_required_height_max_threshold,
            )
    def get_reward(self) -> Reward:
        """
        Return the reward computed by `StageRewardCurriculum` from the current state of the task.
        """
        if self.__enable_stage_reward_curriculum:
            # Try to get reward from each stage
            return StageRewardCurriculum.get_reward(
                self,
                ee_position=self.__task.get_ee_position(),
                object_positions=self.__task.get_object_positions(),
                touched_objects=self.__task.get_touched_objects(),
                grasped_objects=self.__task.get_grasped_objects(),
            )
        else:
            # If curriculum for stages is disabled, compute reward only for the last stage
            return StageRewardCurriculum.get_reward(
                self,
                only_last_stage=True,
                object_positions=self.__task.get_object_positions(),
                grasped_objects=self.__task.get_grasped_objects(),
            )
    def is_done(self) -> bool:
        """
        Return the termination flag determined by `StageRewardCurriculum`.
        """
        return StageRewardCurriculum.is_done(self)
    def get_info(self) -> Dict:
        """
        Collect info from the stage reward curriculum, success rate and all enabled curriculums.
        """
        info = StageRewardCurriculum.get_info(self)
        info.update(SuccessRateImpl.get_info(self))
        if self.__enable_workspace_scale_curriculum:
            info.update(WorkspaceScaleCurriculum.get_info(self))
        if self.__enable_object_spawn_volume_scale_curriculum:
            info.update(ObjectSpawnVolumeScaleCurriculum.get_info(self))
        if self.__enable_object_count_curriculum:
            info.update(ObjectCountCurriculum.get_info(self))
        # The arm stuck checker is active only if a non-zero penalty is configured for it
        if self.__persistent_reward_arm_stuck:
            info.update(ArmStuckChecker.get_info(self))
        if self.__reach_required_distance_curriculum_enabled:
            info.update(self.__reach_required_distance_curriculum.get_info())
        if self.__lift_required_height_curriculum_enabled:
            info.update(self.__lift_required_height_curriculum.get_info())
        return info
    def reset_task(self):
        """
        Reset the stage reward curriculum and all enabled curriculums.
        """
        StageRewardCurriculum.reset_task(self)
        if self.__enable_workspace_scale_curriculum:
            WorkspaceScaleCurriculum.reset_task(self)
        if self.__enable_object_spawn_volume_scale_curriculum:
            ObjectSpawnVolumeScaleCurriculum.reset_task(self)
        if self.__enable_object_count_curriculum:
            ObjectCountCurriculum.reset_task(self)
        # The arm stuck checker is active only if a non-zero penalty is configured for it
        if self.__persistent_reward_arm_stuck:
            ArmStuckChecker.reset_task(self)
        if self.__reach_required_distance_curriculum_enabled:
            self.__reach_required_distance_curriculum.reset_task()
        if self.__lift_required_height_curriculum_enabled:
            self.__lift_required_height_curriculum.reset_task()
    def on_episode_success(self):
        """
        Record a successful episode in the success rate.
        """
        self.update_success_rate(is_success=True)
    def on_episode_failure(self):
        """
        Record a failed episode in the success rate.
        """
        self.update_success_rate(is_success=False)
    def on_episode_timeout(self):
        """
        Record a timed out episode in the success rate (counted as not successful).
        """
        self.update_success_rate(is_success=False)
    def get_reward_REACH(
        self,
        ee_position: Tuple[float, float, float],
        object_positions: Dict[str, Tuple[float, float, float]],
        **kwargs,
    ) -> float:
        """
        Reward for the REACH stage.

        Returns the base reward and marks the stage as completed if the nearest object is
        closer to the end effector than `reach_required_distance`, otherwise returns 0.0.
        """
        if not object_positions:
            return 0.0
        nearest_object_distance = distance_to_nearest_point(
            origin=ee_position, points=list(object_positions.values())
        )
        self.__task.get_logger().debug(
            f"[Curriculum] Distance to nearest object: {nearest_object_distance}"
        )
        if nearest_object_distance < self.reach_required_distance:
            self.__task.get_logger().info(
                f"[Curriculum] An object is now closer than the required distance of {self.reach_required_distance}"
            )
            self.stages_completed_this_episode[GraspStage.REACH] = True
            return self.__stages_base_reward
        else:
            return 0.0
    def get_reward_TOUCH(self, touched_objects: List[str], **kwargs) -> float:
        """
        Reward for the TOUCH stage.

        Returns the base reward and marks the stage as completed if any object is touched,
        otherwise returns 0.0.
        """
        if touched_objects:
            self.__task.get_logger().info(
                f"[Curriculum] Touched objects: {touched_objects}"
            )
            self.stages_completed_this_episode[GraspStage.TOUCH] = True
            return self.__stages_base_reward
        else:
            return 0.0
    def get_reward_GRASP(self, grasped_objects: List[str], **kwargs) -> float:
        """
        Reward for the GRASP stage.

        Returns the base reward and marks the stage as completed if any object is grasped,
        otherwise returns 0.0.
        """
        if grasped_objects:
            self.__task.get_logger().info(
                f"[Curriculum] Grasped objects: {grasped_objects}"
            )
            self.stages_completed_this_episode[GraspStage.GRASP] = True
            return self.__stages_base_reward
        else:
            return 0.0
    def get_reward_LIFT(
        self,
        object_positions: Dict[str, Tuple[float, float, float]],
        grasped_objects: List[str],
        **kwargs,
    ) -> float:
        """
        Reward for the LIFT stage.

        Returns the base reward and marks the stage as completed if the height (third
        component of the position) of any grasped object exceeds `lift_required_height`,
        otherwise returns 0.0.
        """
        # Both the grasped objects and their positions are required
        if not grasped_objects or not object_positions:
            return 0.0
        for grasped_object in grasped_objects:
            grasped_object_position = object_positions.get(grasped_object)
            if grasped_object_position is None:
                # Position of this object is not known, so its height cannot be evaluated
                continue
            grasped_object_height = grasped_object_position[2]
            self.__task.get_logger().debug(
                f"[Curriculum] Height of grasped object '{grasped_object}': {grasped_object_height}"
            )
            if grasped_object_height > self.lift_required_height:
                self.__task.get_logger().info(
                    f"[Curriculum] Lifted object: {grasped_object}"
                )
                self.stages_completed_this_episode[GraspStage.LIFT] = True
                return self.__stages_base_reward
        return 0.0
    def get_persistent_reward(
        self, object_positions: Dict[str, Tuple[float, float, float]], **kwargs
    ) -> float:
        """
        Sum of the rewards (penalties) that are applied in every step regardless of the stage.

        Sets `episode_failed` if all objects are outside of the workspace or if the robot
        appears to be stuck. Checks with a penalty of zero are skipped.
        """
        # Subtract a small reward each step to provide incentive to act quickly
        reward = self.__persistent_reward_each_step
        # Negative reward for colliding with terrain
        if self.__persistent_reward_terrain_collision:
            if self.__task.check_terrain_collision():
                self.__task.get_logger().info(
                    "[Curriculum] Robot collided with the terrain"
                )
                reward += self.__persistent_reward_terrain_collision
        # Negative reward for having all objects outside of the workspace
        if self.__persistent_reward_all_objects_outside_workspace:
            if self.__task.check_all_objects_outside_workspace(
                object_positions=object_positions
            ):
                self.__task.get_logger().warn(
                    "[Curriculum] All objects are outside of the workspace"
                )
                reward += self.__persistent_reward_all_objects_outside_workspace
                self.episode_failed = True
        # Negative reward for arm getting stuck
        if self.__persistent_reward_arm_stuck:
            if ArmStuckChecker.is_robot_stuck(self):
                self.__task.get_logger().error(
                    "[Curriculum] Robot appears to be stuck, resetting..."
                )
                reward += self.__persistent_reward_arm_stuck
                self.episode_failed = True
        return reward
--- a/env_manager/rbs_gym/rbs_gym/envs/tasks/manipulation.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/tasks/manipulation.py
@ -0,0 +1,670 @@
 import abc
 import multiprocessing
 import sys
 from itertools import count
 from threading import Thread
 from typing import Dict, Optional, Tuple, Union
 import numpy as np
 import rclpy
 from gym_gz.base.task import Task
 from gym_gz.utils.typing import (
    Action,
    ActionSpace,
    Observation,
    ObservationSpace,
    Reward,
 )
 from rclpy.callback_groups import ReentrantCallbackGroup
 from rclpy.executors import MultiThreadedExecutor, SingleThreadedExecutor
 from rclpy.node import Node
 from scipy.spatial.transform import Rotation
 from rbs_gym.envs.control import *
 from rbs_gym.envs.models.robots import get_robot_model_class
 from rbs_gym.envs.utils import Tf2Broadcaster, Tf2Listener
 from rbs_gym.envs.utils.conversions import orientation_6d_to_quat
 from rbs_gym.envs.utils.gazebo import *
 from rbs_gym.envs.utils.math import quat_mul
 class Manipulation(Task, Node, abc.ABC):
    """
    Abstract base class for manipulation tasks that is both a `Task` and a ROS 2 `Node`.
    """
    # Counter shared by all instances that is used to assign a unique ID to each task
    _ids = count(0)
    def __init__(
        self,
        agent_rate: float,
        robot_model: str,
        workspace_frame_id: str,
        workspace_centre: Tuple[float, float, float],
        workspace_volume: Tuple[float, float, float],
        ignore_new_actions_while_executing: bool,
        use_servo: bool,
        scaling_factor_translation: float,
        scaling_factor_rotation: float,
        restrict_position_goal_to_workspace: bool,
        enable_gripper: bool,
        num_threads: int,
        **kwargs,
    ):
        """
        Set up the task, its ROS 2 node with a background executor, the description of the
        robot and workspace, and the controllers.

        Args:
            agent_rate: Passed to the constructor of `Task`.
            robot_model: Name that is resolved to a robot model class.
            workspace_frame_id: Frame of the workspace (special names are substituted).
            workspace_centre: Centre of the workspace; its Z component is shifted by
                `BASE_LINK_Z_OFFSET` of the robot model class.
            workspace_volume: Size of the workspace along each axis.
            ignore_new_actions_while_executing: Not used by this constructor.
            use_servo: Stored as `_use_servo`.
            scaling_factor_translation: Factor applied to relative translations.
            scaling_factor_rotation: Factor applied to relative rotations.
            restrict_position_goal_to_workspace: Whether position goals are clamped to the workspace.
            enable_gripper: Whether a gripper controller is created.
            num_threads: 1 selects a single-threaded executor, a larger value a multi-threaded
                executor with that many threads, anything else one thread per CPU.
            **kwargs: Not used by this constructor.
        """
        # Every instance of the task gets the next free ID
        self.id = next(self._ids)
        # Base class of the task comes first
        Task.__init__(self, agent_rate=agent_rate)
        # ROS 2 context is initialised here unless that was already done
        try:
            rclpy.init()
        except Exception as e:
            if not rclpy.ok():
                sys.exit(f"ROS 2 context could not be initialised: {e}")
        # Base class of the ROS 2 node, named after the ID of the task
        Node.__init__(self, f"rbs_gym_{self.id}")
        # Callback group without any restrictions on parallel execution of callbacks
        self._callback_group = ReentrantCallbackGroup()
        # Select the executor according to the requested number of threads
        if num_threads == 1:
            executor = SingleThreadedExecutor()
        else:
            executor = MultiThreadedExecutor(
                num_threads=(
                    num_threads if num_threads > 1 else multiprocessing.cpu_count()
                ),
            )
        # The executor spins this node in a daemon thread
        executor.add_node(self)
        self._executor_thread = Thread(target=executor.spin, daemon=True, args=())
        self._executor_thread.start()
        # Resolve the class of the robot model from its name
        self.robot_model_class = get_robot_model_class(robot_model)
        model_class = self.robot_model_class
        # Keep the arguments that are needed later
        self.workspace_centre = (
            workspace_centre[0],
            workspace_centre[1],
            workspace_centre[2] + model_class.BASE_LINK_Z_OFFSET,
        )
        self.workspace_volume = workspace_volume
        self._restrict_position_goal_to_workspace = restrict_position_goal_to_workspace
        self._use_servo = use_servo
        self.__scaling_factor_translation = scaling_factor_translation
        self.__scaling_factor_rotation = scaling_factor_rotation
        self._enable_gripper = enable_gripper
        # Precompute the bounds of the workspace around its centre
        half_extent = tuple(workspace_volume[axis] / 2 for axis in range(3))
        self.workspace_min_bound = tuple(
            self.workspace_centre[axis] - half_extent[axis] for axis in range(3)
        )
        self.workspace_max_bound = tuple(
            self.workspace_centre[axis] + half_extent[axis] for axis in range(3)
        )
        # Name and prefix of the robot depend on the ID of the task (the first task keeps the defaults)
        self.robot_prefix: str = model_class.DEFAULT_PREFIX
        if self.id == 0:
            self.robot_name = model_class.ROBOT_MODEL_NAME
        else:
            self.robot_name = f"{model_class.ROBOT_MODEL_NAME}{self.id}"
            if self.robot_prefix.endswith("_"):
                self.robot_prefix = f"{self.robot_prefix[:-1]}{self.id}_"
            elif self.robot_prefix == "":
                self.robot_prefix = f"robot{self.id}_"
        # Resolve names of links and joints of the robot for the final prefix
        prefix = self.robot_prefix
        self.robot_base_link_name = model_class.get_robot_base_link_name(prefix)
        self.robot_arm_base_link_name = model_class.get_arm_base_link_name(prefix)
        self.robot_ee_link_name = model_class.get_ee_link_name(prefix)
        self.robot_arm_link_names = model_class.get_arm_link_names(prefix)
        self.robot_gripper_link_names = model_class.get_gripper_link_names(prefix)
        self.robot_arm_joint_names = model_class.get_arm_joint_names(prefix)
        self.robot_gripper_joint_names = model_class.get_gripper_joint_names(prefix)
        # Frame of the workspace with special names replaced by the actual ones
        self.workspace_frame_id = self.substitute_special_frame(workspace_frame_id)
        # Initial joint positions are taken from the default configuration of the robot model
        self.initial_arm_joint_positions = model_class.DEFAULT_ARM_JOINT_POSITIONS
        self.initial_gripper_joint_positions = (
            model_class.DEFAULT_GRIPPER_JOINT_POSITIONS
        )
        # Other models of interest besides the robot
        self.terrain_name = "terrain"
        self.object_names = []
        # tf2 listener and broadcaster that use this node
        self.tf2_listener = Tf2Listener(node=self)
        self.tf2_broadcaster = Tf2Broadcaster(node=self)
        self.cartesian_control = True #TODO: make it as an external parameter
        # Controllers
        self.controller = CartesianForceController(self)
        if not self.cartesian_control:
            self.joint_controller = JointEffortController(self)
        if self._enable_gripper:
            self.gripper = GripperController(self, 0.064)
        # Overrides of task and randomizer parameters (e.g. from curriculum),
        # both are consumed at the beginning of reset
        self.__task_parameter_overrides: Dict[str, any] = {}
        self._randomizer_parameter_overrides: Dict[str, any] = {}
    def create_spaces(self) -> Tuple[ActionSpace, ObservationSpace]:
        """
        Create the action space followed by the observation space.

        Returns:
            Tuple of (action space, observation space).
        """
        return self.create_action_space(), self.create_observation_space()
    def create_action_space(self) -> ActionSpace:
        """
        Create the action space of the task. Not implemented in this class.
        """
        raise NotImplementedError
    def create_observation_space(self) -> ObservationSpace:
        """
        Create the observation space of the task. Not implemented in this class.
        """
        raise NotImplementedError
    def set_action(self, action: Action):
        """
        Apply an action. Not implemented in this class.
        """
        raise NotImplementedError
    def get_observation(self) -> Observation:
        """
        Return the current observation. Not implemented in this class.
        """
        raise NotImplementedError
    def get_reward(self) -> Reward:
        """
        Return the current reward. Not implemented in this class.
        """
        raise NotImplementedError
    def is_done(self) -> bool:
        """
        Return whether the episode is finished. Not implemented in this class.
        """
        raise NotImplementedError
    def reset_task(self):
        """
        Reset the task by consuming the pending parameter overrides.
        """
        self.__consume_parameter_overrides()
    # Helper functions #
    def get_relative_ee_position(
        self, translation: Tuple[float, float, float]
    ) -> Tuple[float, float, float]:
        """
        Compute the target position of the end effector for a relative translation.

        The translation is scaled, added to the current position of the end effector and,
        if enabled, the result is restricted to the workspace.
        """
        # Relative action converted to metric units
        scaled = self.scale_relative_translation(translation)
        # Position of the end effector at this moment
        ee_position = self.get_ee_position()
        # Target is the current position shifted by the scaled translation
        goal = tuple(ee_position[axis] + scaled[axis] for axis in range(3))
        # Without restriction the goal is used as it is
        if not self._restrict_position_goal_to_workspace:
            return goal
        return self.restrict_position_goal_to_workspace(goal)
    def get_relative_ee_orientation(
        self,
        rotation: Union[
            float,
            Tuple[float, float, float, float],
            Tuple[float, float, float, float, float, float],
        ],
        representation: str = "quat",
    ) -> Tuple[float, float, float, float]:
        """
        Compute the target orientation of the end effector for a relative rotation.

        Args:
            rotation: Relative rotation; an xyzw quaternion for "quat", six values for "6d"
                or a single angle around Z for "z" (scaled by the rotation scaling factor).
            representation: One of "quat", "6d" or "z".

        Returns:
            Normalised xyzw quaternion that combines the current orientation with the rotation.

        Raises:
            ValueError: If `representation` is not supported.
        """
        # Fail early with a clear message instead of combining the orientation with `None`
        if representation not in ("quat", "6d", "z"):
            raise ValueError(
                f"Unsupported representation of orientation '{representation}' (expected 'quat', '6d' or 'z')"
            )
        # Get current orientation
        current_quat_xyzw = self.get_ee_orientation()
        # For 'z' representation, result should always point down
        # Therefore, create a new quaternion that contains only yaw component
        if "z" == representation:
            current_yaw = Rotation.from_quat(current_quat_xyzw).as_euler("xyz")[2]
            current_quat_xyzw = Rotation.from_euler(
                "xyz", [np.pi, 0, current_yaw]
            ).as_quat()
        # Convert relative orientation representation to quaternion
        if "quat" == representation:
            relative_quat_xyzw = rotation
        elif "6d" == representation:
            # The first and second triple of values are the two vectors of the representation
            relative_quat_xyzw = orientation_6d_to_quat(rotation[0:3], rotation[3:6])
        else:
            rotation = self.scale_relative_rotation(rotation)
            relative_quat_xyzw = Rotation.from_euler("xyz", [0, 0, rotation]).as_quat()
        # Compute target orientation (combine quaternions)
        target_quat_xyzw = quat_mul(current_quat_xyzw, relative_quat_xyzw)
        # Normalise quaternion (should not be needed, but just to be safe)
        target_quat_xyzw /= np.linalg.norm(target_quat_xyzw)
        return target_quat_xyzw
    def scale_relative_translation(
        self, translation: Tuple[float, float, float]
    ) -> Tuple[float, float, float]:
        """
        Multiply the first three components of a relative translation by the translation scaling factor.
        """
        factor = self.__scaling_factor_translation
        return tuple(factor * translation[axis] for axis in range(3))
    def scale_relative_rotation(
        self,
        rotation: Union[float, Tuple[float, float, float], np.floating, np.ndarray],
    ) -> float:
        """
        Multiply a relative rotation by the rotation scaling factor.

        A value without length is scaled directly. For a value with length, a tuple of its
        first three scaled components is returned.
        """
        factor = self.__scaling_factor_rotation
        if hasattr(rotation, "__len__"):
            return tuple(factor * rotation[axis] for axis in range(3))
        return factor * rotation
    def restrict_position_goal_to_workspace(
        self, position: Tuple[float, float, float]
    ) -> Tuple[float, float, float]:
        """
        Clamp each of the three components of a position to the bounds of the workspace.
        """
        return tuple(
            min(
                self.workspace_max_bound[axis],
                max(self.workspace_min_bound[axis], position[axis]),
            )
            for axis in range(3)
        )
    def restrict_servo_translation_to_workspace(
        self, translation: Tuple[float, float, float]
    ) -> Tuple[float, float, float]:
        """
        Zero the components of a translation that would move the end effector further
        out of the workspace along an axis on which it already is outside of the bounds.
        """
        ee_position = self.get_ee_position()
        restricted = []
        for axis in range(3):
            component = translation[axis]
            if (
                ee_position[axis] > self.workspace_max_bound[axis]
                and component > 0.0
            ):
                # Above the upper bound and still moving up
                restricted.append(0.0)
            elif (
                ee_position[axis] < self.workspace_min_bound[axis]
                and component < 0.0
            ):
                # Below the lower bound and still moving down
                restricted.append(0.0)
            else:
                restricted.append(component)
        return tuple(restricted)
    def get_ee_pose(
        self,
    ) -> Optional[Tuple[Tuple[float, float, float], Tuple[float, float, float, float]]]:
        """
        Return the current pose of the end effector with respect to arm base link.

        The pose is read from Gazebo. If that fails, tf2 is used instead, and if the
        transform is not available either, a default pose is returned.
        """
        try:
            gazebo_robot = self.world.to_gazebo().get_model(self.robot_name).to_gazebo()
            position, quat_xyzw = get_model_pose(
                world=self.world,
                model=gazebo_robot,
                link=self.robot_ee_link_name,
                xyzw=True,
            )
            return transform_change_reference_frame_pose(
                world=self.world,
                position=position,
                quat=quat_xyzw,
                target_model=gazebo_robot,
                target_link=self.robot_arm_base_link_name,
                xyzw=True,
            )
        except Exception as e:
            self.get_logger().warn(
                f"Cannot get end effector pose from Gazebo ({e}), using tf2..."
            )
            # Fallback: single lookup of the transform via tf2
            transform = self.tf2_listener.lookup_transform_sync(
                source_frame=self.robot_ee_link_name,
                target_frame=self.robot_arm_base_link_name,
                retry=False,
            )
            if transform is None:
                self.get_logger().error(
                    "Cannot get pose of the end effector (default values are returned)"
                )
                return (
                    (0.0, 0.0, 0.0),
                    (0.0, 0.0, 0.0, 1.0),
                )
            translation = transform.translation
            rotation = transform.rotation
            return (
                (translation.x, translation.y, translation.z),
                (rotation.x, rotation.y, rotation.z, rotation.w),
            )
    def get_ee_position(self) -> Tuple[float, float, float]:
        """
        Return the current position of the end effector with respect to arm base link.

        The position is read from Gazebo. If that fails, tf2 is used instead, and if the
        transform is not available either, a default position is returned.
        """
        try:
            gazebo_robot = self.world.to_gazebo().get_model(self.robot_name).to_gazebo()
            position = get_model_position(
                world=self.world,
                model=gazebo_robot,
                link=self.robot_ee_link_name,
            )
            return transform_change_reference_frame_position(
                world=self.world,
                position=position,
                target_model=gazebo_robot,
                target_link=self.robot_arm_base_link_name,
            )
        except Exception as e:
            self.get_logger().debug(
                f"Cannot get end effector position from Gazebo ({e}), using tf2..."
            )
            # Fallback: single lookup of the transform via tf2
            transform = self.tf2_listener.lookup_transform_sync(
                source_frame=self.robot_ee_link_name,
                target_frame=self.robot_arm_base_link_name,
                retry=False,
            )
            if transform is None:
                self.get_logger().error(
                    "Cannot get position of the end effector (default values are returned)"
                )
                return (0.0, 0.0, 0.0)
            translation = transform.translation
            return (translation.x, translation.y, translation.z)
    def get_ee_orientation(self) -> Tuple[float, float, float, float]:
        """
        Return the current xyzw quaternion of the end effector with respect to arm base link.

        The orientation is read from Gazebo. If that fails, tf2 is used instead, and if the
        transform is not available either, a default quaternion is returned.
        """
        try:
            gazebo_robot = self.world.to_gazebo().get_model(self.robot_name).to_gazebo()
            quat_xyzw = get_model_orientation(
                world=self.world,
                model=gazebo_robot,
                link=self.robot_ee_link_name,
                xyzw=True,
            )
            return transform_change_reference_frame_orientation(
                world=self.world,
                quat=quat_xyzw,
                target_model=gazebo_robot,
                target_link=self.robot_arm_base_link_name,
                xyzw=True,
            )
        except Exception as e:
            self.get_logger().warn(
                f"Cannot get end effector orientation from Gazebo ({e}), using tf2..."
            )
            # Fallback: single lookup of the transform via tf2
            transform = self.tf2_listener.lookup_transform_sync(
                source_frame=self.robot_ee_link_name,
                target_frame=self.robot_arm_base_link_name,
                retry=False,
            )
            if transform is None:
                self.get_logger().error(
                    "Cannot get orientation of the end effector (default values are returned)"
                )
                return (0.0, 0.0, 0.0, 1.0)
            rotation = transform.rotation
            return (rotation.x, rotation.y, rotation.z, rotation.w)
    def get_object_position(
        self, object_model: Union[ModelWrapper, str]
    ) -> Tuple[float, float, float]:
        """
        Return the current position of an object with respect to arm base link.
        Note: Only simulated objects are currently supported.

        If the position cannot be determined, the error is logged and (0.0, 0.0, 0.0) is returned.
        """
        try:
            position = get_model_position(world=self.world, model=object_model)
            position_in_arm_base = transform_change_reference_frame_position(
                world=self.world,
                position=position,
                target_model=self.robot_name,
                target_link=self.robot_arm_base_link_name,
            )
        except Exception as e:
            self.get_logger().error(
                f"Cannot get position of {object_model} object (default values are returned): {e}"
            )
            return (0.0, 0.0, 0.0)
        else:
            return position_in_arm_base
    def get_object_positions(self) -> Dict[str, Tuple[float, float, float]]:
        """
        Collect the position of every object in `self.object_names`, expressed in the
        arm base link frame.

        Returns a dictionary keyed by object name. If anything raises while the
        positions are gathered, the error is logged and the entries collected so far
        (possibly none) are returned.
        Note: Only simulated objects are currently supported.
        """
        positions_by_name = {}
        try:
            # Resolve the robot model and its arm base link once for all objects
            gazebo_robot = self.world.to_gazebo().get_model(self.robot_name).to_gazebo()
            arm_base = gazebo_robot.get_link(link_name=self.robot_arm_base_link_name)
            for name in self.object_names:
                positions_by_name[name] = transform_change_reference_frame_position(
                    world=self.world,
                    position=get_model_position(world=self.world, model=name),
                    target_model=gazebo_robot,
                    target_link=arm_base,
                )
        except Exception as e:
            self.get_logger().error(
                f"Cannot get positions of all objects (empty Dict is returned): {e}"
            )
        return positions_by_name
    def substitute_special_frame(self, frame_id: str) -> str:
        """
        Translate a symbolic frame alias into a concrete frame name.

        Recognised aliases are "arm_base_link", "base_link", "end_effector" and
        "world"; any other `frame_id` is returned unchanged. For "world" the name of
        the Gazebo world is used, falling back to "rbs_gym_world" (with a warning)
        when it cannot be obtained.
        """
        if "arm_base_link" == frame_id:
            return self.robot_arm_base_link_name
        elif "base_link" == frame_id:
            return self.robot_base_link_name
        elif "end_effector" == frame_id:
            return self.robot_ee_link_name
        elif "world" == frame_id:
            try:
                # In Gazebo, where multiple worlds are allowed
                return self.world.to_gazebo().name()
            except Exception as e:
                # Report why the lookup failed instead of logging an empty message
                self.get_logger().warn(
                    f"Cannot get name of the world from Gazebo ({e}), using 'rbs_gym_world'..."
                )
                # Otherwise (e.g. real world)
                return "rbs_gym_world"
        else:
            return frame_id
    def wait_until_action_executed(self):
        """
        Wait for the gripper to finish executing; does nothing when the gripper is disabled.
        """
        if not self._enable_gripper:
            return
        self.gripper.wait_until_executed()
    def move_to_initial_joint_configuration(self):
        """
        Currently a no-op: the motion code below is commented out.
        """
        # self.moveit2.move_to_configuration(self.initial_arm_joint_positions)
        # if (
        #     self.robot_model_class.CLOSED_GRIPPER_JOINT_POSITIONS
        #     == self.initial_gripper_joint_positions
        # ):
        #     self.gripper.reset_close()
        # else:
        #     self.gripper.reset_open()
        return None
    def check_terrain_collision(self) -> bool:
        """
        Report whether any arm or gripper link (other than the base link) touches the terrain.

        Every contact of the terrain model is inspected. `body_b` is expected to start
        with the robot name followed by a two-character separator and the link name.
        """
        robot_prefix = self.robot_name
        # Skip the robot name and the two-character separator that follows it
        link_offset = len(robot_prefix) + 2
        for contact in self.world.get_model(self.terrain_name).contacts():
            other_body = contact.body_b
            if not other_body.startswith(robot_prefix) or len(other_body) <= len(
                robot_prefix
            ):
                continue
            link_name = other_body[link_offset:]
            # The base link is allowed to touch the terrain
            if link_name == self.robot_base_link_name:
                continue
            if (
                link_name in self.robot_arm_link_names
                or link_name in self.robot_gripper_link_names
            ):
                return True
        return False
    def check_all_objects_outside_workspace(
        self,
        object_positions: Dict[str, Tuple[float, float, float]],
    ) -> bool:
        """
        Tell whether every given object lies outside the workspace.

        Each position in `object_positions` is tested with
        `check_object_outside_workspace()`; an empty mapping yields True.
        """
        # Evaluate every object first, then reduce the individual verdicts
        verdicts = list(
            map(self.check_object_outside_workspace, object_positions.values())
        )
        return all(verdicts)
    def check_object_outside_workspace(
        self,
        object_position: Tuple[float, float, float],
    ) -> bool:
        """
        Tell whether a position lies outside the axis-aligned workspace box.

        The position is outside as soon as one coordinate is smaller than the matching
        entry of `workspace_min_bound` or larger than that of `workspace_max_bound`.
        """
        lower = self.workspace_min_bound
        below_minimum = (
            object_position[0] < lower[0]
            or object_position[1] < lower[1]
            or object_position[2] < lower[2]
        )
        if below_minimum:
            return below_minimum
        upper = self.workspace_max_bound
        return (
            object_position[0] > upper[0]
            or object_position[1] > upper[1]
            or object_position[2] > upper[2]
        )
    def add_parameter_overrides(self, parameter_overrides: Dict[str, any]):
        """
        Register the same overrides for both the task and the randomizer.
        """
        for register in (
            self.add_task_parameter_overrides,
            self.add_randomizer_parameter_overrides,
        ):
            register(parameter_overrides)
    def add_task_parameter_overrides(self, parameter_overrides: Dict[str, any]):
        """
        Merge `parameter_overrides` into the pending task overrides (existing keys are replaced).
        """
        pending_overrides = self.__task_parameter_overrides
        pending_overrides.update(parameter_overrides)
    def add_randomizer_parameter_overrides(self, parameter_overrides: Dict[str, any]):
        """
        Merge `parameter_overrides` into the pending randomizer overrides (existing keys are replaced).
        """
        pending_overrides = self._randomizer_parameter_overrides
        pending_overrides.update(parameter_overrides)
    def __consume_parameter_overrides(self):
        """
        Apply all pending task parameter overrides to attributes of this object.

        For each override the first existing attribute among `key`, `_key`, `__key`
        and the name-mangled private spellings (`_ClassName__key` for every class in
        the MRO) receives the value. Overrides that match no attribute are reported
        as errors. The pending overrides are cleared afterwards.
        """
        for key, value in self.__task_parameter_overrides.items():
            candidates = [key, f"_{key}", f"__{key}"]
            # Attributes assigned as `self.__name` inside a class body are stored
            # under their mangled name, so the literal `__name` never matches them
            candidates.extend(
                f"_{cls.__name__.lstrip('_')}__{key}" for cls in type(self).__mro__
            )
            for attribute_name in candidates:
                if hasattr(self, attribute_name):
                    setattr(self, attribute_name, value)
                    break
            else:
                self.get_logger().error(
                    f"Override '{key}' is not supported by the task."
                )
        self.__task_parameter_overrides.clear()
--- a/env_manager/rbs_gym/rbs_gym/envs/tasks/reach/init.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/tasks/reach/init.py
@ -0,0 +1,4 @@
 from .reach import Reach
 from .reach_color_image import ReachColorImage
 from .reach_depth_image import ReachDepthImage
 # from .reach_octree import ReachOctree
--- a/env_manager/rbs_gym/rbs_gym/envs/tasks/reach/reach.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/tasks/reach/reach.py
@ -0,0 +1,194 @@
 import abc
 from typing import Tuple
 from geometry_msgs.msg import WrenchStamped
 import gymnasium as gym
 import numpy as np
 from gym_gz.utils.typing import (
    Action,
    ActionSpace,
    Observation,
    ObservationSpace,
    Reward,
 )
 from rbs_gym.envs.tasks.manipulation import Manipulation
 from rbs_gym.envs.utils.math import distance_to_nearest_point
 from std_msgs.msg import Float64MultiArray
 from rbs_gym.envs.observation import TwistSubscriber, JointStates
 class Reach(Manipulation, abc.ABC):
    """
    Reaching task built on top of `Manipulation`.

    Actions are three normalised force components that are scaled and published as
    a `WrenchStamped` message. Observations concatenate the end-effector position,
    the position of the first object in `object_names` and the current twist.
    """
    def __init__(
        self,
        sparse_reward: bool,
        collision_reward: float,
        act_quick_reward: float,
        required_accuracy: float,
        **kwargs,
    ):
        """
        Args:
            sparse_reward: If True, reward is given only for reaching the target;
                otherwise progress towards the target is rewarded at every step.
            collision_reward: Magnitude of the penalty for terrain collision (sign is ignored).
            act_quick_reward: Magnitude of the per-step penalty (sign is ignored).
            required_accuracy: Distance to the target below which the episode terminates.
            **kwargs: Forwarded to `Manipulation.__init__()`.
        """
        # Initialize the Task base class
        Manipulation.__init__(
            self,
            **kwargs,
        )
        # Additional parameters
        self._sparse_reward: bool = sparse_reward
        # Both penalties are stored as non-negative magnitudes and subtracted in get_reward()
        self._act_quick_reward = (
            act_quick_reward if act_quick_reward >= 0.0 else -act_quick_reward
        )
        self._collision_reward = (
            collision_reward if collision_reward >= 0.0 else -collision_reward
        )
        self._required_accuracy: float = required_accuracy
        # Flag indicating if the task is done (performance - get_reward + is_done)
        self._is_done: bool = False
        self._is_truncated: bool = False
        self._is_terminated: bool = False
        # Distance to target in the previous step (or after reset)
        self._previous_distance: float = None
        # self._collision_reward: float = collision_reward
        self.initial_gripper_joint_positions = (
            self.robot_model_class.CLOSED_GRIPPER_JOINT_POSITIONS
        )
        self.twist = TwistSubscriber(self,
                                     topic="/cartesian_force_controller/current_twist", 
                                     callback_group=self._callback_group)
        self.joint_states = JointStates(self, topic="/joint_states", callback_group=self._callback_group)
        self._action = WrenchStamped()
        self._action_array: Action = []
    def create_action_space(self) -> ActionSpace:
        """
        Return the action space: three values in [-1, 1].
        """
        # 0:3 - (x, y, z) force
        # Note: Torque commands are currently disabled (see `set_action()`)
        return gym.spaces.Box(low=-1.0, high=1.0, shape=(3,), dtype=np.float32)
    def create_observation_space(self) -> ObservationSpace:
        """
        Return the observation space: twelve unbounded values.
        """
        # 0:3  - (x, y, z) end effector position
        # 3:6  - (x, y, z) target position
        # 6:9  - (x, y, z) current linear twist
        # 9:12 - (x, y, z) current angular twist
        # Note: These could theoretically be restricted to the workspace and object spawn area instead of inf
        return gym.spaces.Box(low=-np.inf, high=np.inf, shape=(12,), dtype=np.float32)
    def set_action(self, action: Action):
        """
        Scale the normalised action into a force and publish it as a wrench stamped
        in the end-effector frame.
        """
        # self.get_logger().debug(f"action: {action}")
        # act = Float64MultiArray()
        # act.data = action
        # self.joint_controller.publisher.publish(act)
        # Store action for reward function
        self._action_array = action
        # self._action.header.frame_id = self.robot_ee_link_name
        self._action.header.stamp = self.get_clock().now().to_msg()
        self._action.header.frame_id = self.robot_ee_link_name
        self._action.wrench.force.x = float(action[0]) * 30.0
        self._action.wrench.force.y = float(action[1]) * 30.0
        self._action.wrench.force.z = float(action[2]) * 30.0
        # self._action.wrench.torque.x = float(action[3]) * 10.0
        # self._action.wrench.torque.y = float(action[4]) * 10.0
        # self._action.wrench.torque.z = float(action[5]) * 10.0
        self.controller.publisher.publish(self._action)
    def get_observation(self) -> Observation:
        """
        Build the observation from end-effector position, target position and twist.
        """
        # Get current end-effector and target positions
        ee_position = self.get_ee_position()
        target_position = self.get_object_position(object_model=self.object_names[0])
        # joint_states = tuple(self.joint_states.get_positions())
        # self.get_logger().warn(f"joint_states: {joint_states[:7]}")
        twist = self.twist.get_observation()
        twt = (twist.twist.linear.x,
               twist.twist.linear.y,
               twist.twist.linear.z,
               twist.twist.angular.x,
               twist.twist.angular.y,
               twist.twist.angular.z)
        # Create the observation
        observation = Observation(
            np.concatenate([ee_position, target_position, twt], dtype=np.float32)
        )
        self.get_logger().debug(f"\nobservation: {observation}")
        # Return the observation
        return observation
    def get_reward(self) -> Reward:
        """
        Compute the reward for the current step and update the termination and
        truncation flags (target reached / terrain collision).
        """
        reward = 0.0
        # Compute the current distance to the target
        current_distance = self.get_distance_to_target()
        # Mark the episode done if target is reached
        if current_distance < self._required_accuracy:
            self._is_terminated = True
            reward += 1.0 if self._sparse_reward else 0.0 # 100.0
        self.get_logger().debug(f"reward_target: {reward}")
        # Give reward based on how much closer robot got relative to the target for dense reward
        if not self._sparse_reward:
            distance_delta = self._previous_distance - current_distance
            reward += distance_delta * 10.0
            self._previous_distance = current_distance
            self.get_logger().debug(f"reward_distance: {reward}")
        if self.check_terrain_collision():
            reward -= self._collision_reward
            self._is_truncated = True
            self.get_logger().debug(f"reward_collision: {reward}")
        # Reward control
        # reward -= np.abs(self._action_array).sum() * 0.01
        # self.get_logger().debug(f"reward_c: {reward}")
        # Subtract a small reward each step to provide incentive to act quickly (if enabled)
        # Note: `_act_quick_reward` is a non-negative magnitude, hence the subtraction
        reward -= self._act_quick_reward
        self.get_logger().debug(f"reward: {reward}")
        return Reward(reward)
    def is_terminated(self) -> bool:
        """
        Return the termination flag set by `get_reward()` once the target is reached.
        """
        self.get_logger().debug(f"terminated: {self._is_terminated}")
        return self._is_terminated
    def is_truncated(self) -> bool:
        """
        Return the truncation flag set by `get_reward()` on terrain collision.
        """
        self.get_logger().debug(f"truncated: {self._is_truncated}")
        return self._is_truncated
    def reset_task(self):
        """
        Reset the base task and the episode flags; for dense reward also store the
        initial distance to the target.
        """
        Manipulation.reset_task(self)
        self._is_done = False
        self._is_truncated = False
        self._is_terminated = False
        # Compute and store the distance after reset if using dense reward
        if not self._sparse_reward:
            self._previous_distance = self.get_distance_to_target()
        self.get_logger().debug(f"\ntask reset")
    def get_distance_to_target(self) -> float:
        """
        Return the distance between the end effector and the first object in
        `object_names`. The object position is also broadcast as the "object" frame.
        """
        ee_position = self.get_ee_position()
        object_position = self.get_object_position(object_model=self.object_names[0])
        self.tf2_broadcaster.broadcast_tf("world", "object", object_position, (0.0,0.0,0.0,1.0), xyzw=True)
        return distance_to_nearest_point(origin=ee_position, points=[object_position])
--- a/env_manager/rbs_gym/rbs_gym/envs/tasks/reach/reach_color_image.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/tasks/reach/reach_color_image.py
@ -0,0 +1,83 @@
 import abc
 import gymnasium as gym
 import numpy as np
 from gym_gz.utils.typing import Observation, ObservationSpace
 from rbs_gym.envs.models.sensors import Camera
 from rbs_gym.envs.observation import CameraSubscriber
 from rbs_gym.envs.tasks.reach import Reach
 class ReachColorImage(Reach, abc.ABC):
    """
    Variant of the `Reach` task whose observation is a colour (or single-channel) camera image.
    """
    def __init__(
        self,
        camera_width: int,
        camera_height: int,
        camera_type: str = "camera",
        monochromatic: bool = False,
        **kwargs,
    ):
        """
        Args:
            camera_width: Expected image width in pixels.
            camera_height: Expected image height in pixels.
            camera_type: Camera identifier passed to `Camera.get_color_topic()`.
            monochromatic: If True, only the first image channel is observed.
            **kwargs: Forwarded to `Reach.__init__()`.
        """
        # Initialize the Task base class
        Reach.__init__(
            self,
            **kwargs,
        )
        # Store parameters for later use
        self._camera_width = camera_width
        self._camera_height = camera_height
        self._monochromatic = monochromatic
        # Perception (RGB camera)
        self.camera_sub = CameraSubscriber(
            node=self,
            topic=Camera.get_color_topic(camera_type),
            is_point_cloud=False,
            callback_group=self._callback_group,
        )
    def create_observation_space(self) -> ObservationSpace:
        """
        Return the image observation space of shape (height, width, channels).
        """
        # 0:3*height*width - rgb image
        # 0:1*height*width - monochromatic (intensity) image
        return gym.spaces.Box(
            low=0,
            high=255,
            shape=(
                self._camera_height,
                self._camera_width,
                1 if self._monochromatic else 3,
            ),
            dtype=np.uint8,
        )
    def get_observation(self) -> Observation:
        """
        Convert the latest camera image into an observation matching the observation space.
        """
        # Get the latest image
        image = self.camera_sub.get_observation()
        assert (
            image.width == self._camera_width and image.height == self._camera_height
        ), f"Error: Resolution of the input image does not match the configured observation space. ({image.width}x{image.height} instead of {self._camera_width}x{self._camera_height})"
        # Reshape and create the observation
        color_image = np.array(image.data, dtype=np.uint8).reshape(
            self._camera_height, self._camera_width, 3
        )
        # # Debug save images
        # from PIL import Image
        # img_color = Image.fromarray(color_image)
        # img_color.save("img_color.png")
        if self._monochromatic:
            # Slice (instead of index) to keep the trailing channel axis, so the
            # shape is (height, width, 1) as declared by the observation space
            observation = Observation(color_image[:, :, 0:1])
        else:
            observation = Observation(color_image)
        self.get_logger().debug(f"\nobservation: {observation}")
        # Return the observation
        return observation
--- a/env_manager/rbs_gym/rbs_gym/envs/tasks/reach/reach_depth_image.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/tasks/reach/reach_depth_image.py
@ -0,0 +1,67 @@
 import abc
 import gymnasium as gym
 import numpy as np
 from gym_gz.utils.typing import Observation, ObservationSpace
 from rbs_gym.envs.models.sensors import Camera
 from rbs_gym.envs.observation import CameraSubscriber
 from rbs_gym.envs.tasks.reach import Reach
 class ReachDepthImage(Reach, abc.ABC):
    """
    Variant of the `Reach` task whose observation is a depth camera image.
    """
    def __init__(
        self,
        camera_width: int,
        camera_height: int,
        camera_type: str = "depth_camera",
        **kwargs,
    ):
        """
        Args:
            camera_width: Expected image width in pixels.
            camera_height: Expected image height in pixels.
            camera_type: Camera identifier passed to `Camera.get_depth_topic()`.
            **kwargs: Forwarded to `Reach.__init__()`.
        """
        # Initialize the Task base class
        Reach.__init__(
            self,
            **kwargs,
        )
        # Store parameters for later use
        self._camera_width = camera_width
        self._camera_height = camera_height
        # Perception (depth camera)
        self.camera_sub = CameraSubscriber(
            node=self,
            topic=Camera.get_depth_topic(camera_type),
            is_point_cloud=False,
            callback_group=self._callback_group,
        )
    def create_observation_space(self) -> ObservationSpace:
        """
        Return the depth image observation space of shape (height, width, 1).
        """
        # 0:height*width - depth image
        return gym.spaces.Box(
            low=0,
            high=np.inf,
            shape=(self._camera_height, self._camera_width, 1),
            dtype=np.float32,
        )
    def get_observation(self) -> Observation:
        """
        Convert the latest depth image into a float32 observation with infinite depths set to 0.
        """
        # Get the latest image
        image = self.camera_sub.get_observation()
        # Construct from buffer and reshape
        # Note: The view is copied because `np.frombuffer` shares memory with the
        #       message data (read-only for immutable buffers), and the values
        #       are modified below
        depth_image = (
            np.frombuffer(image.data, dtype=np.float32)
            .reshape(self._camera_height, self._camera_width, 1)
            .copy()
        )
        # Replace all instances of infinity with 0
        depth_image[depth_image == np.inf] = 0.0
        # Create the observation
        observation = Observation(depth_image)
        self.get_logger().debug(f"\nobservation: {observation}")
        # Return the observation
        return observation
--- a/env_manager/rbs_gym/rbs_gym/envs/utils/init.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/utils/init.py
@ -0,0 +1,3 @@
 from . import conversions, gazebo, logging, math
 from .tf2_broadcaster import Tf2Broadcaster, Tf2BroadcasterStandalone
 from .tf2_listener import Tf2Listener, Tf2ListenerStandalone
--- a/env_manager/rbs_gym/rbs_gym/envs/utils/conversions.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/utils/conversions.py
@ -0,0 +1,183 @@
 from typing import Tuple, Union
 import numpy
 import open3d
 # import pyoctree
 import sensor_msgs
 from scipy.spatial.transform import Rotation
 from sensor_msgs.msg import PointCloud2
 from open3d.geometry import PointCloud
 from geometry_msgs.msg import Transform
 def pointcloud2_to_open3d(
    ros_point_cloud2: PointCloud2,
    include_color: bool = False,
    include_intensity: bool = False,
    # Note: Order does not matter for DL, that's why channel swapping is disabled by default
    fix_rgb_channel_order: bool = False,
 ) -> PointCloud:
    """
    Convert a ROS `PointCloud2` message into an Open3D `PointCloud`.

    The first three float32 values of every point are read as xyz. Points without
    any finite coordinate are dropped. If colour or intensity is requested, three
    bytes starting at the offset of the fourth field are read as colours and scaled
    to [0, 1]; when the message has no fourth field, the colours are all zeros.

    Args:
        ros_point_cloud2: Input message.
        include_color: Attach colours to the output.
        include_intensity: Currently handled exactly like `include_color`.
        fix_rgb_channel_order: Swap the first and third colour channel. The swap is
            done in a view of the message data.
    Returns:
        The converted point cloud.
    """
    # Create output Open3D PointCloud
    open3d_pc = PointCloud()
    size = ros_point_cloud2.width * ros_point_cloud2.height
    xyz_dtype = ">f4" if ros_point_cloud2.is_bigendian else "<f4"
    # View the xyz coordinates directly in the message buffer (no copy)
    xyz = numpy.ndarray(
        shape=(size, 3),
        dtype=xyz_dtype,
        buffer=ros_point_cloud2.data,
        offset=0,
        strides=(ros_point_cloud2.point_step, 4),
    )
    # NOTE(review): `.any` keeps points that have at least one finite coordinate;
    #               confirm whether `.all` is the intended filter
    valid_points = numpy.isfinite(xyz).any(axis=1)
    open3d_pc.points = open3d.utility.Vector3dVector(
        xyz[valid_points].astype(numpy.float64)
    )
    if include_color or include_intensity:
        if len(ros_point_cloud2.fields) > 3:
            bgr = numpy.ndarray(
                shape=(size, 3),
                dtype=numpy.uint8,
                buffer=ros_point_cloud2.data,
                offset=ros_point_cloud2.fields[3].offset,
                strides=(ros_point_cloud2.point_step, 1),
            )
            if fix_rgb_channel_order:
                # Swap channels to gain rgb (faster than `bgr[:, [2, 1, 0]]`)
                bgr[:, 0], bgr[:, 2] = bgr[:, 2], bgr[:, 0].copy()
            open3d_pc.colors = open3d.utility.Vector3dVector(
                (bgr[valid_points] / 255).astype(numpy.float64)
            )
        else:
            # One colour per *valid* point, so that the colours line up with the
            # points (`valid_points` is a mask over all input points)
            open3d_pc.colors = open3d.utility.Vector3dVector(
                numpy.zeros(
                    (numpy.count_nonzero(valid_points), 3), dtype=numpy.float64
                )
            )
    # TODO: Update octree creator once L8 image format is supported in Ignition Gazebo
    # elif include_intensity:
    #     # Faster approach, but only the first channel gets the intensity value (rest is 0)
    #     intensities = numpy.zeros((len(valid_points), 3), dtype=numpy.float64)
    #     intensities[:, [0]] = (
    #         numpy.ndarray(
    #             shape=(size, 1),
    #             dtype=numpy.uint8,
    #             buffer=ros_point_cloud2.data,
    #             offset=ros_point_cloud2.fields[3].offset,
    #             strides=(ros_point_cloud2.point_step, 1),
    #         )[valid_points]
    #         / 255
    #     ).astype(numpy.float64)
    #     open3d_pc.colors = open3d.utility.Vector3dVector(intensities)
    #     # # Slower approach, but all channels get the intensity value
    #     # intensities = numpy.ndarray(
    #     #     shape=(size, 1),
    #     #     dtype=numpy.uint8,
    #     #     buffer=ros_point_cloud2.data,
    #     #     offset=ros_point_cloud2.fields[3].offset,
    #     #     strides=(ros_point_cloud2.point_step, 1),
    #     # )
    #     # open3d_pc.colors = open3d.utility.Vector3dVector(
    #     #     numpy.tile(intensities[valid_points] / 255, (1, 3)).astype(numpy.float64)
    #     # )
    # Return the converted Open3D PointCloud
    return open3d_pc
 def transform_to_matrix(transform: Transform) -> numpy.ndarray:
    """
    Build the 4x4 homogeneous matrix that corresponds to a `Transform` message.
    """
    rotation = transform.rotation
    translation = transform.translation
    # Start from identity, then fill in the rotation block and the translation column
    matrix = numpy.eye(4)
    # Open3D expects the quaternion in wxyz order
    matrix[0:3, 0:3] = open3d.geometry.get_rotation_matrix_from_quaternion(
        [rotation.w, rotation.x, rotation.y, rotation.z]
    )
    matrix[0, 3] = translation.x
    matrix[1, 3] = translation.y
    matrix[2, 3] = translation.z
    return matrix
 # def open3d_point_cloud_to_octree_points(
 #     open3d_point_cloud: PointCloud,
 #     include_color: bool = False,
 #     include_intensity: bool = False,
 # ) -> pyoctree.Points:
 #
 #     octree_points = pyoctree.Points()
 #
 #     if include_color:
 #         features = numpy.reshape(numpy.asarray(open3d_point_cloud.colors), -1)
 #     elif include_intensity:
 #         features = numpy.asarray(open3d_point_cloud.colors)[:, 0]
 #     else:
 #         features = []
 #
 #     octree_points.set_points(
 #         # XYZ points
 #         numpy.reshape(numpy.asarray(open3d_point_cloud.points), -1),
 #         # Normals
 #         numpy.reshape(numpy.asarray(open3d_point_cloud.normals), -1),
 #         # Other features, e.g. color
 #         features,
 #         # Labels - not used
 #         [],
 #     )
 #
 #     return octree_points
 def orientation_6d_to_quat(
    v1: Tuple[float, float, float], v2: Tuple[float, float, float]
 ) -> Tuple[float, float, float, float]:
    """
    Convert an orientation given by two 3D vectors into an xyzw quaternion.

    Both vectors are normalised and used as the first two columns of a rotation
    matrix; the third column is their cross product.
    """
    first_column = numpy.asarray(v1) / numpy.linalg.norm(v1)
    second_column = numpy.asarray(v2) / numpy.linalg.norm(v2)
    third_column = numpy.cross(first_column, second_column)
    rotation_matrix = numpy.column_stack((first_column, second_column, third_column))
    return Rotation.from_matrix(rotation_matrix).as_quat()
 def orientation_quat_to_6d(
    quat_xyzw: Tuple[float, float, float, float]
 ) -> Tuple[Tuple[float, float, float], Tuple[float, float, float]]:
    """
    Convert an xyzw quaternion into the first two columns of its rotation matrix.
    """
    # Rows of the transposed matrix are the columns of the rotation matrix
    columns = Rotation.from_quat(quat_xyzw).as_matrix().T
    first_column = tuple(columns[0])
    second_column = tuple(columns[1])
    return (first_column, second_column)
 def quat_to_wxyz(
    xyzw: Union[numpy.ndarray, Tuple[float, float, float, float]]
 ) -> Union[numpy.ndarray, Tuple[float, float, float, float]]:
    """
    Reorder a quaternion from xyzw to wxyz.

    A tuple input yields a tuple. Any other input (array, list, ...) is converted
    with `numpy.asarray` and yields an array.
    """
    if isinstance(xyzw, tuple):
        return (xyzw[3], xyzw[0], xyzw[1], xyzw[2])
    # `asarray` makes the index-list reordering work for plain sequences as well
    return numpy.asarray(xyzw)[[3, 0, 1, 2]]
 def quat_to_xyzw(
    wxyz: Union[numpy.ndarray, Tuple[float, float, float, float]]
 ) -> Union[numpy.ndarray, Tuple[float, float, float, float]]:
    """
    Reorder a quaternion from wxyz to xyzw.

    A tuple input yields a tuple. Any other input (array, list, ...) is converted
    with `numpy.asarray` and yields an array.
    """
    if isinstance(wxyz, tuple):
        return (wxyz[1], wxyz[2], wxyz[3], wxyz[0])
    # `asarray` makes the index-list reordering work for plain sequences as well
    return numpy.asarray(wxyz)[[1, 2, 3, 0]]
--- a/env_manager/rbs_gym/rbs_gym/envs/utils/gazebo.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/utils/gazebo.py
@ -0,0 +1,260 @@
 from typing import Tuple, Union
 from gym_gz.scenario.model_wrapper import ModelWrapper
 from numpy import exp
 from scenario.bindings.gazebo import Link, World
 from scipy.spatial.transform import Rotation
 from rbs_gym.envs.utils.conversions import quat_to_wxyz, quat_to_xyzw
 from rbs_gym.envs.utils.math import quat_mul
 def get_model_pose(
    world: World,
    model: Union[ModelWrapper, str],
    link: Union[Link, str, None] = None,
    xyzw: bool = False,
 ) -> Tuple[Tuple[float, float, float], Tuple[float, float, float, float]]:
    """
    Return the pose (position, orientation) of a model's link.

    `model` and `link` may be given by name; without `link` the model's first link
    is used. The quaternion is returned as provided by the link (wxyz), or in xyzw
    order when `xyzw` is set.
    """
    # Resolve the model by name when a string is passed
    gazebo_model = (
        world.to_gazebo().get_model(model).to_gazebo()
        if isinstance(model, str)
        else model
    )
    # Resolve the link: default to the first one, or look it up by name
    if link is None:
        resolved_link = gazebo_model.get_link(link_name=gazebo_model.link_names()[0])
    elif isinstance(link, str):
        resolved_link = gazebo_model.get_link(link_name=link)
    else:
        resolved_link = link
    link_position = resolved_link.position()
    link_quat = resolved_link.orientation()
    if xyzw:
        link_quat = quat_to_xyzw(link_quat)
    return (link_position, link_quat)
 def get_model_position(
    world: World,
    model: Union[ModelWrapper, str],
    link: Union[Link, str, None] = None,
 ) -> Tuple[float, float, float]:
    """
    Return the position of a model's link.

    `model` and `link` may be given by name; without `link` the model's first link
    is used.
    """
    # Resolve the model by name when a string is passed
    gazebo_model = (
        world.to_gazebo().get_model(model).to_gazebo()
        if isinstance(model, str)
        else model
    )
    # Resolve the link: default to the first one, or look it up by name
    if link is None:
        resolved_link = gazebo_model.get_link(link_name=gazebo_model.link_names()[0])
    elif isinstance(link, str):
        resolved_link = gazebo_model.get_link(link_name=link)
    else:
        resolved_link = link
    return resolved_link.position()
 def get_model_orientation(
    world: World,
    model: Union[ModelWrapper, str],
    link: Union[Link, str, None] = None,
    xyzw: bool = False,
 ) -> Tuple[float, float, float, float]:
    """
    Return the orientation of a model's link as a quaternion.

    `model` and `link` may be given by name; without `link` the model's first link
    is used. The quaternion is returned as provided by the link (wxyz), or in xyzw
    order when `xyzw` is set.
    """
    # Resolve the model by name when a string is passed
    gazebo_model = (
        world.to_gazebo().get_model(model).to_gazebo()
        if isinstance(model, str)
        else model
    )
    # Resolve the link: default to the first one, or look it up by name
    if link is None:
        resolved_link = gazebo_model.get_link(link_name=gazebo_model.link_names()[0])
    elif isinstance(link, str):
        resolved_link = gazebo_model.get_link(link_name=link)
    else:
        resolved_link = link
    link_quat = resolved_link.orientation()
    return quat_to_xyzw(link_quat) if xyzw else link_quat
 def transform_move_to_model_pose(
    world: World,
    position: Tuple[float, float, float],
    quat: Tuple[float, float, float, float],
    target_model: Union[ModelWrapper, str],
    target_link: Union[Link, str, None] = None,
    xyzw: bool = False,
 ) -> Tuple[Tuple[float, float, float], Tuple[float, float, float, float]]:
    """
    Transform such that original `position` and `quat` are represented with respect to `target_model::target_link`.
    The resulting pose is still represented in world coordinate system.
    `quat` and the returned quaternion use xyzw order if `xyzw` is set, wxyz otherwise.
    """
    # The frame is always queried in xyzw order because that is what scipy expects
    frame_origin, frame_quat = get_model_pose(
        world,
        model=target_model,
        link=target_link,
        xyzw=True,
    )
    rotated = Rotation.from_quat(frame_quat).apply(position)
    moved_position = (
        rotated[0] + frame_origin[0],
        rotated[1] + frame_origin[1],
        rotated[2] + frame_origin[2],
    )
    # Match the quaternion convention of the caller before composing
    frame_quat_for_mul = frame_quat if xyzw else quat_to_wxyz(frame_quat)
    moved_quat = quat_mul(quat, frame_quat_for_mul, xyzw=xyzw)
    return (moved_position, moved_quat)
 def transform_move_to_model_position(
    world: World,
    position: Tuple[float, float, float],
    target_model: Union[ModelWrapper, str],
    target_link: Union[Link, str, None] = None,
 ) -> Tuple[float, float, float]:
    """
    Transform such that original `position` is represented with respect to `target_model::target_link`.
    The resulting position is still represented in world coordinate system.

    Returns:
        The transformed (x, y, z) position.
    """
    target_frame_position, target_frame_quat_xyzw = get_model_pose(
        world,
        model=target_model,
        link=target_link,
        xyzw=True,
    )
    # Rotate by the orientation of the target frame, then offset by its origin
    rotated_position = Rotation.from_quat(target_frame_quat_xyzw).apply(position)
    return (
        target_frame_position[0] + rotated_position[0],
        target_frame_position[1] + rotated_position[1],
        target_frame_position[2] + rotated_position[2],
    )
 def transform_move_to_model_orientation(
    world: World,
    quat: Tuple[float, float, float, float],
    target_model: Union[ModelWrapper, str],
    target_link: Union[Link, str, None] = None,
    xyzw: bool = False,
 ) -> Tuple[float, float, float, float]:
    """
    Transform such that original `quat` is represented with respect to `target_model::target_link`.
    The resulting orientation is still represented in world coordinate system.

    `quat` and the returned quaternion use xyzw order if `xyzw` is set, wxyz otherwise.
    """
    # Query the frame orientation in the same convention as `quat`
    target_frame_quat = get_model_orientation(
        world,
        model=target_model,
        link=target_link,
        xyzw=xyzw,
    )
    return quat_mul(quat, target_frame_quat, xyzw=xyzw)
 def transform_change_reference_frame_pose(
    world: World,
    position: Tuple[float, float, float],
    quat: Tuple[float, float, float, float],
    target_model: Union[ModelWrapper, str],
    target_link: Union[Link, str, None] = None,
    xyzw: bool = False,
 ) -> Tuple[Tuple[float, float, float], Tuple[float, float, float, float]]:
    """
    Change reference frame of original `position` and `quat` from world coordinate system to `target_model::target_link` coordinate system.
    `quat` and the returned quaternion use xyzw order if `xyzw` is set, wxyz otherwise.
    """
    target_frame_position, target_frame_quat = get_model_pose(
        world,
        model=target_model,
        link=target_link,
        xyzw=True,
    )
    transformed_position = (
        position[0] - target_frame_position[0],
        position[1] - target_frame_position[1],
        position[2] - target_frame_position[2],
    )
    transformed_position = Rotation.from_quat(target_frame_quat).apply(
        transformed_position, inverse=True
    )
    # The orientation must be composed with the *inverse* rotation of the target
    # frame (just like the position above). For a unit quaternion the inverse is
    # its conjugate.
    x, y, z, w = target_frame_quat
    inverse_frame_quat = (-x, -y, -z, w)
    if not xyzw:
        inverse_frame_quat = quat_to_wxyz(inverse_frame_quat)
    # `quat_mul(a, b)` applies `a` first and `b` second
    transformed_quat = quat_mul(quat, inverse_frame_quat, xyzw=xyzw)
    return (tuple(transformed_position), transformed_quat)
 def transform_change_reference_frame_position(
    world: World,
    position: Tuple[float, float, float],
    target_model: Union[ModelWrapper, str],
    target_link: Union[Link, str, None] = None,
 ) -> Tuple[float, float, float]:
    """
    Change reference frame of original `position` from world coordinate system to `target_model::target_link` coordinate system.

    Returns:
        The (x, y, z) position expressed in the target frame.
    """
    target_frame_position, target_frame_quat_xyzw = get_model_pose(
        world,
        model=target_model,
        link=target_link,
        xyzw=True,
    )
    # Offset relative to the origin of the target frame (still along world axes)
    offset = (
        position[0] - target_frame_position[0],
        position[1] - target_frame_position[1],
        position[2] - target_frame_position[2],
    )
    # Express the offset along the axes of the target frame
    transformed_position = Rotation.from_quat(target_frame_quat_xyzw).apply(
        offset, inverse=True
    )
    return tuple(transformed_position)
 def transform_change_reference_frame_orientation(
    world: World,
    quat: Tuple[float, float, float, float],
    target_model: Union[ModelWrapper, str],
    target_link: Union[Link, str, None] = None,
    xyzw: bool = False,
 ) -> Tuple[float, float, float, float]:
    """
    Change reference frame of original `quat` from world coordinate system to `target_model::target_link` coordinate system.
    `quat` and the returned quaternion use xyzw order if `xyzw` is set, wxyz otherwise.
    """
    target_frame_quat = get_model_orientation(
        world,
        model=target_model,
        link=target_link,
        xyzw=xyzw,
    )
    # The orientation must be composed with the *inverse* rotation of the target
    # frame. For a unit quaternion the inverse is its conjugate.
    if xyzw:
        x, y, z, w = target_frame_quat
        inverse_frame_quat = (-x, -y, -z, w)
    else:
        w, x, y, z = target_frame_quat
        inverse_frame_quat = (w, -x, -y, -z)
    # `quat_mul(a, b)` applies `a` first and `b` second
    return quat_mul(quat, inverse_frame_quat, xyzw=xyzw)
--- a/env_manager/rbs_gym/rbs_gym/envs/utils/logging.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/utils/logging.py
@ -0,0 +1,25 @@
 from typing import Union
 from gymnasium import logger as gym_logger
 from gym_gz.utils import logger as gym_ign_logger
 def set_log_level(log_level: Union[int, str]):
     """
     Set log level for (Gym) Ignition.
     :param log_level: Either an integer level that is forwarded unchanged, or a
         level name (case-insensitive). "WARNING" is treated as "WARN" and any
         unrecognised name falls back to "DISABLED". Names are resolved to the
         attribute of the same name on the gymnasium logger module.
     """
     if isinstance(log_level, int):
         level = log_level
     else:
         level_name = str(log_level).upper()
         if level_name == "WARNING":
             level_name = "WARN"
         elif level_name not in ("DEBUG", "INFO", "WARN", "ERROR", "DISABLED"):
             level_name = "DISABLED"
         level = getattr(gym_logger, level_name)
     # The same level is applied to both the gym-gz logger and its scenario level
     gym_ign_logger.set_level(level=level, scenario_level=level)
--- a/env_manager/rbs_gym/rbs_gym/envs/utils/math.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/utils/math.py
@ -0,0 +1,46 @@
 from typing import List, Tuple
 import numpy as np
 def quat_mul(
     quat_0: Tuple[float, float, float, float],
     quat_1: Tuple[float, float, float, float],
     xyzw: bool = True,
 ) -> Tuple[float, float, float, float]:
     """
     Multiply two quaternions.
     With the formulas used here the result equals the Hamilton product
     `quat_1 * quat_0` (note the operand order).
     :param quat_0: First quaternion.
     :param quat_1: Second quaternion.
     :param xyzw: Component order of both inputs and of the result:
         (x, y, z, w) if True, otherwise (w, x, y, z).
     :return: Product quaternion in the same component order as the inputs.
     """
     # Split both operands into named components according to the ordering
     if xyzw:
         x0, y0, z0, w0 = quat_0
         x1, y1, z1, w1 = quat_1
     else:
         w0, x0, y0, z0 = quat_0
         w1, x1, y1, z1 = quat_1
     out_x = x1 * w0 + y1 * z0 - z1 * y0 + w1 * x0
     out_y = -x1 * z0 + y1 * w0 + z1 * x0 + w1 * y0
     out_z = x1 * y0 - y1 * x0 + z1 * w0 + w1 * z0
     out_w = -x1 * x0 - y1 * y0 - z1 * z0 + w1 * w0
     # Re-assemble the result in the ordering the caller asked for
     if xyzw:
         return (out_x, out_y, out_z, out_w)
     return (out_w, out_x, out_y, out_z)
 def distance_to_nearest_point(
     origin: Tuple[float, float, float], points: List[Tuple[float, float, float]]
 ) -> float:
     """
     Return the smallest Euclidean distance between `origin` and any of `points`.
     :param origin: Reference point.
     :param points: Candidate points, one point per row.
     :return: Distance from `origin` to the closest candidate.
     """
     offsets = np.array(points) - np.array(origin)
     distances = np.linalg.norm(offsets, axis=1)
     return distances.min()
 def get_nearest_point(
     origin: Tuple[float, float, float], points: List[Tuple[float, float, float]]
 ) -> Tuple[float, float, float]:
     """
     Return the element of `points` with the smallest Euclidean distance to `origin`.
     :param origin: Reference point.
     :param points: Candidate points, one point per row.
     :return: The closest candidate, taken unchanged from `points`.
     """
     offsets = np.array(points) - np.array(origin)
     nearest_index = np.linalg.norm(offsets, axis=1).argmin()
     return points[nearest_index]
--- a/env_manager/rbs_gym/rbs_gym/envs/utils/tf2_broadcaster.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/utils/tf2_broadcaster.py
@ -0,0 +1,74 @@
 import sys
 from typing import Tuple
 import rclpy
 from geometry_msgs.msg import TransformStamped
 from rclpy.node import Node
 from rclpy.parameter import Parameter
 from tf2_ros import StaticTransformBroadcaster
 class Tf2Broadcaster:
     """
     Helper that sends transforms through a `StaticTransformBroadcaster`
     created on an existing node.
     """
     def __init__(
         self,
         node: Node,
     ):
         self._node = node
         self.__tf2_broadcaster = StaticTransformBroadcaster(node=self._node)
         # One message instance is kept and overwritten on every broadcast
         self._transform_stamped = TransformStamped()
     def broadcast_tf(
         self,
         parent_frame_id: str,
         child_frame_id: str,
         translation: Tuple[float, float, float],
         rotation: Tuple[float, float, float, float],
         xyzw: bool = True,
     ):
         """
         Broadcast the transformation from `parent_frame_id` to `child_frame_id`.
         :param parent_frame_id: Frame written to the message header.
         :param child_frame_id: Child frame of the transform.
         :param translation: Translation (x, y, z).
         :param rotation: Rotation quaternion, ordered according to `xyzw`.
         :param xyzw: If True `rotation` is (x, y, z, w), otherwise (w, x, y, z).
         """
         msg = self._transform_stamped
         msg.header.frame_id = parent_frame_id
         msg.child_frame_id = child_frame_id
         # Stamp with the current time of the node's clock
         msg.header.stamp = self._node.get_clock().now().to_msg()
         for index, axis in enumerate(("x", "y", "z")):
             setattr(msg.transform.translation, axis, float(translation[index]))
         # Map the i-th element of `rotation` onto the matching message field
         field_order = ("x", "y", "z", "w") if xyzw else ("w", "x", "y", "z")
         for index, field in enumerate(field_order):
             setattr(msg.transform.rotation, field, float(rotation[index]))
         self.__tf2_broadcaster.sendTransform(msg)
 class Tf2BroadcasterStandalone(Node, Tf2Broadcaster):
     """
     `Tf2Broadcaster` that is its own ROS 2 node instead of using an existing one.
     """
     def __init__(
         self,
         node_name: str = "rbs_gym_tf_broadcaster",
         use_sim_time: bool = True,
     ):
         """
         :param node_name: Name of the created node.
         :param use_sim_time: Value assigned to the node's `use_sim_time` parameter.
         """
         try:
             rclpy.init()
         except Exception as init_error:
             # A failed init is tolerated as long as the context reports being ok
             context_is_ok = rclpy.ok()
             if not context_is_ok:
                 sys.exit(f"ROS 2 context could not be initialised: {init_error}")
         Node.__init__(self, node_name)
         sim_time_parameter = Parameter(
             "use_sim_time", type_=Parameter.Type.BOOL, value=use_sim_time
         )
         self.set_parameters([sim_time_parameter])
         Tf2Broadcaster.__init__(self, node=self)
--- a/env_manager/rbs_gym/rbs_gym/envs/utils/tf2_listener.py
+++ b/env_manager/rbs_gym/rbs_gym/envs/utils/tf2_listener.py
@ -0,0 +1,74 @@
 import sys
 from typing import Optional
 import rclpy
 from geometry_msgs.msg import Transform
 from rclpy.node import Node
 from rclpy.parameter import Parameter
 from tf2_ros import Buffer, TransformListener
 class Tf2Listener:
     """
     Helper for looking up transforms through a tf2 buffer that is fed by a
     `TransformListener` created on an existing node.
     """
     def __init__(
         self,
         node: Node,
     ):
         self._node = node
         # Create tf2 buffer and listener for transform lookup
         self.__tf2_buffer = Buffer()
         TransformListener(buffer=self.__tf2_buffer, node=node)
     def _lookup_latest_transform(self, target_frame: str, source_frame: str) -> Transform:
         """
         Look up the transform with a default-constructed `rclpy.time.Time()`.
         Exceptions raised by the buffer are propagated to the caller.
         """
         return self.__tf2_buffer.lookup_transform(
             target_frame=target_frame,
             source_frame=source_frame,
             time=rclpy.time.Time(),
         ).transform
     def lookup_transform_sync(
         self, target_frame: str, source_frame: str, retry: bool = True
     ) -> Optional[Transform]:
         """
         Look up the transform between two frames, optionally blocking until it exists.
         :param target_frame: Frame to which data should be transformed.
         :param source_frame: Frame in which the data originates.
         :param retry: If True, keep polling (1 s timeout per attempt) while
             `rclpy.ok()` until the transform becomes available; if False, give
             up after the first failed lookup.
         :return: The transform, or None if the lookup failed without retry or
             the retry loop ended because `rclpy.ok()` became false.
         """
         try:
             return self._lookup_latest_transform(target_frame, source_frame)
         except Exception:
             # Only regular errors are handled here; KeyboardInterrupt and
             # SystemExit are allowed to propagate instead of being swallowed.
             if not retry:
                 return None
         while rclpy.ok():
             if self.__tf2_buffer.can_transform(
                 target_frame=target_frame,
                 source_frame=source_frame,
                 time=rclpy.time.Time(),
                 timeout=rclpy.time.Duration(seconds=1, nanoseconds=0),
             ):
                 return self._lookup_latest_transform(target_frame, source_frame)
             self._node.get_logger().warn(
                 f'Lookup of transform from "{source_frame}"'
                 f' to "{target_frame}" failed, retrying...'
             )
         # rclpy is no longer ok, so there is nothing left to wait for
         return None
 class Tf2ListenerStandalone(Node, Tf2Listener):
     """
     `Tf2Listener` that is its own ROS 2 node instead of using an existing one.
     """
     def __init__(
         self,
         node_name: str = "rbs_gym_tf_listener",
         use_sim_time: bool = True,
     ):
         """
         :param node_name: Name of the created node.
         :param use_sim_time: Value assigned to the node's `use_sim_time` parameter.
         """
         try:
             rclpy.init()
         except Exception as init_error:
             # A failed init is tolerated as long as the context reports being ok
             context_is_ok = rclpy.ok()
             if not context_is_ok:
                 sys.exit(f"ROS 2 context could not be initialised: {init_error}")
         Node.__init__(self, node_name)
         sim_time_parameter = Parameter(
             "use_sim_time", type_=Parameter.Type.BOOL, value=use_sim_time
         )
         self.set_parameters([sim_time_parameter])
         Tf2Listener.__init__(self, node=self)
--- a/env_manager/rbs_gym/rbs_gym/envs/worlds/default.sdf
+++ b/env_manager/rbs_gym/rbs_gym/envs/worlds/default.sdf
@ -0,0 +1,24 @@
 <?xml version="1.0"?>
 <sdf version="1.9">
  <world name="rbs_gym_world">
    <!-- <physics name='1ms' type='ignored'> -->
    <!--   <dart> -->
    <!--     <collision_detector>bullet</collision_detector> -->
    <!--     <solver> -->
    <!--       <solver_type>pgs</solver_type> -->
    <!--     </solver> -->
    <!--   </dart> -->
    <!-- </physics> -->
    <!-- <plugin name='ignition::gazebo::systems::Contact' filename='ignition-gazebo-contact-system'/> -->
    <!--       -->
    <!-- Scene -->
    <!--       -->
    <gravity>0 0 0</gravity>
    <scene>
      <ambient>1.0 1.0 1.0</ambient>
      <grid>false</grid>
    </scene>
  </world>
 </sdf>
--- a/env_manager/rbs_gym/rbs_gym/utils/init.py
+++ b/env_manager/rbs_gym/rbs_gym/utils/init.py
@ -0,0 +1,9 @@
 from .utils import (
    ALGOS,
    create_test_env,
    get_latest_run_id,
    get_saved_hyperparams,
    get_trained_models,
    get_wrapper_class,
    linear_schedule,
 )
--- a/env_manager/rbs_gym/rbs_gym/utils/callbacks.py
+++ b/env_manager/rbs_gym/rbs_gym/utils/callbacks.py
@ -0,0 +1,306 @@
 import os
 import tempfile
 import time
 from copy import deepcopy
 from functools import wraps
 from threading import Thread
 from typing import Optional
 import optuna
 from sb3_contrib import TQC
 from stable_baselines3 import SAC
 from stable_baselines3.common.callbacks import (
    BaseCallback,
    CheckpointCallback,
    EvalCallback,
 )
 from stable_baselines3.common.vec_env import DummyVecEnv, VecEnv
 class TrialEvalCallback(EvalCallback):
     """
     Evaluation callback that reports its results to an Optuna trial and
     stops training when the trial should be pruned.
     """
     def __init__(
         self,
         eval_env: VecEnv,
         trial: optuna.Trial,
         n_eval_episodes: int = 5,
         eval_freq: int = 10000,
         deterministic: bool = True,
         verbose: int = 0,
         best_model_save_path: Optional[str] = None,
         log_path: Optional[str] = None,
     ):
         """
         All arguments except `trial` are forwarded to `EvalCallback`.
         :param trial: Optuna trial that receives the evaluation results.
         """
         parent_kwargs = dict(
             eval_env=eval_env,
             n_eval_episodes=n_eval_episodes,
             eval_freq=eval_freq,
             deterministic=deterministic,
             verbose=verbose,
             best_model_save_path=best_model_save_path,
             log_path=log_path,
         )
         super().__init__(**parent_kwargs)
         self.trial = trial
         # Number of evaluations reported so far (used as the report step)
         self.eval_idx = 0
         self.is_pruned = False
     def _on_step(self) -> bool:
         """
         Run an evaluation every `eval_freq` calls and report it to the trial.
         :return: False if the trial asked to be pruned, True otherwise.
         """
         evaluation_due = self.eval_freq > 0 and self.n_calls % self.eval_freq == 0
         if not evaluation_due:
             return True
         print("Evaluating trial")
         super()._on_step()
         self.eval_idx += 1
         # report best or report current ?
         # report num_timesteps or elapsed time ?
         self.trial.report(self.last_mean_reward, self.eval_idx)
         # Prune trial if needed
         if self.trial.should_prune():
             self.is_pruned = True
             return False
         return True
 class ParallelTrainCallback(BaseCallback):
     """
     Callback to explore (collect experience) and train (do gradient steps)
     at the same time using two separate threads.
     Normally used with off-policy algorithms and `train_freq=(1, "episode")`.
     - blocking mode: wait for the model to finish updating the policy before collecting new experience
         at the end of a rollout
     - force sync mode: stop training to update to the latest policy for collecting
         new experience
     NOTE(review): no option selecting the blocking / force sync modes is visible
     in this class; the rollout-end hook never waits for the training thread.
     :param gradient_steps: Number of gradient steps to do before
         sending the new policy
     :param verbose: Verbosity level
     :param sleep_time: Limit the fps in the thread collecting experience.
     """
     def __init__(
         self, gradient_steps: int = 100, verbose: int = 0, sleep_time: float = 0.0
     ):
         super(ParallelTrainCallback, self).__init__(verbose)
         # Overwritten in `_init_callback` with the batch size of the training copy
         self.batch_size = 0
         # Set to False when a training thread is started and back to True when
         # it finishes; it is a plain attribute, no lock is used around it
         self._model_ready = True
         # Second model instance on which the gradient steps are performed
         self._model = None
         self.gradient_steps = gradient_steps
         # Most recently started training thread (None until the first one)
         self.process = None
         self.model_class = None
         self.sleep_time = sleep_time
     def _init_callback(self) -> None:
         """
         Create the training copy of the model (by saving and re-loading it) and
         patch `train` and `save` of the model that collects experience.
         """
         temp_file = tempfile.TemporaryFile()
         # Windows TemporaryFile is not a io Buffer
         # we save the model in the logs/ folder
         if os.name == "nt":
             temp_file = os.path.join("logs", "model_tmp.zip")
         self.model.save(temp_file)
         # TODO (external): add support for other algorithms
         for model_class in [SAC, TQC]:
             if isinstance(self.model, model_class):
                 self.model_class = model_class
                 break
         assert (
             self.model_class is not None
         ), f"{self.model} is not supported for parallel training"
         self._model = self.model_class.load(temp_file)
         self.batch_size = self._model.batch_size
         # Disable train method
         # (the wrapper ignores its arguments and does nothing, so gradient steps
         # only happen on `self._model` in the training thread)
         def patch_train(function):
             @wraps(function)
             def wrapper(*args, **kwargs):
                 return
             return wrapper
         # Add logger for parallel training
         self._model.set_logger(self.model.logger)
         self.model.train = patch_train(self.model.train)
         # Hack: Re-add correct values at save time
         # (saving the collecting model is redirected to the training copy)
         def patch_save(function):
             @wraps(function)
             def wrapper(*args, **kwargs):
                 return self._model.save(*args, **kwargs)
             return wrapper
         self.model.save = patch_save(self.model.save)
     def train(self) -> None:
         """
         Start a daemon thread that runs `_train_thread` and mark the model as busy.
         """
         self._model_ready = False
         self.process = Thread(target=self._train_thread, daemon=True)
         self.process.start()
     def _train_thread(self) -> None:
         """
         Thread body: run `gradient_steps` gradient steps on the training copy,
         then mark the model as ready again.
         """
         self._model.train(
             gradient_steps=self.gradient_steps, batch_size=self.batch_size
         )
         self._model_ready = True
     def _on_step(self) -> bool:
         """
         Optionally sleep for `sleep_time` seconds; always lets training continue.
         """
         if self.sleep_time > 0:
             time.sleep(self.sleep_time)
         return True
     def _on_rollout_end(self) -> None:
         """
         If no training thread is running, synchronise both models and start the
         next training thread; otherwise do nothing.
         """
         if self._model_ready:
             # Hand the newly collected experience to the training copy
             self._model.replay_buffer = deepcopy(self.model.replay_buffer)
             # Pull the latest trained parameters into the collecting model
             self.model.set_parameters(deepcopy(self._model.get_parameters()))
             # presumably keeps `model.actor` pointing at the actor of the
             # updated policy - TODO confirm
             self.model.actor = self.model.policy.actor
             if self.num_timesteps >= self._model.learning_starts:
                 self.train()
             # Do not wait for the training loop to finish
             # self.process.join()
     def _on_training_end(self) -> None:
         """
         Join the last training thread, if one was ever started.
         """
         # Wait for the thread to terminate
         if self.process is not None:
             if self.verbose > 0:
                 print("Waiting for training thread to terminate")
             self.process.join()
 class SaveVecNormalizeCallback(BaseCallback):
     """
     Callback that saves the ``VecNormalize`` wrapper of the model every
     ``save_freq`` calls.
     :param save_freq: Number of callback calls between two saves.
     :param save_path: Folder in which the wrapper is saved.
     :param name_prefix: If given, files are named
         ``<name_prefix>_<num_timesteps>_steps.pkl``; if None (default) the single
         file ``vecnormalize.pkl`` is overwritten each time.
     :param verbose: Verbosity level; the save is reported when greater than 1.
     """
     def __init__(
         self,
         save_freq: int,
         save_path: str,
         name_prefix: Optional[str] = None,
         verbose: int = 0,
     ):
         super().__init__(verbose)
         self.save_freq = save_freq
         self.save_path = save_path
         self.name_prefix = name_prefix
     def _init_callback(self) -> None:
         # Make sure the output folder exists (skipped when no path is set)
         if self.save_path is None:
             return
         os.makedirs(self.save_path, exist_ok=True)
     def _on_step(self) -> bool:
         if self.n_calls % self.save_freq != 0:
             return True
         if self.name_prefix is None:
             file_name = "vecnormalize.pkl"
         else:
             file_name = f"{self.name_prefix}_{self.num_timesteps}_steps.pkl"
         path = os.path.join(self.save_path, file_name)
         # Nothing is written when the model has no VecNormalize wrapper
         vec_normalize = self.model.get_vec_normalize_env()
         if vec_normalize is not None:
             vec_normalize.save(path)
             if self.verbose > 1:
                 print(f"Saving VecNormalize to {path}")
         return True
 class CheckpointCallbackWithReplayBuffer(CheckpointCallback):
     """
     Callback for saving a model every ``save_freq`` steps
     :param save_freq: Number of callback calls between two checkpoints.
     :param save_path: Path to the folder where the model will be saved.
     :param name_prefix: Common prefix to the saved models
     :param save_replay_buffer: If enabled, save replay buffer together with model (if supported by algorithm).
         The buffer is always written to ``replay_buffer.pkl`` inside ``save_path``,
         i.e. only the most recent buffer is kept.
     :param verbose: Verbosity level; saves are reported when greater than 0.
     """
     def __init__(
         self,
         save_freq: int,
         save_path: str,
         name_prefix: str = "rl_model",
         save_replay_buffer: bool = False,
         verbose: int = 0,
     ):
         # Keyword arguments are used on purpose: recent Stable-Baselines3 releases
         # have additional parameters between `name_prefix` and `verbose`, so a
         # positional `verbose` would be bound to the wrong parameter.
         super().__init__(
             save_freq=save_freq,
             save_path=save_path,
             name_prefix=name_prefix,
             verbose=verbose,
         )
         self.save_replay_buffer = save_replay_buffer
     def _on_step(self) -> bool:
         """
         Save the model (and optionally its replay buffer) every ``save_freq`` calls.
         :return: Always True, the callback never stops training.
         """
         if self.n_calls % self.save_freq == 0:
             path = os.path.join(
                 self.save_path, f"{self.name_prefix}_{self.num_timesteps}_steps"
             )
             self.model.save(path)
             if self.verbose > 0:
                 print(f"Saving model checkpoint to {path}")
             # Algorithms without a replay buffer are skipped instead of crashing
             if self.save_replay_buffer and hasattr(self.model, "save_replay_buffer"):
                 path_replay_buffer = os.path.join(self.save_path, "replay_buffer.pkl")
                 self.model.save_replay_buffer(path_replay_buffer)
                 if self.verbose > 0:
                     print(f"Saving replay buffer to {path_replay_buffer}")
         return True
 class CurriculumLoggerCallback(BaseCallback):
     """
     Callback that forwards ``curriculum*`` entries of the environment infos
     to the logger.
     Keys containing ``__mean_step__`` are averaged on every step, keys containing
     ``__mean_episode__`` are averaged at the end of a rollout and all remaining
     curriculum keys are recorded as-is at the end of a rollout.
     """
     def __init__(self, verbose=0):
         super().__init__(verbose)
     def _on_step(self) -> bool:
         step_tag = "__mean_step__"
         for infos in self.locals["infos"]:
             for info_key, info_value in infos.items():
                 if info_key.startswith("curriculum") and step_tag in info_key:
                     # The tag is stripped from the key under which it is logged
                     self.logger.record_mean(
                         key=info_key.replace(step_tag, ""), value=info_value
                     )
         return True
     def _on_rollout_end(self) -> None:
         episode_tag = "__mean_episode__"
         for infos in self.locals["infos"]:
             for info_key, info_value in infos.items():
                 # Per-step values were already handled in `_on_step`
                 if not info_key.startswith("curriculum") or "__mean_step__" in info_key:
                     continue
                 if episode_tag in info_key:
                     self.logger.record_mean(
                         key=info_key.replace(episode_tag, ""), value=info_value
                     )
                     continue
                 # String values are excluded from the tensorboard output
                 exclude = "tensorboard" if isinstance(info_value, str) else None
                 self.logger.record(key=info_key, value=info_value, exclude=exclude)
 class MetricsCallback(BaseCallback):
     """
     Placeholder callback for metrics; it currently records nothing.
     """
     def __init__(self, verbose: int = 0):
         super(MetricsCallback, self).__init__(verbose)
     def _on_step(self) -> bool:
         """
         :return: Always True so that training continues. Returning None (as a
             bare ``pass`` would) is falsy and is treated as a request to stop.
         """
         return True
--- a/env_manager/rbs_gym/rbs_gym/utils/exp_manager.py
+++ b/env_manager/rbs_gym/rbs_gym/utils/exp_manager.py
@ -0,0 +1,931 @@
 import argparse
 import os
 import pickle as pkl
 import time
 import warnings
 from collections import OrderedDict
 from pprint import pprint
 from typing import Any, Callable, Dict, List, Optional, Tuple
 import gymnasium as gym
 import numpy as np
 import optuna
 import yaml
 from optuna.integration.skopt import SkoptSampler
 from optuna.pruners import BasePruner, MedianPruner, NopPruner, SuccessiveHalvingPruner
 from optuna.samplers import BaseSampler, RandomSampler, TPESampler
 from optuna.visualization import plot_optimization_history, plot_param_importances
 from stable_baselines3 import HerReplayBuffer
 from stable_baselines3.common.base_class import BaseAlgorithm
 from stable_baselines3.common.callbacks import BaseCallback, EvalCallback
 from stable_baselines3.common.env_util import make_vec_env
 from stable_baselines3.common.monitor import Monitor
 from stable_baselines3.common.noise import (
    NormalActionNoise,
    OrnsteinUhlenbeckActionNoise,
 )
 from stable_baselines3.common.preprocessing import (
    is_image_space,
    is_image_space_channels_first,
 )
 from stable_baselines3.common.utils import constant_fn
 from stable_baselines3.common.vec_env import (
    DummyVecEnv,
    SubprocVecEnv,
    VecEnv,
    VecFrameStack,
    VecNormalize,
    VecTransposeImage,
    is_vecenv_wrapped,
 )
 from torch import nn as nn
 from rbs_gym.utils.callbacks import (
    CheckpointCallbackWithReplayBuffer,
    SaveVecNormalizeCallback,
    TrialEvalCallback,
 )
 from rbs_gym.utils.hyperparams_opt import HYPERPARAMS_SAMPLER
 from rbs_gym.utils.utils import (
    ALGOS,
    get_callback_list,
    get_latest_run_id,
    get_wrapper_class,
    linear_schedule,
 )
 class ExperimentManager(object):
    """
    Experiment manager: read the hyperparameters,
    preprocess them, create the environment and the RL model.
    Please take a look at `train.py` to have the details for each argument.
    """
    def __init__(
        self,
        args: argparse.Namespace,
        algo: str,
        env_id: str,
        log_folder: str,
        tensorboard_log: str = "",
        n_timesteps: int = 0,
        eval_freq: int = 10000,
        n_eval_episodes: int = 5,
        save_freq: int = -1,
        hyperparams: Optional[Dict[str, Any]] = None,
        env_kwargs: Optional[Dict[str, Any]] = None,
        trained_agent: str = "",
        optimize_hyperparameters: bool = False,
        storage: Optional[str] = None,
        study_name: Optional[str] = None,
        n_trials: int = 1,
        n_jobs: int = 1,
        sampler: str = "tpe",
        pruner: str = "median",
        optimization_log_path: Optional[str] = None,
        n_startup_trials: int = 0,
        n_evaluations: int = 1,
        truncate_last_trajectory: bool = False,
        uuid_str: str = "",
        seed: int = 0,
        log_interval: int = 0,
        save_replay_buffer: bool = False,
        preload_replay_buffer: str = "",
        verbose: int = 1,
        vec_env_type: str = "dummy",
        n_eval_envs: int = 1,
        no_optim_plots: bool = False,
    ):
        super(ExperimentManager, self).__init__()
        self.algo = algo
        self.env_id = env_id
        # Custom params
        self.custom_hyperparams = hyperparams
        self.env_kwargs = {} if env_kwargs is None else env_kwargs
        self.n_timesteps = n_timesteps
        self.normalize = False
        self.normalize_kwargs = {}
        self.env_wrapper = None
        self.frame_stack = None
        self.seed = seed
        self.optimization_log_path = optimization_log_path
        self.vec_env_class = {"dummy": DummyVecEnv, "subproc": SubprocVecEnv}[
            vec_env_type
        ]
        self.vec_env_kwargs = {}
        # self.vec_env_kwargs = {} if vec_env_type == "dummy" else {"start_method": "fork"}
        # Callbacks
        self.specified_callbacks = []
        self.callbacks = []
        self.save_freq = save_freq
        self.eval_freq = eval_freq
        self.n_eval_episodes = n_eval_episodes
        self.n_eval_envs = n_eval_envs
        self.n_envs = 1  # it will be updated when reading hyperparams
        self.n_actions = None  # For DDPG/TD3 action noise objects
        self._hyperparams = {}
        self.trained_agent = trained_agent
        self.continue_training = trained_agent.endswith(".zip") and os.path.isfile(
            trained_agent
        )
        self.truncate_last_trajectory = truncate_last_trajectory
        self.preload_replay_buffer = preload_replay_buffer
        self._is_atari = self.is_atari(env_id)
        self._is_gazebo_env = self.is_gazebo_env(env_id)
        # Hyperparameter optimization config
        self.optimize_hyperparameters = optimize_hyperparameters
        self.storage = storage
        self.study_name = study_name
        self.no_optim_plots = no_optim_plots
        # maximum number of trials for finding the best hyperparams
        self.n_trials = n_trials
        # number of parallel jobs when doing hyperparameter search
        self.n_jobs = n_jobs
        self.sampler = sampler
        self.pruner = pruner
        self.n_startup_trials = n_startup_trials
        self.n_evaluations = n_evaluations
        self.deterministic_eval = not self.is_atari(self.env_id)
        # Logging
        self.log_folder = log_folder
        self.tensorboard_log = (
            None if tensorboard_log == "" else os.path.join(tensorboard_log, env_id)
        )
        self.verbose = verbose
        self.args = args
        self.log_interval = log_interval
        self.save_replay_buffer = save_replay_buffer
        self.log_path = f"{log_folder}/{self.algo}/"
        self.save_path = os.path.join(
            self.log_path,
            f"{self.env_id}_{get_latest_run_id(self.log_path, self.env_id) + 1}{uuid_str}",
        )
        self.params_path = f"{self.save_path}/{self.env_id}"
    def setup_experiment(self) -> Optional[Tuple[BaseAlgorithm, Dict[str, Any]]]:
        """
        Read hyperparameters, pre-process them (create schedules, wrappers, callbacks, action noise objects)
        create the environment and possibly the model.
        :return: the initialized RL model
        """
        hyperparams, saved_hyperparams = self.read_hyperparameters()
        hyperparams, self.env_wrapper, self.callbacks = self._preprocess_hyperparams(
            hyperparams
        )
        # Create env to have access to action space for action noise
        self._env = self.create_envs(self.n_envs, no_log=False)
        self.create_log_folder()
        self.create_callbacks()
        self._hyperparams = self._preprocess_action_noise(hyperparams, self._env)
        if self.continue_training:
            model = self._load_pretrained_agent(self._hyperparams, self._env)
        elif self.optimize_hyperparameters:
            return None
        else:
            # Train an agent from scratch
            model = ALGOS[self.algo](
                env=self._env,
                tensorboard_log=self.tensorboard_log,
                seed=self.seed,
                verbose=self.verbose,
                **self._hyperparams,
            )
        # Pre-load replay buffer if enabled
        if self.preload_replay_buffer:
            if self.preload_replay_buffer.endswith(".pkl"):
                replay_buffer_path = self.preload_replay_buffer
            else:
                replay_buffer_path = os.path.join(
                    self.preload_replay_buffer, "replay_buffer.pkl"
                )
            if os.path.exists(replay_buffer_path):
                print("Pre-loading replay buffer")
                if self.algo == "her":
                    model.load_replay_buffer(
                        replay_buffer_path, self.truncate_last_trajectory
                    )
                else:
                    model.load_replay_buffer(replay_buffer_path)
            else:
                raise Exception(f"Replay buffer {replay_buffer_path} " "does not exist")
        self._save_config(saved_hyperparams)
        return model, saved_hyperparams
    def learn(self, model: BaseAlgorithm) -> None:
        """
        :param model: an initialized RL model
        """
        kwargs = {}
        if self.log_interval > -1:
            kwargs = {"log_interval": self.log_interval}
        if len(self.callbacks) > 0:
            kwargs["callback"] = self.callbacks
        if self.continue_training:
            kwargs["reset_num_timesteps"] = False
            model.env.reset()
        try:
            model.learn(self.n_timesteps, **kwargs)
        except Exception as e:
            print(f"Caught an exception during training of the model: {e}")
            self.save_trained_model(model)
        finally:
            # Release resources
            try:
                model.env.close()
            except EOFError:
                pass
    def save_trained_model(self, model: BaseAlgorithm) -> None:
        """
        Save trained model optionally with its replay buffer
        and ``VecNormalize`` statistics
        :param model:
        """
        print(f"Saving to {self.save_path}")
        model.save(f"{self.save_path}/{self.env_id}")
        if hasattr(model, "save_replay_buffer") and self.save_replay_buffer:
            print("Saving replay buffer")
            model.save_replay_buffer(os.path.join(self.save_path, "replay_buffer.pkl"))
        if self.normalize:
            # Important: save the running average, for testing the agent we need that normalization
            model.get_vec_normalize_env().save(
                os.path.join(self.params_path, "vecnormalize.pkl")
            )
    def _save_config(self, saved_hyperparams: Dict[str, Any]) -> None:
        """
        Save unprocessed hyperparameters, this can be use later
        to reproduce an experiment.
        :param saved_hyperparams:
        """
        # Save hyperparams
        with open(os.path.join(self.params_path, "config.yml"), "w") as f:
            yaml.dump(saved_hyperparams, f)
        # save command line arguments
        with open(os.path.join(self.params_path, "args.yml"), "w") as f:
            ordered_args = OrderedDict(
                [(key, vars(self.args)[key]) for key in sorted(vars(self.args).keys())]
            )
            yaml.dump(ordered_args, f)
        print(f"Log path: {self.save_path}")
    def read_hyperparameters(self) -> Tuple[Dict[str, Any], Dict[str, Any]]:
        # Load hyperparameters from yaml file
        hyperparams_dir = os.path.abspath(
            os.path.join(
                os.path.realpath(__file__), *3 * [os.path.pardir], "hyperparams"
            )
        )
        with open(f"{hyperparams_dir}/{self.algo}.yml", "r") as f:
            hyperparams_dict = yaml.safe_load(f)
            if self.env_id in list(hyperparams_dict.keys()):
                hyperparams = hyperparams_dict[self.env_id]
            elif self._is_atari:
                hyperparams = hyperparams_dict["atari"]
            else:
                raise ValueError(
                    f"Hyperparameters not found for {self.algo}-{self.env_id}"
                )
        if self.custom_hyperparams is not None:
            # Overwrite hyperparams if needed
            hyperparams.update(self.custom_hyperparams)
        # Sort hyperparams that will be saved
        saved_hyperparams = OrderedDict(
            [(key, hyperparams[key]) for key in sorted(hyperparams.keys())]
        )
        if self.verbose > 0:
            print(
                "Default hyperparameters for environment (ones being tuned will be overridden):"
            )
            pprint(saved_hyperparams)
        return hyperparams, saved_hyperparams
    @staticmethod
    def _preprocess_schedules(hyperparams: Dict[str, Any]) -> Dict[str, Any]:
        # Create schedules
        for key in ["learning_rate", "clip_range", "clip_range_vf"]:
            if key not in hyperparams:
                continue
            if isinstance(hyperparams[key], str):
                schedule, initial_value = hyperparams[key].split("_")
                initial_value = float(initial_value)
                hyperparams[key] = linear_schedule(initial_value)
            elif isinstance(hyperparams[key], (float, int)):
                # Negative value: ignore (ex: for clipping)
                if hyperparams[key] < 0:
                    continue
                hyperparams[key] = constant_fn(float(hyperparams[key]))
            else:
                raise ValueError(f"Invalid value for {key}: {hyperparams[key]}")
        return hyperparams
    def _preprocess_normalization(self, hyperparams: Dict[str, Any]) -> Dict[str, Any]:
        if "normalize" in hyperparams.keys():
            self.normalize = hyperparams["normalize"]
            # Special case, instead of both normalizing
            # both observation and reward, we can normalize one of the two.
            # in that case `hyperparams["normalize"]` is a string
            # that can be evaluated as python,
            # ex: "dict(norm_obs=False, norm_reward=True)"
            if isinstance(self.normalize, str):
                self.normalize_kwargs = eval(self.normalize)
                self.normalize = True
            # Use the same discount factor as for the algorithm
            if "gamma" in hyperparams:
                self.normalize_kwargs["gamma"] = hyperparams["gamma"]
            del hyperparams["normalize"]
        return hyperparams
    def _preprocess_hyperparams(
        self, hyperparams: Dict[str, Any]
    ) -> Tuple[Dict[str, Any], Optional[Callable], List[BaseCallback]]:
        """
        Convert the raw hyperparameters into arguments for the model constructor.

        Entries that configure the experiment rather than the algorithm
        (``n_envs``, ``n_timesteps``, ``normalize``, ``frame_stack``,
        ``env_wrapper``, ``callback``) are stored on ``self`` and/or removed
        from the dictionary.

        :param hyperparams: Hyperparameters as loaded from the configuration
        :return: The cleaned hyperparameters, the environment wrapper (if any)
            and the list of callbacks
        :raises KeyError: If no timestep budget was set on the manager and
            ``n_timesteps`` is missing from ``hyperparams``
        """
        self.n_envs = hyperparams.get("n_envs", 1)
        if self.verbose > 0:
            print(f"Using {self.n_envs} environments")
        # Convert schedule strings to objects
        hyperparams = self._preprocess_schedules(hyperparams)
        # Pre-process train_freq
        if "train_freq" in hyperparams and isinstance(hyperparams["train_freq"], list):
            hyperparams["train_freq"] = tuple(hyperparams["train_freq"])
        # Should we overwrite the number of timesteps?
        if self.n_timesteps > 0:
            if self.verbose:
                print(f"Overwriting n_timesteps with n={self.n_timesteps}")
        else:
            self.n_timesteps = int(hyperparams["n_timesteps"])
        # Pre-process normalize config
        hyperparams = self._preprocess_normalization(hyperparams)
        # Pre-process policy/buffer keyword arguments
        # Convert to python object if needed
        # TODO: Use the new replay_buffer_class argument of offpolicy algorithms instead of monkey patch
        # NOTE(review): eval() runs arbitrary code coming from the config,
        # only use hyperparameter files from a trusted source
        for kwargs_key in (
            "policy_kwargs",
            "replay_buffer_class",
            "replay_buffer_kwargs",
        ):
            raw_value = hyperparams.get(kwargs_key)
            if isinstance(raw_value, str):
                hyperparams[kwargs_key] = eval(raw_value)
        # Delete keys so the dict can be pass to the model constructor
        hyperparams.pop("n_envs", None)
        # `n_timesteps` may be absent when the budget is overridden on the
        # manager, a plain `del` would raise KeyError in that case
        hyperparams.pop("n_timesteps", None)
        if "frame_stack" in hyperparams:
            self.frame_stack = hyperparams.pop("frame_stack")
        # obtain a class object from a wrapper name string in hyperparams
        # and delete the entry
        env_wrapper = get_wrapper_class(hyperparams)
        hyperparams.pop("env_wrapper", None)
        callbacks = get_callback_list(hyperparams)
        if "callback" in hyperparams:
            self.specified_callbacks = hyperparams.pop("callback")
        return hyperparams, env_wrapper, callbacks
    def _preprocess_action_noise(
        self, hyperparams: Dict[str, Any], env: VecEnv
    ) -> Dict[str, Any]:
        # Parse noise string
        # Note: only off-policy algorithms are supported
        if hyperparams.get("noise_type") is not None:
            noise_type = hyperparams["noise_type"].strip()
            noise_std = hyperparams["noise_std"]
            # Save for later (hyperparameter optimization)
            self.n_actions = env.action_space.shape[0]
            if "normal" in noise_type:
                hyperparams["action_noise"] = NormalActionNoise(
                    mean=np.zeros(self.n_actions),
                    sigma=noise_std * np.ones(self.n_actions),
                )
            elif "ornstein-uhlenbeck" in noise_type:
                hyperparams["action_noise"] = OrnsteinUhlenbeckActionNoise(
                    mean=np.zeros(self.n_actions),
                    sigma=noise_std * np.ones(self.n_actions),
                )
            else:
                raise RuntimeError(f'Unknown noise type "{noise_type}"')
            print(f"Applying {noise_type} noise with std {noise_std}")
            del hyperparams["noise_type"]
            del hyperparams["noise_std"]
        return hyperparams
    def create_log_folder(self):
        """Create the ``self.params_path`` directory (with parents), keeping it if it already exists."""
        params_dir = self.params_path
        os.makedirs(params_dir, exist_ok=True)
    def create_callbacks(self):
        """
        Append the checkpoint and evaluation callbacks to ``self.callbacks``.

        A checkpoint callback is added when ``self.save_freq`` is positive.
        An evaluation callback is added when ``self.eval_freq`` is positive and
        hyperparameter optimization is not running. Both frequencies are
        divided by ``self.n_envs`` (minimum 1) and written back to ``self``.
        """
        if self.save_freq > 0:
            # Account for the number of parallel environments
            self.save_freq = max(self.save_freq // self.n_envs, 1)
            checkpoint_callback = CheckpointCallbackWithReplayBuffer(
                save_freq=self.save_freq,
                save_path=self.save_path,
                name_prefix="rl_model",
                save_replay_buffer=self.save_replay_buffer,
                verbose=self.verbose,
            )
            self.callbacks.append(checkpoint_callback)
        wants_eval = self.eval_freq > 0 and not self.optimize_hyperparameters
        if not wants_eval:
            return
        # Account for the number of parallel environments
        self.eval_freq = max(self.eval_freq // self.n_envs, 1)
        if self.verbose > 0:
            print("Creating test environment")
        # Passed as `callback_on_new_best` of the evaluation callback below
        on_new_best = SaveVecNormalizeCallback(save_freq=1, save_path=self.params_path)
        # TODO: Use separate environment(s) for evaluation
        # self.create_envs(self.n_eval_envs, eval_env=True),
        self.callbacks.append(
            EvalCallback(
                eval_env=self._env,
                callback_on_new_best=on_new_best,
                best_model_save_path=self.save_path,
                n_eval_episodes=self.n_eval_episodes,
                log_path=self.save_path,
                eval_freq=self.eval_freq,
                deterministic=self.deterministic_eval,
            )
        )
    @staticmethod
    def is_atari(env_id: str) -> bool:
        """
        :param env_id: Id of a registered environment
        :return: Whether the entry point of the environment mentions ``AtariEnv``
        """
        spec = gym.envs.registry[env_id]
        return "AtariEnv" in str(spec.entry_point)
    @staticmethod
    def is_bullet(env_id: str) -> bool:
        """
        :param env_id: Id of a registered environment
        :return: Whether the entry point of the environment mentions ``pybullet_envs``
        """
        spec = gym.envs.registry[env_id]
        return "pybullet_envs" in str(spec.entry_point)
    @staticmethod
    def is_robotics_env(env_id: str) -> bool:
        """
        :param env_id: Id of a registered environment
        :return: Whether the entry point of the environment mentions
            ``gym.envs.robotics`` or ``panda_gym.envs``
        """
        entry_point_name = str(gym.envs.registry[env_id].entry_point)
        robotics_markers = ("gym.envs.robotics", "panda_gym.envs")
        return any(marker in entry_point_name for marker in robotics_markers)
    @staticmethod
    def is_gazebo_env(env_id: str) -> bool:
        """
        :param env_id: Id of a registered environment
        :return: Whether the entry point of the environment mentions ``Gazebo``
        """
        entry_point = gym.envs.registry[env_id].entry_point
        # The entry point may be registered as a callable instead of a string,
        # convert it first (as done in `is_atari`, `is_bullet` and
        # `is_robotics_env`) so that the `in` test cannot raise TypeError
        return "Gazebo" in str(entry_point)
    def _maybe_normalize(self, env: VecEnv, eval_env: bool) -> VecEnv:
        """
        Wrap the env into a VecNormalize wrapper if needed
        and load saved statistics when present.
        :param env:
        :param eval_env:
        :return:
        """
        # Pretrained model, load normalization
        path_ = os.path.join(os.path.dirname(self.trained_agent), self.env_id)
        path_ = os.path.join(path_, "vecnormalize.pkl")
        if os.path.exists(path_):
            print("Loading saved VecNormalize stats")
            env = VecNormalize.load(path_, env)
            # Deactivate training and reward normalization
            if eval_env:
                env.training = False
                env.norm_reward = False
        elif self.normalize:
            # Copy to avoid changing default values by reference
            local_normalize_kwargs = self.normalize_kwargs.copy()
            # Do not normalize reward for env used for evaluation
            if eval_env:
                if len(local_normalize_kwargs) > 0:
                    local_normalize_kwargs["norm_reward"] = False
                else:
                    local_normalize_kwargs = {"norm_reward": False}
            if self.verbose > 0:
                if len(local_normalize_kwargs) > 0:
                    print(f"Normalization activated: {local_normalize_kwargs}")
                else:
                    print("Normalizing input and reward")
            # Note: The following line was added but not sure if it is still required
            env.num_envs = self.n_envs
            env = VecNormalize(env, **local_normalize_kwargs)
        return env
    def create_envs(
        self, n_envs: int, eval_env: bool = False, no_log: bool = False
    ) -> VecEnv:
        """
        Create the environment and wrap it if necessary.

        :param n_envs: Number of environments to create
        :param eval_env: Whether is it an environment used for evaluation or not
        :param no_log: Do not log training when doing hyperparameter optim
            (issue with writing the same file)
        :return: the vectorized environment, with appropriate wrappers
        """
        # Do not log eval env (issue with writing the same file)
        monitor_dir = self.save_path if not (eval_env or no_log) else None
        # Special case for GoalEnvs: log success rate too
        logs_success = (
            "Neck" in self.env_id
            or self.is_robotics_env(self.env_id)
            or "parking-v0" in self.env_id
        )
        monitor_kwargs = dict(info_keywords=("is_success",)) if logs_success else {}
        # On most env, SubprocVecEnv does not help and is quite memory hungry
        # therefore we use DummyVecEnv by default
        vec_env = make_vec_env(
            env_id=self.env_id,
            n_envs=n_envs,
            seed=self.seed,
            env_kwargs=self.env_kwargs,
            monitor_dir=monitor_dir,
            wrapper_class=self.env_wrapper,
            vec_env_cls=self.vec_env_class,
            vec_env_kwargs=self.vec_env_kwargs,
            monitor_kwargs=monitor_kwargs,
        )
        # Wrap the env into a VecNormalize wrapper if needed
        # and load saved statistics when present
        vec_env = self._maybe_normalize(vec_env, eval_env)
        # Optional Frame-stacking
        if self.frame_stack is not None:
            n_stack = self.frame_stack
            vec_env = VecFrameStack(vec_env, n_stack)
            if self.verbose > 0:
                print(f"Stacking {n_stack} frames")
        if is_vecenv_wrapped(vec_env, VecTransposeImage):
            return vec_env

        def needs_transpose(space) -> bool:
            # An image space that is not already channels-first
            return is_image_space(space) and not is_image_space_channels_first(space)

        obs_space = vec_env.observation_space
        if isinstance(obs_space, gym.spaces.Dict):
            # If even one of the keys is a image-space in need of transpose, apply transpose
            # If the image spaces are not consistent (for instance one is channel first,
            # the other channel last), VecTransposeImage will throw an error
            wrap_with_vectranspose = any(
                needs_transpose(space) for space in obs_space.spaces.values()
            )
        else:
            wrap_with_vectranspose = needs_transpose(obs_space)
        if wrap_with_vectranspose:
            if self.verbose >= 1:
                print("Wrapping the env in a VecTransposeImage.")
            vec_env = VecTransposeImage(vec_env)
        return vec_env
    def _load_pretrained_agent(
        self, hyperparams: Dict[str, Any], env: VecEnv
    ) -> BaseAlgorithm:
        """
        Load the agent stored at ``self.trained_agent`` to continue its training.

        The ``policy`` and ``policy_kwargs`` entries are removed from
        ``hyperparams`` before the remaining entries are forwarded to ``load``.
        When ``self.preload_replay_buffer`` is not set and a ``replay_buffer.pkl``
        file exists next to the agent, that replay buffer is loaded too.

        :param hyperparams: Hyperparameters forwarded to the loaded model
        :param env: Environment attached to the loaded model
        :return: The loaded model
        """
        # Continue training
        print(
            f"Loading pretrained agent '{self.trained_agent}' to continue its training"
        )
        # Policy should not be changed
        del hyperparams["policy"]
        hyperparams.pop("policy_kwargs", None)
        algo_class = ALGOS[self.algo]
        model = algo_class.load(
            self.trained_agent,
            env=env,
            seed=self.seed,
            tensorboard_log=self.tensorboard_log,
            verbose=self.verbose,
            **hyperparams,
        )
        agent_dir = os.path.dirname(self.trained_agent)
        buffer_file = os.path.join(agent_dir, "replay_buffer.pkl")
        restore_buffer = not self.preload_replay_buffer and os.path.exists(buffer_file)
        if restore_buffer:
            print("Loading replay buffer")
            # `truncate_last_traj` will be taken into account only if we use HER replay buffer
            model.load_replay_buffer(
                buffer_file, truncate_last_traj=self.truncate_last_trajectory
            )
        return model
    def _create_sampler(self, sampler_method: str) -> BaseSampler:
        # n_warmup_steps: Disable pruner until the trial reaches the given number of step.
        if sampler_method == "random":
            sampler = RandomSampler(seed=self.seed)
        elif sampler_method == "tpe":
            # TODO (external): try with multivariate=True
            sampler = TPESampler(n_startup_trials=self.n_startup_trials, seed=self.seed)
        elif sampler_method == "skopt":
            # cf https://scikit-optimize.github.io/#skopt.Optimizer
            # GP: gaussian process
            # Gradient boosted regression: GBRT
            sampler = SkoptSampler(
                skopt_kwargs={"base_estimator": "GP", "acq_func": "gp_hedge"}
            )
        else:
            raise ValueError(f"Unknown sampler: {sampler_method}")
        return sampler
    def _create_pruner(self, pruner_method: str) -> BasePruner:
        if pruner_method == "halving":
            pruner = SuccessiveHalvingPruner(
                min_resource=1, reduction_factor=4, min_early_stopping_rate=0
            )
        elif pruner_method == "median":
            pruner = MedianPruner(
                n_startup_trials=self.n_startup_trials,
                n_warmup_steps=self.n_evaluations // 3,
            )
        elif pruner_method == "none":
            # Do not prune
            pruner = NopPruner()
        else:
            raise ValueError(f"Unknown pruner: {pruner_method}")
        return pruner
    # Important: Objective changed in this project (rbs_gym) to evaluate on the same environment that is used for training (cannot have two existing simultaneously with the current setup)
    def objective(self, trial: optuna.Trial) -> float:
        """
        Run one hyperparameter optimization trial.

        The candidate hyperparameters are sampled with
        ``HYPERPARAMS_SAMPLER[self.algo]``, merged over a copy of
        ``self._hyperparams`` and dumped to a YAML file below
        ``self.params_path``. A model is then created on ``self._env`` and
        trained for ``self.n_timesteps`` steps with a ``TrialEvalCallback``
        that evaluates on that same environment.

        :param trial: Optuna trial used to sample the hyperparameters
        :return: The last mean reward reported by the evaluation callback
        :raises optuna.exceptions.TrialPruned: If training raises any exception
            or the evaluation callback reports the trial as pruned
        :raises Exception: If a replay buffer should be pre-loaded but the file
            does not exist
        """
        kwargs = self._hyperparams.copy()
        trial.model_class = None
        # Hack to use DDPG/TD3 noise sampler
        trial.n_actions = self._env.action_space.shape[0]
        # Hack when using HerReplayBuffer
        if kwargs.get("replay_buffer_class") == HerReplayBuffer:
            trial.her_kwargs = kwargs.get("replay_buffer_kwargs", {})
        # Sample candidate hyperparameters
        kwargs.update(HYPERPARAMS_SAMPLER[self.algo](trial))
        print(f"\nRunning a new trial with hyperparameters: {kwargs}")
        # Write hyperparameters into a file
        trial_params_path = os.path.join(self.params_path, "optimization")
        os.makedirs(trial_params_path, exist_ok=True)
        with open(
            os.path.join(
                trial_params_path, f"hyperparameters_trial_{trial.number}.yml"
            ),
            "w",
        ) as f:
            yaml.dump(kwargs, f)
        model = ALGOS[self.algo](
            env=self._env,
            # Note: Here I enabled tensorboard logs
            tensorboard_log=self.tensorboard_log,
            # Note: Here I differ and I seed the trial. I want all trials to have the same starting conditions
            seed=self.seed,
            verbose=self.verbose,
            **kwargs,
        )
        # Pre-load replay buffer if enabled
        # (`self.preload_replay_buffer` is either the `.pkl` file itself or
        # a directory containing `replay_buffer.pkl`)
        if self.preload_replay_buffer:
            if self.preload_replay_buffer.endswith(".pkl"):
                replay_buffer_path = self.preload_replay_buffer
            else:
                replay_buffer_path = os.path.join(
                    self.preload_replay_buffer, "replay_buffer.pkl"
                )
            if os.path.exists(replay_buffer_path):
                print("Pre-loading replay buffer")
                if self.algo == "her":
                    model.load_replay_buffer(
                        replay_buffer_path, self.truncate_last_trajectory
                    )
                else:
                    model.load_replay_buffer(replay_buffer_path)
            else:
                raise Exception(f"Replay buffer {replay_buffer_path} " "does not exist")
        model.trial = trial
        # Spread `self.n_evaluations` evaluations over the training budget
        eval_freq = int(self.n_timesteps / self.n_evaluations)
        # Account for parallel envs
        eval_freq_ = max(eval_freq // model.get_env().num_envs, 1)
        # Re-create the callbacks specified in the hyperparameters for this trial
        callbacks = get_callback_list({"callback": self.specified_callbacks})
        # Per-trial output directory, only when an optimization log path is set
        path = None
        if self.optimization_log_path is not None:
            path = os.path.join(
                self.optimization_log_path, f"trial_{str(trial.number)}"
            )
        eval_callback = TrialEvalCallback(
            # TODO: Use a separate environment for evaluation during trial
            model.env,
            model.trial,
            best_model_save_path=path,
            log_path=path,
            n_eval_episodes=self.n_eval_episodes,
            eval_freq=eval_freq_,
            deterministic=self.deterministic_eval,
            verbose=self.verbose,
        )
        callbacks.append(eval_callback)
        try:
            model.learn(self.n_timesteps, callback=callbacks)
            # Reset env
            self._env.reset()
        except AssertionError as e:
            # Reset env
            self._env.reset()
            print("Trial stopped:", e)
            # Prune hyperparams that generate NaNs
            raise optuna.exceptions.TrialPruned()
        except Exception as err:
            # NOTE(review): unlike the AssertionError branch, the env is not
            # reset here - confirm whether that is intended
            exception_type = type(err).__name__
            print("Trial stopped due to raised exception:", exception_type, err)
            # Prune also all other exceptions
            raise optuna.exceptions.TrialPruned()
        is_pruned = eval_callback.is_pruned
        reward = eval_callback.last_mean_reward
        print(
            f"\nFinished a trial with reward={reward}, is_pruned={is_pruned} "
            f"for hyperparameters: {kwargs}"
        )
        del model
        if is_pruned:
            raise optuna.exceptions.TrialPruned()
        return reward
    def hyperparameters_optimization(self) -> None:
        """
        Run the Optuna study and write its report.

        A study is created (or loaded when it already exists in
        ``self.storage``) and ``self.objective`` is optimized for
        ``self.n_trials`` trials; a keyboard interrupt stops the optimization
        but not the reporting. The trials are written as ``<report>.csv`` and
        the study is pickled as ``<report>.pkl`` below
        ``<self.log_folder>/<self.algo>``. Unless ``self.no_optim_plots`` is set,
        the optimization history and parameter importances are then plotted.
        """
        if self.verbose > 0:
            print("Optimizing hyperparameters")
        if self.storage is not None and self.study_name is None:
            warnings.warn(
                f"You passed a remote storage: {self.storage} but no `--study-name`. "
                "The study name will be generated by Optuna, make sure to re-use the same study name "
                "when you want to do distributed hyperparameter optimization."
            )
        if self.tensorboard_log is not None:
            warnings.warn(
                "Tensorboard log is deactivated when running hyperparameter optimization"
            )
            self.tensorboard_log = None
        # TODO (external): eval each hyperparams several times to account for noisy evaluation
        sampler = self._create_sampler(self.sampler)
        pruner = self._create_pruner(self.pruner)
        if self.verbose > 0:
            print(f"Sampler: {self.sampler} - Pruner: {self.pruner}")
        study = optuna.create_study(
            sampler=sampler,
            pruner=pruner,
            storage=self.storage,
            study_name=self.study_name,
            load_if_exists=True,
            direction="maximize",
        )
        try:
            study.optimize(
                self.objective,
                n_trials=self.n_trials,
                n_jobs=self.n_jobs,
                gc_after_trial=True,
                show_progress_bar=True,
            )
        except KeyboardInterrupt:
            # Stop optimizing but still report what was collected so far
            pass
        print("Number of finished trials: ", len(study.trials))
        # `study.best_trial` raises ValueError when no trial has completed
        # (ex: every trial was pruned or the run was interrupted early);
        # the report below must still be written in that case
        try:
            best_trial = study.best_trial
        except ValueError:
            best_trial = None
        if best_trial is None:
            print("No trial has completed, there is no best trial to report")
        else:
            print("Best trial:")
            print("Value: ", best_trial.value)
            print("Params: ")
            for key, value in best_trial.params.items():
                print(f"    {key}: {value}")
        report_name = (
            f"report_{self.env_id}_{self.n_trials}-trials-{self.n_timesteps}"
            f"-{self.sampler}-{self.pruner}_{int(time.time())}"
        )
        log_path = os.path.join(self.log_folder, self.algo, report_name)
        if self.verbose:
            print(f"Writing report to {log_path}")
        # Write report
        os.makedirs(os.path.dirname(log_path), exist_ok=True)
        study.trials_dataframe().to_csv(f"{log_path}.csv")
        # Save python object to inspect/re-use it later
        with open(f"{log_path}.pkl", "wb+") as f:
            pkl.dump(study, f)
        # Skip plots
        if self.no_optim_plots:
            return
        # Plot optimization result (best effort, plotting failures are ignored)
        try:
            fig1 = plot_optimization_history(study)
            fig2 = plot_param_importances(study)
            fig1.show()
            fig2.show()
        except (ValueError, ImportError, RuntimeError):
            pass
    def collect_demonstration(self, model):
        """
        Fill the replay buffer of ``model`` with transitions and save it.

        The environment is stepped ``model.replay_buffer.buffer_size`` times.
        The action stored for each transition is not the one passed to
        ``step`` but the ``actual_actions`` entry of the returned infos. The
        buffer is then saved as ``replay_buffer.pkl`` in ``self.save_path`` and
        the environment is closed.

        :param model: Model providing ``env``, ``replay_buffer`` and
            ``save_replay_buffer``
        """
        # Any random action will do (this won't actually be used since `preload_replay_buffer` env kwarg is enabled)
        action = np.array([model.env.action_space.sample()])
        # Reset env at the beginning
        obs = model.env.reset()
        # Collect transitions
        for _ in range(model.replay_buffer.buffer_size):
            # Note: If `None` is passed to Grasp env, it uses custom action heuristic to reach the target
            next_obs, rewards, dones, infos = model.env.unwrapped.step(action)
            # Extract the actual actions from info
            actual_actions = [info["actual_actions"] for info in infos]
            # Add to replay buffer
            model.replay_buffer.add(obs, next_obs, actual_actions, rewards, dones)
            # Update current observation
            obs = next_obs
        print("Saving replay buffer")
        model.save_replay_buffer(os.path.join(self.save_path, "replay_buffer.pkl"))
        model.env.close()
        # Note: the former trailing bare `exit` was an expression statement that
        # never called anything; it is dropped, control returns to the caller
--- a/env_manager/rbs_gym/rbs_gym/utils/hyperparams_opt.py
+++ b/env_manager/rbs_gym/rbs_gym/utils/hyperparams_opt.py
@ -0,0 +1,170 @@
 from typing import Any, Dict
 import numpy as np
 import optuna
 from stable_baselines3.common.noise import NormalActionNoise
 from torch import nn as nn
 def sample_sac_params(trial: optuna.Trial) -> Dict[str, Any]:
    """
    Sampler for SAC hyperparameters.

    :param trial: Optuna trial; its ``n_actions`` attribute gives the size of
        the action noise
    :return: Keyword arguments for the SAC constructor
    """
    # Entries are added in the order of the returned mapping, which is also
    # the order in which values are requested from the trial
    sampled: Dict[str, Any] = {"buffer_size": 150000}
    # Fixed value (sampling it from [5000, 10000, 20000] is disabled)
    sampled["learning_starts"] = 5000
    sampled["batch_size"] = trial.suggest_categorical(
        "batch_size", [32, 64, 128, 256, 512, 1024, 2048]
    )
    sampled["learning_rate"] = trial.suggest_float(
        "learning_rate", low=0.000001, high=0.001, log=True
    )
    sampled["gamma"] = trial.suggest_float("gamma", low=0.98, high=1.0, log=True)
    sampled["tau"] = trial.suggest_float("tau", low=0.001, high=0.025, log=True)
    sampled["ent_coef"] = "auto_0.5_0.1"
    sampled["target_entropy"] = "auto"
    noise_scale = trial.suggest_float("noise_std", low=0.01, high=0.2, log=True)
    sampled["action_noise"] = NormalActionNoise(
        mean=np.zeros(trial.n_actions), sigma=np.ones(trial.n_actions) * noise_scale
    )
    sampled["train_freq"] = 1
    sampled["gradient_steps"] = trial.suggest_categorical("gradient_steps", [1, 2])
    # Three hidden layers sharing the sampled width
    layer_width = trial.suggest_categorical("net_arch", [256, 384, 512])
    sampled["policy_kwargs"] = {"net_arch": [layer_width] * 3}
    return sampled
 def sample_td3_params(trial: optuna.Trial) -> Dict[str, Any]:
    """
    Sampler for TD3 hyperparameters.

    :param trial: Optuna trial; its ``n_actions`` attribute gives the size of
        the action noise
    :return: Keyword arguments for the TD3 constructor
    """
    # Entries are added in the order of the returned mapping, which is also
    # the order in which values are requested from the trial
    sampled: Dict[str, Any] = {"buffer_size": 150000}
    # Fixed value (sampling it from [5000, 10000, 20000] is disabled)
    sampled["learning_starts"] = 5000
    sampled["batch_size"] = trial.suggest_categorical("batch_size", [32, 64, 128])
    sampled["learning_rate"] = trial.suggest_float(
        "learning_rate", low=0.000001, high=0.001, log=True
    )
    sampled["gamma"] = trial.suggest_float("gamma", low=0.98, high=1.0, log=True)
    sampled["tau"] = trial.suggest_float("tau", low=0.001, high=0.025, log=True)
    sampled["target_policy_noise"] = trial.suggest_float(
        "target_policy_noise", low=0.00000001, high=0.5, log=True
    )
    sampled["target_noise_clip"] = 0.5
    noise_scale = trial.suggest_float("noise_std", low=0.025, high=0.5, log=True)
    sampled["action_noise"] = NormalActionNoise(
        mean=np.zeros(trial.n_actions), sigma=np.ones(trial.n_actions) * noise_scale
    )
    sampled["train_freq"] = 1
    sampled["gradient_steps"] = trial.suggest_categorical("gradient_steps", [1, 2])
    # Three hidden layers sharing the sampled width
    layer_width = trial.suggest_categorical("net_arch", [256, 384, 512])
    sampled["policy_kwargs"] = {"net_arch": [layer_width] * 3}
    return sampled
 def sample_tqc_params(trial: optuna.Trial) -> Dict[str, Any]:
    """
    Sampler for TQC hyperparameters.

    :param trial: Optuna trial; its ``n_actions`` attribute gives the size of
        the action noise
    :return: Keyword arguments for the TQC constructor
    """
    lr = trial.suggest_float("learning_rate", low=0.000025, high=0.00075, log=True)
    # The trial parameter named "gamma" holds (1 - discount factor)
    discount = 1.0 - trial.suggest_float("gamma", low=0.0001, high=0.025, log=True)
    polyak = trial.suggest_float("tau", low=0.0005, high=0.025, log=True)
    noise_scale = trial.suggest_float("noise_std", low=0.01, high=0.1, log=True)
    exploration_noise = NormalActionNoise(
        mean=np.zeros(trial.n_actions), sigma=np.ones(trial.n_actions) * noise_scale
    )
    n_gradient_steps = trial.suggest_categorical("gradient_steps", [1, 2])
    # Two hidden layers sharing the sampled width
    layer_width = trial.suggest_categorical("net_arch", [128, 256, 384, 512])
    n_quantiles = trial.suggest_int("n_quantiles", low=20, high=40)
    n_critics = trial.suggest_categorical("n_critics", [2, 3])
    policy_kwargs = {
        "net_arch": [layer_width] * 2,
        "n_quantiles": n_quantiles,
        "n_critics": n_critics,
    }
    return {
        "buffer_size": 25000,
        "learning_starts": 0,
        "batch_size": 32,
        "learning_rate": lr,
        "gamma": discount,
        "tau": polyak,
        "ent_coef": "auto_0.1_0.05",
        "target_entropy": "auto",
        # 8% of the quantiles of each critic, rounded
        "top_quantiles_to_drop_per_net": round(0.08 * n_quantiles),
        "action_noise": exploration_noise,
        "train_freq": 1,
        "gradient_steps": n_gradient_steps,
        "policy_kwargs": policy_kwargs,
    }
 # Maps the name of an algorithm to the function sampling its hyperparameters
 HYPERPARAMS_SAMPLER = dict(
    sac=sample_sac_params,
    td3=sample_td3_params,
    tqc=sample_tqc_params,
 )
--- a/env_manager/rbs_gym/rbs_gym/utils/utils.py
+++ b/env_manager/rbs_gym/rbs_gym/utils/utils.py
@ -0,0 +1,411 @@
 import argparse
 import glob
 import importlib
 import os
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 import gymnasium as gym
 # For custom activation fn
 import stable_baselines3 as sb3  # noqa: F401
 import torch as th  # noqa: F401
 import yaml
 from sb3_contrib import QRDQN, TQC
 from stable_baselines3 import A2C, DDPG, DQN, PPO, SAC, TD3
 from stable_baselines3.common.callbacks import BaseCallback
 from stable_baselines3.common.env_util import make_vec_env
 from stable_baselines3.common.monitor import Monitor
 from stable_baselines3.common.sb2_compat.rmsprop_tf_like import (  # noqa: F401
    RMSpropTFLike,
 )
 from stable_baselines3.common.vec_env import (
    DummyVecEnv,
    SubprocVecEnv,
    VecEnv,
    VecFrameStack,
    VecNormalize,
 )
 from torch import nn as nn  # noqa: F401 pylint: disable=unused-import
 # Registry mapping the algorithm name used on the command line to the
 # class implementing it.
 ALGOS = {
    "a2c": A2C,
    "ddpg": DDPG,
    "dqn": DQN,
    "ppo": PPO,
    "sac": SAC,
    "td3": TD3,
    # Algorithms provided by sb3_contrib
    "qrdqn": QRDQN,
    "tqc": TQC,
 }
 def flatten_dict_observations(env: gym.Env) -> gym.Env:
    """
    Wrap an environment with a Dict observation space so that it emits
    flattened observations.
    :param env: Environment whose observation space is a ``gym.spaces.Dict``
    :return: The wrapped environment
    """
    assert isinstance(env.observation_space, gym.spaces.Dict)
    try:
        flattened = gym.wrappers.FlattenObservation(env)
    except AttributeError:
        # Fallback used when ``FlattenObservation`` is not available
        dict_keys = list(env.observation_space.spaces.keys())
        flattened = gym.wrappers.FlattenDictWrapper(env, dict_keys=dict_keys)
    return flattened
 def get_wrapper_class(hyperparams: Dict[str, Any]) -> Optional[Callable[[gym.Env], gym.Env]]:
    """
    Build a callable applying the Gym wrapper(s) named by the
    "env_wrapper" hyperparameter.
    A single wrapper is given as a dotted path:
    env_wrapper: gym_minigrid.wrappers.FlatObsWrapper
    Several wrappers are given as a list, applied in order:
    env_wrapper:
        - utils.wrappers.PlotActionWrapper
        - utils.wrappers.TimeFeatureWrapper
    A list entry may also be a one-item mapping ``{dotted.path: kwargs}``
    to pass keyword arguments to the wrapper constructor.
    :param hyperparams: Hyperparameters, possibly containing "env_wrapper"
    :return: A function wrapping an environment, or None when the key is
        absent or set to None
    """
    spec = hyperparams.get("env_wrapper")
    if spec is None:
        return None
    entries = spec if isinstance(spec, list) else [spec]
    # (wrapper class, constructor kwargs) pairs, in application order
    resolved = []
    for entry in entries:
        if isinstance(entry, dict):
            # A mapping entry carries keyword arguments for the wrapper
            assert len(entry) == 1, (
                "You have an error in the formatting "
                f"of your YAML file near {entry}. "
                "You should check the indentation."
            )
            dotted_path = list(entry.keys())[0]
            kwargs = entry[dotted_path]
        else:
            dotted_path, kwargs = entry, {}
        # Split "package.module.ClassName" at the last dot
        module_name, _, class_name = dotted_path.rpartition(".")
        module = importlib.import_module(module_name)
        resolved.append((getattr(module, class_name), kwargs))
    def wrap_env(env: gym.Env) -> gym.Env:
        """
        :param env: Environment to wrap
        :return: The environment wrapped by every configured wrapper
        """
        for wrapper_class, kwargs in resolved:
            env = wrapper_class(env, **kwargs)
        return env
    return wrap_env
 def get_callback_list(hyperparams: Dict[str, Any]) -> List[BaseCallback]:
    """
    Instantiate the callback(s) named by the "callback" hyperparameter.
    A single callback is given as a dotted path:
    callback: stable_baselines3.common.callbacks.CheckpointCallback
    Several callbacks are given as a list:
    callback:
        - utils.callbacks.PlotActionWrapper
        - stable_baselines3.common.callbacks.CheckpointCallback
    A list entry may also be a one-item mapping ``{dotted.path: kwargs}``
    to pass keyword arguments to the callback constructor.
    :param hyperparams: Hyperparameters, possibly containing "callback"
    :return: Instantiated callbacks (empty when the key is absent or None)
    """
    instances = []
    spec = hyperparams.get("callback")
    if spec is None:
        return instances
    entries = spec if isinstance(spec, list) else [spec]
    for entry in entries:
        if isinstance(entry, dict):
            # A mapping entry carries keyword arguments for the callback
            assert len(entry) == 1, (
                "You have an error in the formatting "
                f"of your YAML file near {entry}. "
                "You should check the indentation."
            )
            dotted_path = list(entry.keys())[0]
            kwargs = entry[dotted_path]
        else:
            dotted_path, kwargs = entry, {}
        # Split "package.module.ClassName" at the last dot
        module_name, _, class_name = dotted_path.rpartition(".")
        callback_class = getattr(importlib.import_module(module_name), class_name)
        instances.append(callback_class(**kwargs))
    return instances
 def create_test_env(
    env_id: str,
    n_envs: int = 1,
    stats_path: Optional[str] = None,
    seed: int = 0,
    log_dir: Optional[str] = None,
    should_render: bool = True,
    hyperparams: Optional[Dict[str, Any]] = None,
    env_kwargs: Optional[Dict[str, Any]] = None,
 ) -> VecEnv:
    """
    Create environment for testing a trained agent
    :param env_id:
    :param n_envs: number of processes
    :param stats_path: path to folder containing saved running averaged
    :param seed: Seed for random number generator
    :param log_dir: Where to log rewards
    :param should_render: For Pybullet env, display the GUI
    :param hyperparams: Additional hyperparams (ex: n_stack). The
        "env_wrapper" entry, if any, is removed from this dict.
    :param env_kwargs: Optional keyword argument to pass to the env constructor
    :return: The vectorized environment, wrapped with VecNormalize and/or
        VecFrameStack when requested
    :raises ValueError: If normalization is requested but the saved
        ``vecnormalize.pkl`` cannot be found under ``stats_path``
    """
    # Avoid circular import
    from rbs_gym.utils.exp_manager import ExperimentManager
    # Replace the None default *before* looking up the wrapper:
    # get_wrapper_class() dereferences the dict and would fail on None.
    hyperparams = {} if hyperparams is None else hyperparams
    # Create the environment and wrap it if necessary
    env_wrapper = get_wrapper_class(hyperparams)
    if "env_wrapper" in hyperparams.keys():
        del hyperparams["env_wrapper"]
    vec_env_kwargs = {}
    vec_env_cls = DummyVecEnv
    if n_envs > 1 or (ExperimentManager.is_bullet(env_id) and should_render):
        # HACK: force SubprocVecEnv for Bullet env
        # as Pybullet envs does not follow gym.render() interface
        vec_env_cls = SubprocVecEnv
        # start_method = 'spawn' for thread safe
    env = make_vec_env(
        env_id,
        n_envs=n_envs,
        monitor_dir=log_dir,
        seed=seed,
        wrapper_class=env_wrapper,
        env_kwargs=env_kwargs,
        vec_env_cls=vec_env_cls,
        vec_env_kwargs=vec_env_kwargs,
    )
    # Load saved stats for normalizing input and rewards
    # And optionally stack frames
    if stats_path is not None:
        # "normalize" is optional: treat a missing key as "no normalization"
        # instead of raising KeyError.
        if hyperparams.get("normalize", False):
            print("Loading running average")
            print(f"with params: {hyperparams.get('normalize_kwargs')}")
            path_ = os.path.join(stats_path, "vecnormalize.pkl")
            if os.path.exists(path_):
                env = VecNormalize.load(path_, env)
                # Deactivate training and reward normalization
                env.training = False
                env.norm_reward = False
            else:
                raise ValueError(f"VecNormalize stats {path_} not found")
        n_stack = hyperparams.get("frame_stack", 0)
        if n_stack > 0:
            print(f"Stacking {n_stack} frames")
            env = VecFrameStack(env, n_stack)
    return env
 def linear_schedule(initial_value: Union[float, str]) -> Callable[[float], float]:
    """
    Build a schedule that scales ``initial_value`` by the remaining progress.
    :param initial_value: Starting value; a string is converted with ``float``
    :return: Function mapping the remaining progress to the scheduled value
    """
    start = float(initial_value) if isinstance(initial_value, str) else initial_value
    def schedule(progress_remaining: float) -> float:
        """
        :param progress_remaining: Goes from 1 (beginning) down to 0 (end)
        :return: ``progress_remaining`` times the starting value
        """
        return progress_remaining * start
    return schedule
 def get_trained_models(log_folder: str) -> Dict[str, Tuple[str, str]]:
    """
    List the (algorithm, environment) pairs found under a log folder laid
    out as ``<log_folder>/<algo>/<env_id>_<run_id>``.
    :param log_folder: Root log folder
    :return: Dict mapping ``"<algo>-<env_id>"`` to ``(algo, env_id)``
    """
    trained_models = {}
    for algo in os.listdir(log_folder):
        algo_dir = os.path.join(log_folder, algo)
        if not os.path.isdir(algo_dir):
            continue
        for run_folder in os.listdir(algo_dir):
            # Strip only a trailing numeric "_<run_id>" so that environment
            # ids containing underscores are kept whole (same convention as
            # get_latest_run_id).
            env_id, separator, run_id = run_folder.rpartition("_")
            if not (separator and run_id.isdigit()):
                env_id = run_folder
            trained_models[f"{algo}-{env_id}"] = (algo, env_id)
    return trained_models
 def get_latest_run_id(log_path: str, env_id: str) -> int:
    """
    Find the highest run number among the entries named
    ``<env_id>_<number>`` inside ``log_path``.
    :param log_path: path to log folder
    :param env_id: Environment id used as the entry name prefix
    :return: latest run number, or 0 when no matching entry exists
    """
    latest = 0
    pattern = os.path.join(log_path, env_id + "_[0-9]*")
    for candidate in glob.glob(pattern):
        # Split "<prefix>_<suffix>" at the last underscore
        prefix, _, suffix = os.path.basename(candidate).rpartition("_")
        # The glob only checks the first character after the underscore,
        # so confirm the exact prefix and a fully numeric suffix.
        if prefix != env_id or not suffix.isdigit():
            continue
        latest = max(latest, int(suffix))
    return latest
 def get_saved_hyperparams(
    stats_path: str,
    norm_reward: bool = False,
    test_mode: bool = False,
 ) -> Tuple[Dict[str, Any], Optional[str]]:
    """
    Load the hyperparameters saved next to a trained agent and derive the
    normalization settings.
    :param stats_path: Folder expected to contain ``config.yml`` (or, failing
        that, ``obs_rms.pkl``)
    :param norm_reward: Value used for the "norm_reward" normalization option
    :param test_mode: When "normalize" is stored as a string, override its
        "norm_reward" entry with ``norm_reward``
    :return: ``(hyperparams, stats_path)``; ``stats_path`` is None when the
        given path is not a directory
    """
    hyperparams = {}
    if not os.path.isdir(stats_path):
        return hyperparams, None
    config_file = os.path.join(stats_path, "config.yml")
    if os.path.isfile(config_file):
        # Load saved hyperparameters
        with open(config_file, "r") as f:
            # SECURITY NOTE: UnsafeLoader can construct arbitrary Python
            # objects; only load configuration files you trust.
            loaded = yaml.load(
                f, Loader=yaml.UnsafeLoader
            )  # pytype: disable=module-attr
        # An empty config.yml parses to None; fall back to an empty dict
        # instead of failing on the item assignment below.
        hyperparams = {} if loaded is None else loaded
        hyperparams["normalize"] = hyperparams.get("normalize", False)
    else:
        # No config: normalization is inferred from the presence of the stats file
        obs_rms_path = os.path.join(stats_path, "obs_rms.pkl")
        hyperparams["normalize"] = os.path.isfile(obs_rms_path)
    # Load normalization params
    if hyperparams["normalize"]:
        if isinstance(hyperparams["normalize"], str):
            # SECURITY NOTE: the string comes from the saved config and is
            # evaluated as Python code; only use trusted configuration files.
            normalize_kwargs = eval(hyperparams["normalize"])
            if test_mode:
                normalize_kwargs["norm_reward"] = norm_reward
        else:
            normalize_kwargs = {
                "norm_obs": hyperparams["normalize"],
                "norm_reward": norm_reward,
            }
        hyperparams["normalize_kwargs"] = normalize_kwargs
    return hyperparams, stats_path
 class StoreDict(argparse.Action):
    """
    Argparse action collecting ``key:value`` tokens into a dict.
    In: args1:0.0 args2:"dict(a=1)"
    Out: {'args1': 0.0, arg2: dict(a=1)}
    """
    def __init__(self, option_strings, dest, nargs=None, **kwargs):
        self._nargs = nargs
        super().__init__(option_strings, dest, nargs=nargs, **kwargs)
    def __call__(self, parser, namespace, values, option_string=None):
        # Extend the dict already stored on the namespace, if there is one
        current_arg = getattr(namespace, self.dest, None)
        arg_dict = current_arg if isinstance(current_arg, Dict) else {}
        for arguments in values:
            if arguments:
                # Everything before the first ":" is the key, the rest the value
                key = arguments.split(":", 1)[0]
                value = arguments[len(key) + 1 :]
                # Evaluate the string as python code
                arg_dict[key] = eval(value)
        setattr(namespace, self.dest, arg_dict)
 def str2bool(value: Union[str, bool]) -> bool:
    """
    Parse a yes/no style string into a boolean; booleans pass through
    unchanged. Can be used as argparse type.
    :param value: Boolean, or one of the accepted spellings (case-insensitive)
    :return: The parsed boolean
    :raises argparse.ArgumentTypeError: If the string is not recognized
    """
    if isinstance(value, bool):
        return value
    token = value.lower()
    if token in ("yes", "true", "t", "y", "1"):
        return True
    if token in ("no", "false", "f", "n", "0"):
        return False
    raise argparse.ArgumentTypeError("Boolean value expected.")
 def empty_str2none(value: Optional[str]) -> Optional[str]:
    """
    Map an empty (falsy) value to None and return anything else unchanged.
    Can be used as argparse type.
    """
    return value if value else None
--- a/env_manager/rbs_gym/rbs_gym/utils/wrappers.py
+++ b/env_manager/rbs_gym/rbs_gym/utils/wrappers.py
@ -0,0 +1,395 @@
 import gymnasium as gym
 import numpy as np
 from matplotlib import pyplot as plt
 from scipy.signal import iirfilter, sosfilt, zpk2sos
 class DoneOnSuccessWrapper(gym.Wrapper):
    """
    Reset on success and offsets the reward.
    Useful for GoalEnv.
    :param env: Environment to wrap
    :param reward_offset: Constant added to every reward
    :param n_successes: Number of consecutive successful steps after which
        the episode is ended
    """
    def __init__(self, env: gym.Env, reward_offset: float = 0.0, n_successes: int = 1):
        super(DoneOnSuccessWrapper, self).__init__(env)
        self.reward_offset = reward_offset
        self.n_successes = n_successes
        self.current_successes = 0
    def reset(self, **kwargs):
        # gymnasium callers pass keyword arguments (seed=..., options=...);
        # forward them instead of rejecting them.
        self.current_successes = 0
        return self.env.reset(**kwargs)
    def step(self, action):
        # ``end_flags`` is [done] for a 4-tuple step result and
        # [terminated, truncated] for the 5-tuple gymnasium result, so both
        # conventions are accepted and returned unchanged in shape.
        obs, reward, *end_flags, info = self.env.step(action)
        if info.get("is_success", False):
            self.current_successes += 1
        else:
            self.current_successes = 0
        # number of successes in a row
        end_flags[0] = end_flags[0] or self.current_successes >= self.n_successes
        reward += self.reward_offset
        return (obs, reward, *end_flags, info)
    def compute_reward(self, achieved_goal, desired_goal, info):
        reward = self.env.compute_reward(achieved_goal, desired_goal, info)
        return reward + self.reward_offset
 class ActionNoiseWrapper(gym.Wrapper):
    """
    Perturb every action with zero-mean gaussian noise before it reaches the
    wrapped environment (the agent is not told), to test the robustness of
    the control.
    :param env: (gym.Env)
    :param noise_std: (float) Standard deviation of the noise
    """
    def __init__(self, env, noise_std=0.1):
        super().__init__(env)
        self.noise_std = noise_std
    def step(self, action):
        # One independent sample per action component
        perturbation = np.random.normal(
            loc=np.zeros_like(action),
            scale=np.ones_like(action) * self.noise_std,
        )
        return self.env.step(action + perturbation)
 # from https://docs.obspy.org
 def lowpass(data, freq, df, corners=4, zerophase=False):
    """
    Butterworth-Lowpass Filter.
    Filter data removing data over certain frequency ``freq`` using ``corners``
    corners.
    The filter uses :func:`scipy.signal.iirfilter` (for design)
    and :func:`scipy.signal.sosfilt` (for applying the filter).
    :type data: numpy.ndarray
    :param data: Data to filter.
    :param freq: Filter corner frequency.
    :param df: Sampling rate in Hz.
    :param corners: Filter corners / order.
    :param zerophase: If True, apply filter once forwards and once backwards.
        This results in twice the number of corners but zero phase shift in
        the resulting filtered trace.
    :return: Filtered data. When ``freq`` is at or above the Nyquist
        frequency a float copy of ``data`` is returned unfiltered.
    """
    nyquist = 0.5 * df
    normalized_corner = freq / nyquist
    if normalized_corner >= 1:
        # A digital filter can only be designed for a normalized corner
        # strictly below 1. A low-pass whose corner sits at Nyquist keeps the
        # whole band anyway, so report it and hand the signal back unchanged.
        msg = (
            "Selected corner frequency is at or above Nyquist. "
            + "Returning the signal unfiltered."
        )
        print(msg)
        return np.array(data, dtype=float)
    z, p, k = iirfilter(
        corners, normalized_corner, btype="lowpass", ftype="butter", output="zpk"
    )
    sos = zpk2sos(z, p, k)
    if zerophase:
        # Forward pass, then a pass over the reversed signal, restoring order
        firstpass = sosfilt(sos, data)
        return sosfilt(sos, firstpass[::-1])[::-1]
    else:
        return sosfilt(sos, data)
 class LowPassFilterWrapper(gym.Wrapper):
    """
    Butterworth-Lowpass filter applied to the actions: each action component
    is replaced by the last sample of the low-passed history of that
    component since the last reset.
    :param env: (gym.Env)
    :param freq: Filter corner frequency.
    :param df: Sampling rate in Hz.
    """
    def __init__(self, env, freq=5.0, df=25.0):
        super(LowPassFilterWrapper, self).__init__(env)
        self.freq = freq
        self.df = df
        # Actions received since the last reset
        self.signal = []
    def reset(self, **kwargs):
        # gymnasium callers pass keyword arguments (seed=..., options=...);
        # forward them instead of rejecting them.
        self.signal = []
        return self.env.reset(**kwargs)
    def step(self, action):
        self.signal.append(action)
        filtered = np.zeros_like(action)
        # Build the history array once rather than once per component.
        # NOTE: the whole history is re-filtered at every step, so the cost
        # of a step grows with the episode length.
        history = np.array(self.signal)
        for i in range(self.action_space.shape[0]):
            smoothed_action = lowpass(history[:, i], freq=self.freq, df=self.df)
            filtered[i] = smoothed_action[-1]
        return self.env.step(filtered)
 class ActionSmoothingWrapper(gym.Wrapper):
    """
    Smooth the action using exponential moving average.
    :param env: (gym.Env)
    :param smoothing_coef: (float) Smoothing coefficient (0 no smoothing, 1 very smooth)
    """
    def __init__(self, env, smoothing_coef: float = 0.0):
        super(ActionSmoothingWrapper, self).__init__(env)
        self.smoothing_coef = smoothing_coef
        # Running average; None until the first step after a reset
        self.smoothed_action = None
        # from https://github.com/rail-berkeley/softlearning/issues/3
        # for smoothing latent space
        # self.alpha = self.smoothing_coef
        # self.beta = np.sqrt(1 - self.alpha ** 2) / (1 - self.alpha)
    def reset(self, **kwargs):
        # gymnasium callers pass keyword arguments (seed=..., options=...);
        # forward them instead of rejecting them.
        self.smoothed_action = None
        return self.env.reset(**kwargs)
    def step(self, action):
        if self.smoothed_action is None:
            # The average starts from zero, not from the first action
            self.smoothed_action = np.zeros_like(action)
        self.smoothed_action = (
            self.smoothing_coef * self.smoothed_action
            + (1 - self.smoothing_coef) * action
        )
        return self.env.step(self.smoothed_action)
 class DelayedRewardWrapper(gym.Wrapper):
    """
    Delay the reward by `delay` steps, it makes the task harder but more realistic.
    The reward is accumulated during those steps.
    :param env: (gym.Env)
    :param delay: (int) Number of steps the reward should be delayed.
    """
    def __init__(self, env, delay=10):
        super(DelayedRewardWrapper, self).__init__(env)
        self.delay = delay
        self.current_step = 0
        self.accumulated_reward = 0.0
    def reset(self, **kwargs):
        # gymnasium callers pass keyword arguments (seed=..., options=...);
        # forward them instead of rejecting them.
        self.current_step = 0
        self.accumulated_reward = 0.0
        return self.env.reset(**kwargs)
    def step(self, action):
        # ``end_flags`` is [done] for a 4-tuple step result and
        # [terminated, truncated] for the 5-tuple gymnasium result.
        obs, reward, *end_flags, info = self.env.step(action)
        self.accumulated_reward += reward
        self.current_step += 1
        # Pay out every `delay` steps, and always when the episode ends so
        # that no reward is lost.
        if self.current_step % self.delay == 0 or any(end_flags):
            reward = self.accumulated_reward
            self.accumulated_reward = 0.0
        else:
            reward = 0.0
        return (obs, reward, *end_flags, info)
 class HistoryWrapper(gym.Wrapper):
    """
    Stack past observations and actions to give an history to the agent.
    The returned observation is the concatenation of the last ``horizon``
    observations followed by the last ``horizon`` actions, oldest first.
    :param env: (gym.Env)
    :param horizon: (int) Number of steps to keep in the history.
    """
    def __init__(self, env: gym.Env, horizon: int = 5):
        assert isinstance(env.observation_space, gym.spaces.Box)
        wrapped_obs_space = env.observation_space
        wrapped_action_space = env.action_space
        # The history buffers hold whole frames back to back
        # ([frame_0, frame_1, ...]), so the bounds must be tiled frame by
        # frame; repeating each element would misalign per-dimension bounds.
        low_obs = np.tile(wrapped_obs_space.low, horizon)
        high_obs = np.tile(wrapped_obs_space.high, horizon)
        low_action = np.tile(wrapped_action_space.low, horizon)
        high_action = np.tile(wrapped_action_space.high, horizon)
        low = np.concatenate((low_obs, low_action))
        high = np.concatenate((high_obs, high_action))
        # Overwrite the observation space
        env.observation_space = gym.spaces.Box(
            low=low, high=high, dtype=wrapped_obs_space.dtype
        )
        super(HistoryWrapper, self).__init__(env)
        self.horizon = horizon
        self.low_action, self.high_action = low_action, high_action
        self.low_obs, self.high_obs = low_obs, high_obs
        self.low, self.high = low, high
        self.obs_history = np.zeros(low_obs.shape, low_obs.dtype)
        self.action_history = np.zeros(low_action.shape, low_action.dtype)
    def _create_obs_from_history(self):
        return np.concatenate((self.obs_history, self.action_history))
    def reset(self, **kwargs):
        # Flush the history
        self.obs_history[...] = 0
        self.action_history[...] = 0
        # gymnasium callers pass keyword arguments (seed=..., options=...)
        result = self.env.reset(**kwargs)
        # A tuple result is (obs, info); otherwise the result is the bare
        # observation. The same form is handed back to the caller.
        if isinstance(result, tuple):
            obs, *extras = result
        else:
            obs, extras = result, None
        self.obs_history[..., -obs.shape[-1] :] = obs
        stacked = self._create_obs_from_history()
        return stacked if extras is None else (stacked, *extras)
    def step(self, action):
        # ``end_flags`` is [done] for a 4-tuple step result and
        # [terminated, truncated] for the 5-tuple gymnasium result.
        obs, reward, *end_flags, info = self.env.step(action)
        last_ax_size = obs.shape[-1]
        # Drop the oldest frame and write the newest one at the end
        self.obs_history = np.roll(self.obs_history, shift=-last_ax_size, axis=-1)
        self.obs_history[..., -obs.shape[-1] :] = obs
        self.action_history = np.roll(
            self.action_history, shift=-action.shape[-1], axis=-1
        )
        self.action_history[..., -action.shape[-1] :] = action
        return (self._create_obs_from_history(), reward, *end_flags, info)
 class HistoryWrapperObsDict(gym.Wrapper):
    """
    History Wrapper for dict observation.
    Only the "observation" entry of the dict is replaced by the stacked
    history (last ``horizon`` observations followed by the last ``horizon``
    actions, oldest first); the other entries are passed through.
    :param env: (gym.Env)
    :param horizon: (int) Number of steps to keep in the history.
    """
    def __init__(self, env, horizon=5):
        assert isinstance(env.observation_space.spaces["observation"], gym.spaces.Box)
        wrapped_obs_space = env.observation_space.spaces["observation"]
        wrapped_action_space = env.action_space
        # The history buffers hold whole frames back to back
        # ([frame_0, frame_1, ...]), so the bounds must be tiled frame by
        # frame; repeating each element would misalign per-dimension bounds.
        low_obs = np.tile(wrapped_obs_space.low, horizon)
        high_obs = np.tile(wrapped_obs_space.high, horizon)
        low_action = np.tile(wrapped_action_space.low, horizon)
        high_action = np.tile(wrapped_action_space.high, horizon)
        low = np.concatenate((low_obs, low_action))
        high = np.concatenate((high_obs, high_action))
        # Overwrite the observation space
        env.observation_space.spaces["observation"] = gym.spaces.Box(
            low=low, high=high, dtype=wrapped_obs_space.dtype
        )
        super(HistoryWrapperObsDict, self).__init__(env)
        self.horizon = horizon
        self.low_action, self.high_action = low_action, high_action
        self.low_obs, self.high_obs = low_obs, high_obs
        self.low, self.high = low, high
        self.obs_history = np.zeros(low_obs.shape, low_obs.dtype)
        self.action_history = np.zeros(low_action.shape, low_action.dtype)
    def _create_obs_from_history(self):
        return np.concatenate((self.obs_history, self.action_history))
    def reset(self, **kwargs):
        # Flush the history
        self.obs_history[...] = 0
        self.action_history[...] = 0
        # gymnasium callers pass keyword arguments (seed=..., options=...)
        result = self.env.reset(**kwargs)
        # A tuple result is (obs_dict, info); otherwise the result is the
        # bare observation dict. The same form is handed back to the caller.
        if isinstance(result, tuple):
            obs_dict, *extras = result
        else:
            obs_dict, extras = result, None
        obs = obs_dict["observation"]
        self.obs_history[..., -obs.shape[-1] :] = obs
        obs_dict["observation"] = self._create_obs_from_history()
        return obs_dict if extras is None else (obs_dict, *extras)
    def step(self, action):
        # ``end_flags`` is [done] for a 4-tuple step result and
        # [terminated, truncated] for the 5-tuple gymnasium result.
        obs_dict, reward, *end_flags, info = self.env.step(action)
        obs = obs_dict["observation"]
        last_ax_size = obs.shape[-1]
        # Drop the oldest frame and write the newest one at the end
        self.obs_history = np.roll(self.obs_history, shift=-last_ax_size, axis=-1)
        self.obs_history[..., -obs.shape[-1] :] = obs
        self.action_history = np.roll(
            self.action_history, shift=-action.shape[-1], axis=-1
        )
        self.action_history[..., -action.shape[-1] :] = action
        obs_dict["observation"] = self._create_obs_from_history()
        return (obs_dict, reward, *end_flags, info)
 class PlotActionWrapper(gym.Wrapper):
    """
    Wrapper for plotting the taken actions.
    Only works with 1D actions for now.
    Optionally, it can be used to plot the observations too.
    :param env: (gym.Env)
    :param plot_freq: (int) Plot every `plot_freq` episodes
    """
    def __init__(self, env, plot_freq=5):
        super(PlotActionWrapper, self).__init__(env)
        self.plot_freq = plot_freq
        self.current_episode = 0
        # Observation buffer (Optional)
        # self.observations = []
        # Action buffer: one list of actions per episode
        self.actions = []
    def reset(self, **kwargs):
        self.current_episode += 1
        if self.current_episode % self.plot_freq == 0:
            self.plot()
            # Reset
            self.actions = []
        # gymnasium callers pass keyword arguments (seed=..., options=...);
        # forward them instead of rejecting them.
        obs = self.env.reset(**kwargs)
        # Start a fresh per-episode list
        self.actions.append([])
        # self.observations.append(obs)
        return obs
    def step(self, action):
        # The step result is handed back untouched, whatever its length
        # (4-tuple or 5-tuple); only the action is recorded.
        result = self.env.step(action)
        self.actions[-1].append(action)
        # self.observations.append(obs)
        return result
    def plot(self):
        actions = self.actions
        # One shared timestep axis across all recorded episodes
        x = np.arange(sum([len(episode) for episode in actions]))
        plt.figure("Actions")
        plt.title("Actions during exploration", fontsize=14)
        plt.xlabel("Timesteps", fontsize=14)
        plt.ylabel("Action", fontsize=14)
        start = 0
        for i in range(len(self.actions)):
            end = start + len(self.actions[i])
            plt.plot(x[start:end], self.actions[i])
            # Clipped actions: real behavior, note that it is between [-2, 2] for the Pendulum
            # plt.scatter(x[start:end], np.clip(self.actions[i], -1, 1), s=1)
            # plt.scatter(x[start:end], self.actions[i], s=1)
            start = end
        plt.show()
 class FeatureExtractorFreezeParammetersWrapper(gym.Wrapper):
    """
    Disable gradient tracking on every parameter of the feature extractor.
    NOTE(review): ``feature_extractor`` is not defined in this class;
    presumably it is provided by the wrapped environment -- confirm.
    """
    def __init__(self, env: gym.Env):
        super().__init__(env)
        for frozen_param in self.feature_extractor.parameters():
            frozen_param.requires_grad = False
--- a/env_manager/rbs_gym/scripts/evaluate.py
+++ b/env_manager/rbs_gym/scripts/evaluate.py
@ -0,0 +1,294 @@
 #!/usr/bin/env -S python3 -O
 import argparse
 import os
 from typing import Dict
 import numpy as np
 import torch as th
 import yaml
 from stable_baselines3.common.utils import set_random_seed
 from stable_baselines3.common.vec_env import DummyVecEnv, VecEnv, VecEnvWrapper
 from rbs_gym import envs as gz_envs
 from rbs_gym.utils import create_test_env, get_latest_run_id, get_saved_hyperparams
 from rbs_gym.utils.utils import ALGOS, StoreDict, str2bool
 def main(args: argparse.Namespace):
    """
    Load a trained agent and run it for ``args.n_episodes`` episodes,
    printing reward, length and success statistics.
    :param args: Parsed command-line arguments (see the parser at the bottom
        of this script)
    :raises ValueError: If no saved model can be found
    """
    if args.exp_id == 0:
        args.exp_id = get_latest_run_id(
            os.path.join(args.log_folder, args.algo), args.env
        )
        print(f"Loading latest experiment, id={args.exp_id}")
    # Sanity checks
    if args.exp_id > 0:
        log_path = os.path.join(args.log_folder, args.algo, f"{args.env}_{args.exp_id}")
    else:
        log_path = os.path.join(args.log_folder, args.algo)
    assert os.path.isdir(log_path), f"The {log_path} folder was not found"
    found = False
    for ext in ["zip"]:
        model_path = os.path.join(log_path, f"{args.env}.{ext}")
        found = os.path.isfile(model_path)
        if found:
            break
    # --load-best and --load-checkpoint override the default model path
    if args.load_best:
        model_path = os.path.join(log_path, "best_model.zip")
        found = os.path.isfile(model_path)
    if args.load_checkpoint is not None:
        model_path = os.path.join(
            log_path, f"rl_model_{args.load_checkpoint}_steps.zip"
        )
        found = os.path.isfile(model_path)
    if not found:
        raise ValueError(
            f"No model found for {args.algo} on {args.env}, path: {model_path}"
        )
    off_policy_algos = ["qrdqn", "dqn", "ddpg", "sac", "her", "td3", "tqc"]
    if args.algo in off_policy_algos:
        args.n_envs = 1
    set_random_seed(args.seed)
    if args.num_threads > 0:
        if args.verbose > 1:
            print(f"Setting torch.num_threads to {args.num_threads}")
        th.set_num_threads(args.num_threads)
    stats_path = os.path.join(log_path, args.env)
    hyperparams, stats_path = get_saved_hyperparams(
        stats_path, norm_reward=args.norm_reward, test_mode=True
    )
    # load env_kwargs if existing
    env_kwargs = {}
    args_path = os.path.join(log_path, args.env, "args.yml")
    if os.path.isfile(args_path):
        with open(args_path, "r") as f:
            # SECURITY NOTE: UnsafeLoader can construct arbitrary Python
            # objects; only evaluate runs whose files you trust.
            # pytype: disable=module-attr
            loaded_args = yaml.load(f, Loader=yaml.UnsafeLoader)
        # An empty args.yml parses to None and the key may be missing;
        # neither should abort the evaluation.
        saved_env_kwargs = (loaded_args or {}).get("env_kwargs")
        if saved_env_kwargs is not None:
            env_kwargs = saved_env_kwargs
    # overwrite with command line arguments
    if args.env_kwargs is not None:
        env_kwargs.update(args.env_kwargs)
    log_dir = args.reward_log if args.reward_log != "" else None
    env = create_test_env(
        args.env,
        n_envs=args.n_envs,
        stats_path=stats_path,
        seed=args.seed,
        log_dir=log_dir,
        should_render=not args.no_render,
        hyperparams=hyperparams,
        env_kwargs=env_kwargs,
    )
    kwargs = dict(seed=args.seed)
    if args.algo in off_policy_algos:
        # Dummy buffer size as we don't need memory to evaluate the trained agent
        kwargs.update(dict(buffer_size=1))
    custom_objects = {'observation_space': env.observation_space, 'action_space': env.action_space}
    model = ALGOS[args.algo].load(model_path, env=env, custom_objects=custom_objects, **kwargs)
    obs = env.reset()
    # Deterministic by default
    stochastic = args.stochastic
    deterministic = not stochastic
    print(
        f"Evaluating for {args.n_episodes} episodes with a",
        "deterministic" if deterministic else "stochastic",
        "policy.",
    )
    state = None
    episode_reward = 0.0
    episode_rewards, episode_lengths, success_episode_lengths = [], [], []
    ep_len = 0
    episode = 0
    # For HER, monitor success rate
    successes = []
    while episode < args.n_episodes:
        action, state = model.predict(obs, state=state, deterministic=deterministic)
        obs, reward, done, infos = env.step(action)
        if not args.no_render:
            env.render("human")
        episode_reward += reward[0]
        ep_len += 1
        # Statistics follow the first environment only (reward[0], infos[0]),
        # so its done flag is the one that ends an episode. The bookkeeping
        # must not depend on the verbosity: otherwise the episode counter
        # never advances with --verbose 0 and this loop never terminates.
        if done[0]:
            episode += 1
            verbose = args.verbose > 0
            is_success = infos[0].get("is_success")
            if verbose:
                print(f"--- Episode {episode}/{args.n_episodes}")
                # NOTE: for env using VecNormalize, the mean reward
                # is a normalized reward when `--norm_reward` flag is passed
                print(f"Episode Reward: {episode_reward:.2f}")
            episode_rewards.append(episode_reward)
            if verbose:
                print("Episode Length", ep_len)
            episode_lengths.append(ep_len)
            if is_success is not None:
                if verbose:
                    print("Success?:", is_success)
                successes.append(is_success)
                if is_success:
                    success_episode_lengths.append(ep_len)
                if verbose:
                    print(f"Current success rate: {100 * np.mean(successes):.2f}%")
            episode_reward = 0.0
            ep_len = 0
            state = None
    if args.verbose > 0 and len(successes) > 0:
        print(f"Success rate: {100 * np.mean(successes):.2f}%")
    if args.verbose > 0 and len(episode_rewards) > 0:
        print(
            f"Mean reward: {np.mean(episode_rewards):.2f} "
            f"+/- {np.std(episode_rewards):.2f}"
        )
    if args.verbose > 0 and len(episode_lengths) > 0:
        print(
            f"Mean episode length: {np.mean(episode_lengths):.2f} "
            f"+/- {np.std(episode_lengths):.2f}"
        )
    if args.verbose > 0 and len(success_episode_lengths) > 0:
        print(
            f"Mean episode length of successful episodes: {np.mean(success_episode_lengths):.2f} "
            f"+/- {np.std(success_episode_lengths):.2f}"
        )
    # Workaround for https://github.com/openai/gym/issues/893
    if not args.no_render:
        if args.n_envs == 1 and "Bullet" not in args.env and isinstance(env, VecEnv):
            # DummyVecEnv
            # Unwrap env
            while isinstance(env, VecEnvWrapper):
                env = env.venv
            if isinstance(env, DummyVecEnv):
                env.envs[0].env.close()
            else:
                env.close()
        else:
            # SubprocVecEnv
            env.close()
 if __name__ == "__main__":
    # Command-line options as (flags, add_argument keyword arguments) pairs,
    # registered below in this exact order.
    cli_options = [
        # Environment and its parameters
        (("--env",), dict(type=str, default="Reach-Gazebo-v0", help="Environment ID")),
        (
            ("--env-kwargs",),
            dict(
                type=str,
                nargs="+",
                action=StoreDict,
                help="Optional keyword argument to pass to the env constructor",
            ),
        ),
        (("--n-envs",), dict(type=int, default=1, help="Number of environments")),
        # Algorithm
        (
            ("--algo",),
            dict(
                type=str,
                choices=list(ALGOS.keys()),
                required=False,
                default="sac",
                help="RL algorithm to use during the training",
            ),
        ),
        (
            ("--num-threads",),
            dict(
                type=int,
                default=-1,
                help="Number of threads for PyTorch (-1 to use default)",
            ),
        ),
        # Test duration
        (
            ("-n", "--n-episodes"),
            dict(type=int, default=200, help="Number of evaluation episodes"),
        ),
        # Random seed
        (("--seed",), dict(type=int, default=0, help="Random generator seed")),
        # Model to test
        (
            ("-f", "--log-folder"),
            dict(type=str, default="logs", help="Path to the log directory"),
        ),
        (
            ("--exp-id",),
            dict(
                type=int,
                default=0,
                help="Experiment ID (default: 0: latest, -1: no exp folder)",
            ),
        ),
        (
            ("--load-best",),
            dict(
                type=str2bool,
                default=False,
                help="Load best model instead of last model if available",
            ),
        ),
        (
            ("--load-checkpoint",),
            dict(
                type=int,
                help="Load checkpoint instead of last model if available, you must pass the number of timesteps corresponding to it",
            ),
        ),
        # Deterministic/stochastic actions
        (
            ("--stochastic",),
            dict(
                type=str2bool,
                default=False,
                help="Use stochastic actions instead of deterministic",
            ),
        ),
        # Logging
        (
            ("--reward-log",),
            dict(type=str, default="reward_logs", help="Where to log reward"),
        ),
        (
            ("--norm-reward",),
            dict(
                type=str2bool,
                default=False,
                help="Normalize reward if applicable (trained with VecNormalize)",
            ),
        ),
        # Disable render
        (
            ("--no-render",),
            dict(
                type=str2bool,
                default=False,
                help="Do not render the environment (useful for tests)",
            ),
        ),
        # Verbosity
        (
            ("--verbose",),
            dict(type=int, default=1, help="Verbose mode (0: no output, 1: INFO)"),
        ),
    ]
    parser = argparse.ArgumentParser()
    for flags, options in cli_options:
        parser.add_argument(*flags, **options)
    # Unrecognized arguments are collected and ignored rather than rejected
    args, unknown = parser.parse_known_args()
    main(args)
--- a/env_manager/rbs_gym/scripts/optimize.py
+++ b/env_manager/rbs_gym/scripts/optimize.py
@ -0,0 +1,233 @@
 #!/usr/bin/env -S python3 -O
 import argparse
 import difflib
 import os
 import uuid
 from typing import Dict
 import gymnasium as gym
 import numpy as np
 import torch as th
 from stable_baselines3.common.utils import set_random_seed
 from rbs_gym import envs as gz_envs
 from rbs_gym.utils.exp_manager import ExperimentManager
 from rbs_gym.utils.utils import ALGOS, StoreDict, empty_str2none, str2bool
 def main(args: Dict):
    """Set up, run and save a training experiment for the selected environment.

    The environment ID is checked against the gym registry, the random
    generators are seeded, the PyTorch thread count is optionally limited, and
    the experiment is then delegated to ``ExperimentManager``.

    Args:
        args: Parsed command-line options, accessed by attribute (the object
            returned by ``ArgumentParser.parse_known_args``).

    Raises:
        ValueError: If ``args.env`` is not a registered environment, or if
            ``args.trained_agent`` is set but is not an existing ``.zip`` file.
    """
    # Check if the selected environment is valid
    # If it could not be found, suggest the closest match
    registered_envs = set(gym.envs.registry.keys())
    if args.env not in registered_envs:
        try:
            closest_match = difflib.get_close_matches(args.env, registered_envs, n=1)[0]
        except IndexError:
            closest_match = "'no close match found...'"
        raise ValueError(
            f"{args.env} not found in gym registry, you maybe meant {closest_match}?"
        )
    # If no specific seed is selected, choose a random one
    if args.seed < 0:
        args.seed = np.random.randint(2**32 - 1, dtype=np.int64).item()
    # Set the random seed across platforms
    set_random_seed(args.seed)
    # Setting num threads to 1 makes things run faster on cpu
    if args.num_threads > 0:
        if args.verbose > 1:
            print(f"Setting torch.num_threads to {args.num_threads}")
        th.set_num_threads(args.num_threads)
    # Verify that pre-trained agent exists before continuing to train it.
    # This is an explicit check rather than an `assert` because the script's
    # shebang runs Python with -O, which removes assert statements.
    if args.trained_agent != "":
        is_zip_file = args.trained_agent.endswith(".zip") and os.path.isfile(
            args.trained_agent
        )
        if not is_zip_file:
            raise ValueError("The trained_agent must be a valid path to a .zip file")
    # If enabled, ensure that the run has a unique ID
    uuid_str = f"_{uuid.uuid4()}" if args.uuid else ""
    print("=" * 10, args.env, "=" * 10)
    print(f"Seed: {args.seed}")
    # NOTE(review): train.py passes `optimize_hyperparameters` as an extra
    # positional argument and unpacks setup_experiment() as a
    # (model, saved_hyperparams) pair; this script does neither. Confirm the
    # call below still matches ExperimentManager.
    exp_manager = ExperimentManager(
        args,
        args.algo,
        args.env,
        args.log_folder,
        args.tensorboard_log,
        args.n_timesteps,
        args.eval_freq,
        args.eval_episodes,
        args.save_freq,
        args.hyperparams,
        args.env_kwargs,
        args.trained_agent,
        truncate_last_trajectory=args.truncate_last_trajectory,
        uuid_str=uuid_str,
        seed=args.seed,
        log_interval=args.log_interval,
        save_replay_buffer=args.save_replay_buffer,
        preload_replay_buffer=args.preload_replay_buffer,
        verbose=args.verbose,
        vec_env_type=args.vec_env,
    )
    # Prepare experiment
    model = exp_manager.setup_experiment()
    exp_manager.learn(model)
    exp_manager.save_trained_model(model)
 if __name__ == "__main__":
    # Command-line interface, declared as (option strings, add_argument keywords)
    # pairs and registered in the order listed.
    option_table = [
        # Environment and its parameters
        (("--env",), dict(type=str, default="Reach-Gazebo-v0", help="Environment ID")),
        (("--env-kwargs",), dict(type=str, nargs="+", action=StoreDict,
            help="Optional keyword argument to pass to the env constructor")),
        (("--vec-env",), dict(type=str, choices=["dummy", "subproc"], default="dummy",
            help="Type of VecEnv to use")),
        # Algorithm and training
        (("--algo",), dict(type=str, choices=list(ALGOS.keys()), required=False, default="sac",
            help="RL algorithm to use during the training")),
        (("-params", "--hyperparams"), dict(type=str, nargs="+", action=StoreDict,
            help="Optional RL hyperparameter overwrite (e.g. learning_rate:0.01 train_freq:10)")),
        (("-n", "--n-timesteps"), dict(type=int, default=-1,
            help="Overwrite the number of timesteps")),
        (("--num-threads",), dict(type=int, default=-1,
            help="Number of threads for PyTorch (-1 to use default)")),
        # Continue training an already trained agent
        (("-i", "--trained-agent"), dict(type=str, default="",
            help="Path to a pretrained agent to continue training")),
        # Random seed
        (("--seed",), dict(type=int, default=-1, help="Random generator seed")),
        # Saving of model
        (("--save-freq",), dict(type=int, default=10000,
            help="Save the model every n steps (if negative, no checkpoint)")),
        (("--save-replay-buffer",), dict(type=str2bool, default=False,
            help="Save the replay buffer too (when applicable)")),
        # Pre-load a replay buffer and start training on it
        (("--preload-replay-buffer",), dict(type=empty_str2none, default="",
            help="Path to a replay buffer that should be preloaded before starting the training process")),
        # Logging
        (("-f", "--log-folder"), dict(type=str, default="logs",
            help="Path to the log directory")),
        (("-tb", "--tensorboard-log"), dict(type=empty_str2none, default="tensorboard_logs",
            help="Tensorboard log dir")),
        (("--log-interval",), dict(type=int, default=-1,
            help="Override log interval (default: -1, no change)")),
        (("-uuid", "--uuid"), dict(type=str2bool, default=False,
            help="Ensure that the run has a unique ID")),
        # Evaluation
        (("--eval-freq",), dict(type=int, default=-1,
            help="Evaluate the agent every n steps (if negative, no evaluation)")),
        (("--eval-episodes",), dict(type=int, default=5,
            help="Number of episodes to use for evaluation")),
        # Verbosity
        (("--verbose",), dict(type=int, default=1,
            help="Verbose mode (0: no output, 1: INFO)")),
        # HER specifics
        (("--truncate-last-trajectory",), dict(type=str2bool, default=True,
            help="When using HER with online sampling the last trajectory in the replay buffer will be truncated after reloading the replay buffer.")),
    ]
    parser = argparse.ArgumentParser()
    for option_strings, option_kwargs in option_table:
        parser.add_argument(*option_strings, **option_kwargs)
    # Options this parser does not know are collected and ignored
    args, unknown = parser.parse_known_args()
    main(args=args)
--- a/env_manager/rbs_gym/scripts/spawner.py
+++ b/env_manager/rbs_gym/scripts/spawner.py
@ -0,0 +1,200 @@
 #!/usr/bin/env python3
 import time
 import gym_gz_models
 import gym_gz
 from scenario import gazebo as scenario_gazebo
 from scenario import core as scenario_core
 import rclpy
 from rclpy.node import Node
 from scipy.spatial.transform import Rotation as R
 import numpy as np
 from geometry_msgs.msg import PoseStamped
 from rclpy.executors import MultiThreadedExecutor
 from rbs_skill_servers import CartesianControllerPublisher, TakePose
 from rclpy.action import ActionClient
 from control_msgs.action import GripperCommand
 class Spawner(Node):
    """ROS 2 node that owns a Gazebo simulation and steers arm0 towards a spawned cube.

    On construction the node loads a world SDF, inserts the robot URDF and a
    wooden cube, and resets the robot joints.  Two timers then run: one steps
    the simulator, the other publishes Cartesian target poses, first 0.2 above
    the cube and then 0.03 above it while sending a gripper command.
    """
    def __init__(self):
        """Create the simulator and ROS interfaces, populate the world, start the timers."""
        super().__init__("spawner")
        # File locations are ROS parameters; the defaults are the paths that
        # used to be hard-coded here.
        self.world_sdf = self.declare_parameter(
            "world_sdf",
            "/home/bill-finger/rbs_ws/install/rbs_simulation/share/rbs_simulation/worlds/asm2.sdf",
        ).value
        self.robot_urdf = self.declare_parameter(
            "robot_urdf", "/home/bill-finger/rbs_ws/current.urdf"
        ).value
        # State read by the callbacks; initialised before any callback can exist
        self.current_pose: PoseStamped = PoseStamped()
        self.stage = 0
        self.gripper_open = False
        self.gazebo = scenario_gazebo.GazeboSimulator(
            step_size=0.001, rtf=1.0, steps_per_run=1
        )
        self.cartesian_pose = self.create_publisher(
            PoseStamped, "/arm0/cartesian_motion_controller/target_frame", 10
        )
        self.current_pose_sub = self.create_subscription(
            PoseStamped,
            "/arm0/cartesian_motion_controller/current_pose",
            self.callback,
            10,
        )
        self._action_client = ActionClient(
            self, GripperCommand, "/arm0/gripper_controller/gripper_cmd"
        )
        scenario_gazebo.set_verbosity(scenario_gazebo.Verbosity_info)
        self.gazebo.insert_world_from_sdf(self.world_sdf)
        self.gazebo.initialize()
        self.world = self.gazebo.get_world()
        # Inserts the robot and the cube; sets self.cube
        self.init_sim()
        timer_period = 0.001  # seconds
        self.timer = self.create_timer(timer_period, self.timer_callback)
        self.ano_timer = self.create_timer(timer_period, self.another_timer)
    def callback(self, msg: PoseStamped):
        """Store the latest pose received on the current_pose topic."""
        self.current_pose = msg
    def timer_callback(self):
        """Run the simulator once per timer tick."""
        self.gazebo.run()
    def send_goal(self, goal: float):
        """Send a gripper command goal and toggle the ``gripper_open`` flag.

        Args:
            goal: Value written to the goal's ``command.position``.
        """
        goal_msg = GripperCommand.Goal()
        # Use the public message field rather than the private `_command` slot
        goal_msg.command.position = goal
        goal_msg.command.max_effort = 1.0
        self._action_client.wait_for_server()
        self.gripper_open = not self.gripper_open
        self._send_goal_future = self._action_client.send_goal_async(goal_msg)
        self._send_goal_future.add_done_callback(self.goal_response_callback)
    def goal_response_callback(self, future):
        """Log whether the goal was accepted and, if so, request its result."""
        goal_handle = future.result()
        if not goal_handle.accepted:
            self.get_logger().info('Goal rejected :(')
            return
        self.get_logger().info('Goal accepted :)')
        self._get_result_future = goal_handle.get_result_async()
        self._get_result_future.add_done_callback(self.get_result_callback)
    def get_result_callback(self, future):
        """Log the position reported in the action result."""
        result = future.result().result
        self.get_logger().info('Result: {0}'.format(result.position))
    def another_timer(self):
        """Advance the two-stage approach towards the cube.

        Stage 0 publishes a target 0.2 above the cube until the current pose is
        within 0.01 of it; stage 1 does the same for a target 0.03 above the
        cube and sends the gripper goal while ``gripper_open`` is False.  A
        stage change takes effect within the same tick.
        """
        cube_position = np.array(self.cube.base_position())
        position_over_cube = cube_position + np.array([0, 0, 0.2])
        position_cube = cube_position + np.array([0, 0, 0.03])
        quat_xyzw = R.from_euler(seq="y", angles=180, degrees=True).as_quat()
        if self.stage == 0:
            distance = self.distance_to_target(position_over_cube, quat_xyzw)
            if distance > 0.01:
                self.cartesian_pose.publish(self.get_pose(position_over_cube, quat_xyzw))
            elif distance < 0.01:
                self.stage += 1
        if self.stage == 1:
            distance = self.distance_to_target(position_cube, quat_xyzw)
            if distance > 0.01:
                if not self.gripper_open:
                    self.send_goal(0.064)
                self.cartesian_pose.publish(self.get_pose(position_cube, quat_xyzw))
            elif distance < 0.01:
                self.stage += 1
    def distance_to_target(self, position, orientation):
        """Return the Euclidean distance from the current pose position to ``position``.

        Args:
            position: Target position as an indexable (x, y, z).
            orientation: Accepted for symmetry with ``get_pose``; it does not
                affect the result.
        """
        current = self.current_pose.pose.position
        current_position = np.array([current.x, current.y, current.z])
        target_position = np.array(
            [float(position[0]), float(position[1]), float(position[2])]
        )
        return np.linalg.norm(current_position - target_position)
    def init_sim(self):
        """Open the GUI, insert the robot and a cube, and reset the robot joints.

        Raises:
            RuntimeError: If a model cannot be inserted or the joint reset fails.
        """
        self.gazebo.gui()
        self.gazebo.run(paused=True)
        self.world.to_gazebo().set_gravity((0, 0, -9.8))
        if not self.world.insert_model(self.robot_urdf):
            raise RuntimeError(f"Failed to insert the robot model from {self.robot_urdf}")
        self.gazebo.run(paused=True)
        # Print every model with its links (position + xyz Euler angles) and joints
        for model_name in self.world.model_names():
            model = self.world.get_model(model_name)
            print(f"Model: {model_name}")
            print(f"  Base link: {model.base_frame()}")
            print("LINKS")
            for name in model.link_names():
                link = model.get_link(name)
                position = link.position()
                orientation_wxyz = np.asarray(link.orientation())
                # Reorder wxyz -> xyzw, the order scipy's Rotation.from_quat expects
                orientation = R.from_quat(orientation_wxyz[[1, 2, 3, 0]]).as_euler("xyz")
                print(f"  {name}:", (*position, *tuple(orientation)))
            print("JOINTS")
            for name in model.joint_names():
                print(f"{name}")
        # Download the cube SDF file
        cube_sdf = scenario_gazebo.get_model_file_from_fuel(
            uri="https://fuel.gazebosim.org/openrobotics/models/wood cube 5cm",
            use_cache=False,
        )
        # Sample a random position; bounds are ordered low <= high because
        # numpy leaves the result undefined otherwise
        random_position = np.random.uniform(low=[-0.3, -0.2, 0.0], high=[-0.2, 0.2, 0.0])
        # Get a unique name
        model_name = gym_gz.utils.scenario.get_unique_model_name(
            world=self.world, model_name="cube"
        )
        # Insert the model outside of an `assert`, so the call is never optimised away
        inserted = self.world.insert_model(
            cube_sdf, scenario_core.Pose(random_position, [1.0, 0, 0, 0]), model_name
        )
        if not inserted:
            raise RuntimeError(f"Failed to insert the cube model '{model_name}'")
        model = self.world.get_model("rbs_arm")
        # Look the cube up under the name it was actually inserted with
        self.cube = self.world.get_model(model_name)
        ok_reset_pos = model.to_gazebo().reset_joint_positions(
            [0.0, -0.240, -3.142, 1.090, 0, 1.617, 0.0, 0.0, 0.0],
            [name for name in model.joint_names() if "_joint" in name]
        )
        if not ok_reset_pos:
            raise RuntimeError("Failed to reset the robot state")
    def get_pose(self, position, orientation) -> PoseStamped:
        """Build a ``PoseStamped`` in the ``base_link`` frame stamped with the node clock.

        Args:
            position: Indexable (x, y, z).
            orientation: Indexable quaternion in (x, y, z, w) order.
        """
        msg = PoseStamped()
        msg.header.stamp = self.get_clock().now().to_msg()
        msg.header.frame_id = "base_link"
        # float() keeps the message setters happy for ints and numpy scalars alike
        msg.pose.position.x = float(position[0])
        msg.pose.position.y = float(position[1])
        msg.pose.position.z = float(position[2])
        msg.pose.orientation.x = float(orientation[0])
        msg.pose.orientation.y = float(orientation[1])
        msg.pose.orientation.z = float(orientation[2])
        msg.pose.orientation.w = float(orientation[3])
        return msg
 def main(args=None):
    """Spin the Spawner node on a multi-threaded executor until interrupted.

    The simulator, the node and rclpy are released in a ``finally`` block so
    that cleanup also happens when spinning ends with Ctrl-C or an error.

    Args:
        args: Command-line arguments forwarded to ``rclpy.init``.
    """
    rclpy.init(args=args)
    executor = MultiThreadedExecutor()
    my_node = Spawner()
    executor.add_node(my_node)
    try:
        executor.spin()
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop this script
        pass
    finally:
        my_node.gazebo.close()
        my_node.destroy_node()
        # The context may already be shut down by the time we get here
        if rclpy.ok():
            rclpy.shutdown()
 if __name__ == "__main__":
    # Run the spawner node when this file is executed as a script
    main()
--- a/env_manager/rbs_gym/scripts/test_agent.py
+++ b/env_manager/rbs_gym/scripts/test_agent.py
@ -0,0 +1,101 @@
 #!/usr/bin/env python3
 import argparse
 from typing import Dict
 import gymnasium as gym
 from stable_baselines3.common.env_checker import check_env
 from rbs_gym import envs as gz_envs
 from rbs_gym.utils.utils import StoreDict, str2bool
 def main(args: Dict):
    """Run a random-action agent in the selected environment.

    The environment is created, seeded and optionally validated with
    ``check_env``; it is then stepped with sampled actions for
    ``args.n_episodes`` episodes, printing the accumulated reward of each.
    ``args.render`` is accepted by the CLI but is not read here.

    Args:
        args: Parsed command-line options, accessed by attribute.
    """
    # `--env-kwargs` has no default, so it is None when the option is omitted
    env_kwargs = args.env_kwargs or {}
    # Create the environment
    env = gym.make(args.env, **env_kwargs)
    try:
        # Initialize random seed: gymnasium environments are seeded through
        # reset(); the action space has its own generator
        env.reset(seed=args.seed)
        env.action_space.seed(args.seed)
        # Check the environment
        if args.check_env:
            check_env(env, warn=True, skip_render_check=True)
        # Step environment for bunch of episodes
        for episode in range(args.n_episodes):
            done = False
            total_reward = 0
            # Reset the environment
            env.reset()
            # Step through the current episode until it is done
            while not done:
                # Sample random action
                action = env.action_space.sample()
                # gymnasium's step() returns terminated before truncated
                _, reward, terminated, truncated, _ = env.step(action)
                done = terminated or truncated
                # Accumulate the reward
                total_reward += reward
            print(f"Episode #{episode}\n\treward: {total_reward}")
    finally:
        # Cleanup once done, including on errors and Ctrl-C
        env.close()
 if __name__ == "__main__":
    # Command-line interface, declared as (option strings, add_argument keywords)
    # pairs and registered in the order listed.
    option_table = [
        # Environment and its parameters
        (("--env",), dict(type=str, default="Reach-Gazebo-v0", help="Environment ID")),
        (("--env-kwargs",), dict(type=str, nargs="+", action=StoreDict,
            help="Optional keyword argument to pass to the env constructor")),
        # Number of episodes to run
        (("-n", "--n-episodes"), dict(type=int, default=10000,
            help="Overwrite the number of episodes")),
        # Random seed
        (("--seed",), dict(type=int, default=69, help="Random generator seed")),
        # Flag to check environment
        (("--check-env",), dict(type=str2bool, default=True,
            help="Flag to check the environment before running the random agent")),
        # Flag to enable rendering
        (("--render",), dict(type=str2bool, default=False,
            help="Flag to enable rendering")),
    ]
    parser = argparse.ArgumentParser()
    for option_strings, option_kwargs in option_table:
        parser.add_argument(*option_strings, **option_kwargs)
    # Options this parser does not know are collected and ignored
    args, unknown = parser.parse_known_args()
    main(args=args)
--- a/env_manager/rbs_gym/scripts/train.py
+++ b/env_manager/rbs_gym/scripts/train.py
@ -0,0 +1,284 @@
 #!/usr/bin/env -S python3 -O
 import argparse
 import difflib
 import os
 import uuid
 from typing import Dict
 import gymnasium as gym
 import numpy as np
 import torch as th
 from stable_baselines3.common.utils import set_random_seed
 from rbs_gym import envs as gz_envs
 from rbs_gym.utils.exp_manager import ExperimentManager
 from rbs_gym.utils.utils import ALGOS, StoreDict, empty_str2none, str2bool
 def main(args: Dict):
    """Train an agent, or run hyperparameter optimization, for the selected environment.

    The environment ID is checked against the gym registry, the random
    generators are seeded, the PyTorch thread count is optionally limited and,
    when ``args.track`` is set, a Weights & Biases run is started.  If
    ``ExperimentManager.setup_experiment`` returns a result, the model is
    trained and saved; otherwise hyperparameter optimization is run.

    Args:
        args: Parsed command-line options, accessed by attribute.

    Raises:
        ValueError: If ``args.env`` is not a registered environment, or if
            ``args.trained_agent`` is set but is not an existing ``.zip`` file.
        ImportError: If tracking is requested but ``wandb`` is not installed.
    """
    # Check if the selected environment is valid
    # If it could not be found, suggest the closest match
    registered_envs = set(gym.envs.registry.keys())
    if args.env not in registered_envs:
        try:
            closest_match = difflib.get_close_matches(args.env, registered_envs, n=1)[0]
        except IndexError:
            closest_match = "'no close match found...'"
        raise ValueError(
            f"{args.env} not found in gym registry, you maybe meant {closest_match}?"
        )
    # If no specific seed is selected, choose a random one
    if args.seed < 0:
        args.seed = np.random.randint(2**32 - 1, dtype=np.int64).item()
    # Set the random seed across platforms
    set_random_seed(args.seed)
    # Setting num threads to 1 makes things run faster on cpu
    if args.num_threads > 0:
        if args.verbose > 1:
            print(f"Setting torch.num_threads to {args.num_threads}")
        th.set_num_threads(args.num_threads)
    # Verify that pre-trained agent exists before continuing to train it.
    # This is an explicit check rather than an `assert` because the script's
    # shebang runs Python with -O, which removes assert statements.
    if args.trained_agent != "":
        is_zip_file = args.trained_agent.endswith(".zip") and os.path.isfile(
            args.trained_agent
        )
        if not is_zip_file:
            raise ValueError("The trained_agent must be a valid path to a .zip file")
    # If enabled, ensure that the run has a unique ID
    uuid_str = f"_{uuid.uuid4()}" if args.uuid else ""
    print("=" * 10, args.env, "=" * 10)
    print(f"Seed: {args.seed}")
    # Stays None unless tracking is enabled, so the name is always bound
    run = None
    if args.track:
        try:
            import wandb
            import datetime
        except ImportError as e:
            raise ImportError(
                "if you want to use Weights & Biases to track experiment, please install W&B via `pip install wandb`"
            ) from e
        run_name = f"{args.env}__{args.algo}__{args.seed}__{datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}"
        tags = []
        run = wandb.init(
            name=run_name,
            project="rbs-gym",
            entity=None,
            tags=tags,
            config=vars(args),
            sync_tensorboard=True,  # auto-upload sb3's tensorboard metrics
            monitor_gym=False,  # auto-upload the videos of agents playing the game
            save_code=True,  # optional
        )
        # Tensorboard logs follow the tracked run's name
        args.tensorboard_log = f"runs/{run_name}"
    exp_manager = ExperimentManager(
        args,
        args.algo,
        args.env,
        args.log_folder,
        args.tensorboard_log,
        args.n_timesteps,
        args.eval_freq,
        args.eval_episodes,
        args.save_freq,
        args.hyperparams,
        args.env_kwargs,
        args.trained_agent,
        args.optimize_hyperparameters,
        truncate_last_trajectory=args.truncate_last_trajectory,
        uuid_str=uuid_str,
        seed=args.seed,
        log_interval=args.log_interval,
        save_replay_buffer=args.save_replay_buffer,
        preload_replay_buffer=args.preload_replay_buffer,
        verbose=args.verbose,
        vec_env_type=args.vec_env,
        no_optim_plots=args.no_optim_plots,
    )
    # Prepare experiment
    results = exp_manager.setup_experiment()
    if results is not None:
        model, saved_hyperparams = results
        if args.track:
            # we need to save the loaded hyperparameters
            args.saved_hyperparams = saved_hyperparams
            assert run is not None  # make mypy happy
            run.config.setdefaults(vars(args))
        # Normal training
        if model is not None:
            exp_manager.learn(model)
            exp_manager.save_trained_model(model)
    else:
        exp_manager.hyperparameters_optimization()
 if __name__ == "__main__":
    # Command-line interface, declared as (option strings, add_argument keywords)
    # pairs and registered in the order listed.
    option_table = [
        # Environment and its parameters
        (("--env",), dict(type=str, default="Reach-Gazebo-v0", help="Environment ID")),
        (("--env-kwargs",), dict(type=str, nargs="+", action=StoreDict,
            help="Optional keyword argument to pass to the env constructor")),
        (("--vec-env",), dict(type=str, choices=["dummy", "subproc"], default="dummy",
            help="Type of VecEnv to use")),
        # Algorithm and training
        (("--algo",), dict(type=str, choices=list(ALGOS.keys()), required=False, default="sac",
            help="RL algorithm to use during the training")),
        (("-params", "--hyperparams"), dict(type=str, nargs="+", action=StoreDict,
            help="Optional RL hyperparameter overwrite (e.g. learning_rate:0.01 train_freq:10)")),
        (("-n", "--n-timesteps"), dict(type=int, default=-1,
            help="Overwrite the number of timesteps")),
        (("--num-threads",), dict(type=int, default=-1,
            help="Number of threads for PyTorch (-1 to use default)")),
        # Continue training an already trained agent
        (("-i", "--trained-agent"), dict(type=str, default="",
            help="Path to a pretrained agent to continue training")),
        # Random seed
        (("--seed",), dict(type=int, default=-1, help="Random generator seed")),
        # Saving of model
        (("--save-freq",), dict(type=int, default=10000,
            help="Save the model every n steps (if negative, no checkpoint)")),
        (("--save-replay-buffer",), dict(type=str2bool, default=False,
            help="Save the replay buffer too (when applicable)")),
        # Pre-load a replay buffer and start training on it
        (("--preload-replay-buffer",), dict(type=empty_str2none, default="",
            help="Path to a replay buffer that should be preloaded before starting the training process")),
        (("--track",), dict(type=str2bool, default=False,
            help="Track experiment using wandb")),
        # optimization parameters
        (("--optimize-hyperparameters",), dict(type=str2bool, default=False,
            help="Run optimization or not?")),
        (("--no-optim-plots",), dict(action="store_true", default=False,
            help="Disable hyperparameter optimization plots")),
        # Logging
        (("-f", "--log-folder"), dict(type=str, default="logs",
            help="Path to the log directory")),
        (("-tb", "--tensorboard-log"), dict(type=empty_str2none, default="tensorboard_logs",
            help="Tensorboard log dir")),
        (("--log-interval",), dict(type=int, default=-1,
            help="Override log interval (default: -1, no change)")),
        (("-uuid", "--uuid"), dict(type=str2bool, default=False,
            help="Ensure that the run has a unique ID")),
        # Evaluation
        (("--eval-freq",), dict(type=int, default=-1,
            help="Evaluate the agent every n steps (if negative, no evaluation)")),
        (("--eval-episodes",), dict(type=int, default=5,
            help="Number of episodes to use for evaluation")),
        # Verbosity
        (("--verbose",), dict(type=int, default=1,
            help="Verbose mode (0: no output, 1: INFO)")),
        # HER specifics
        (("--truncate-last-trajectory",), dict(type=str2bool, default=True,
            help="When using HER with online sampling the last trajectory in the replay buffer will be truncated after reloading the replay buffer.")),
    ]
    parser = argparse.ArgumentParser()
    for option_strings, option_kwargs in option_table:
        parser.add_argument(*option_strings, **option_kwargs)
    # Options this parser does not know are collected and ignored
    args, unknown = parser.parse_known_args()
    main(args=args)
--- a/env_manager/rbs_gym/scripts/velocity.py
+++ b/env_manager/rbs_gym/scripts/velocity.py
@ -0,0 +1,138 @@
 #!/usr/bin/env python3
 import rclpy
 from rclpy.node import Node
 import numpy as np
 import quaternion
 from geometry_msgs.msg import Twist
 from geometry_msgs.msg import PoseStamped
 import tf2_ros
 import sys
 import time
 import threading
 import os
 class Converter(Node):
    """Convert Twist messages to PoseStamped

    Use this node to integrate twist messages into a moving target pose in
    Cartesian space.  An initial TF lookup assures that the target pose always
    starts at the robot's end-effector.
    """
    def __init__(self):
        """Declare parameters, create the publisher, subscriber and timer, start the TF lookup thread."""
        super().__init__("converter")
        self.twist_topic = self.declare_parameter("twist_topic", "/cartesian_motion_controller/target_twist").value
        self.pose_topic = self.declare_parameter("pose_topic", "/cartesian_motion_controller/target_frame").value
        self.frame_id = self.declare_parameter("frame_id", "base_link").value
        self.end_effector = self.declare_parameter("end_effector", "gripper_grasp_point").value
        self.tf_buffer = tf2_ros.Buffer()
        self.tf_listener = tf2_ros.TransformListener(self.tf_buffer, self)
        # np.quaternion takes (w, x, y, z), so this is the identity rotation.
        # Both values are placeholders until startup() has looked up the real pose.
        self.rot = np.quaternion(1, 0, 0, 0)
        self.pos = [0.0, 0.0, 0.0]
        self.pub = self.create_publisher(PoseStamped, self.pose_topic, 3)
        self.sub = self.create_subscription(Twist, self.twist_topic, self.twist_cb, 1)
        self.last = time.time()
        self.startup_done = False
        period = 1.0 / self.declare_parameter("publishing_rate", 100).value
        self.timer = self.create_timer(period, self.publish)
        # The lookup runs in its own daemon thread so the constructor does not block
        self.thread = threading.Thread(target=self.startup, daemon=True)
        self.thread.start()
    def startup(self):
        """Make sure to start at the robot's current pose

        Looks up the transform from ``end_effector`` to ``frame_id`` and uses it
        as the initial target pose.  If the lookup fails the whole process is
        terminated with exit code 1.
        """
        # Wait until we entered spinning in the main thread.
        time.sleep(1)
        try:
            start = self.tf_buffer.lookup_transform(
                target_frame=self.frame_id,
                source_frame=self.end_effector,
                time=rclpy.time.Time(),
            )
        except (
            tf2_ros.InvalidArgumentException,
            tf2_ros.LookupException,
            tf2_ros.ConnectivityException,
            tf2_ros.ExtrapolationException,
        ) as e:
            print(f"Startup failed: {e}")
            os._exit(1)
        translation = start.transform.translation
        rotation = start.transform.rotation
        # Build complete values and bind each in one assignment, instead of
        # mutating shared objects field by field from this thread
        self.pos = [translation.x, translation.y, translation.z]
        self.rot = np.quaternion(rotation.w, rotation.x, rotation.y, rotation.z)
        # Set last: twist_cb() and publish() stay inactive until this flag is True
        self.startup_done = True
    def twist_cb(self, data):
        """Numerically integrate twist message into a pose

        Use global self.frame_id as reference for the navigation commands.
        Messages that arrive before startup() has finished only refresh the
        integration timestamp.
        """
        now = time.time()
        dt = now - self.last
        self.last = now
        if not self.startup_done:
            # startup() replaces pos and rot when it finishes, so integrating
            # now would be discarded and would race with that thread
            return
        # NOTE(review): dt spans the time since the previous message (or since
        # construction), so a long gap yields one large step — confirm intended.
        # Position update
        self.pos[0] += data.linear.x * dt
        self.pos[1] += data.linear.y * dt
        self.pos[2] += data.linear.z * dt
        # Orientation update
        wx = data.angular.x
        wy = data.angular.y
        wz = data.angular.z
        _, q = quaternion.integrate_angular_velocity(
            lambda _: (wx, wy, wz), 0, dt, self.rot
        )
        self.rot = q[-1]  # the last one is after dt passed
    def publish(self):
        """Publish the current target pose; does nothing until startup() has finished."""
        if not self.startup_done:
            return
        try:
            msg = PoseStamped()
            msg.header.stamp = self.get_clock().now().to_msg()
            msg.header.frame_id = self.frame_id
            msg.pose.position.x = self.pos[0]
            msg.pose.position.y = self.pos[1]
            msg.pose.position.z = self.pos[2]
            msg.pose.orientation.x = self.rot.x
            msg.pose.orientation.y = self.rot.y
            msg.pose.orientation.z = self.rot.z
            msg.pose.orientation.w = self.rot.w
            self.pub.publish(msg)
        except Exception:
            # Swallow 'publish() to closed topic' error.
            # This rarely happens on killing this node.
            pass
 def main(args=None):
    """Initialise rclpy, create the Converter node and spin it.

    Args:
        args: Command-line arguments forwarded to ``rclpy.init``.
    """
    rclpy.init(args=args)
    converter = Converter()
    rclpy.spin(converter)
 if __name__ == "__main__":
    # Script entry point: Ctrl-C shuts rclpy down and exits with status 0;
    # any other exception is printed and the process exits with status 1.
    try:
        main()
    except KeyboardInterrupt:
        rclpy.shutdown()
        raise SystemExit(0)
    except Exception as error:
        print(error)
        raise SystemExit(1)
--- a/rbs_bringup/config/roboclone.yaml
+++ b/rbs_bringup/config/roboclone.yaml
@ -4,7 +4,7 @@ scene_config:
    pose:
      position:
        x: -0.45
-        y: -2.0
+        y: 0.0
        z: 1.6
      orientation:
        x: 3.14159
@ -15,7 +15,7 @@ scene_config:
    pose:
      position:
        x: 0.45
-        y: -2.0
+        y: 0.0
        z: 1.6
      orientation:
        x: 3.14159
--- a/rbs_bringup/launch/multi_robot.launch.py
+++ b/rbs_bringup/launch/multi_robot.launch.py
@ -285,7 +285,7 @@ def generate_launch_description():
    )
    declared_arguments.append(
        DeclareLaunchArgument("robots_config_file", 
-                              default_value="robot_scene", 
+                              default_value="roboclone.yaml", 
                              description="Filename for config file with robots in scene")
    )
--- a/rbs_bringup/launch/rbs_robot.launch.py
+++ b/rbs_bringup/launch/rbs_robot.launch.py
@ -14,12 +14,14 @@ from launch_ros.substitutions import FindPackageShare
 import xacro
 import os
 from ament_index_python.packages import get_package_share_directory
 from rbs_launch_utils.launch_common import load_yaml
 def launch_setup(context, *args, **kwargs):
    # Initialize Arguments
    robot_type = LaunchConfiguration("robot_type")
    # General arguments
    launch_rviz = LaunchConfiguration("launch_rviz")
    with_gripper_condition = LaunchConfiguration("with_gripper")
    controllers_file = LaunchConfiguration("controllers_file")
    cartesian_controllers = LaunchConfiguration("cartesian_controllers")
@ -44,14 +46,20 @@ def launch_setup(context, *args, **kwargs):
    pitch = LaunchConfiguration("pitch")
    yaw = LaunchConfiguration("yaw")
    namespace = LaunchConfiguration("namespace")
    multi_robot = LaunchConfiguration("multi_robot")
    robot_name = robot_name.perform(context)
    namespace = namespace.perform(context)
    robot_type = robot_type.perform(context)
    description_package = description_package.perform(context)
    description_file = description_file.perform(context)
    controllers_file = controllers_file.perform(context)
    multi_robot = multi_robot.perform(context)
-    # remappings = [("/tf", "tf"), ("/tf_static", "tf_static")]
+    remappings = []
    if multi_robot == "true":
        remappings.append([("/tf", "tf"), ("/tf_static", "tf_static")])
    controllers_file = os.path.join(get_package_share_directory(description_package), "config", controllers_file)
    xacro_file = os.path.join(get_package_share_directory(description_package), "urdf", description_file)
    robot_description_doc = xacro.process_file(
@ -67,7 +75,6 @@ def launch_setup(context, *args, **kwargs):
            "roll": roll.perform(context),
            "pitch": pitch.perform(context),
            "yaw": yaw.perform(context)
            #TODO: add rotation and add probably via dict
        }
    )
@ -83,7 +90,7 @@ def launch_setup(context, *args, **kwargs):
                [FindPackageShare(moveit_config_package), "config/moveit", "rbs_arm.srdf.xacro"]
            ),
            " ",
-            "name:=",robot_name," ",
+            "name:=",robot_type," ",
            "with_gripper:=",with_gripper_condition, " ",
            "gripper_name:=", gripper_name, " ",
        ]
@ -93,12 +100,19 @@ def launch_setup(context, *args, **kwargs):
    robot_description_kinematics = PathJoinSubstitution(
        [FindPackageShare(moveit_config_package), "config", "kinematics.yaml"]
    )
    # kinematics_yaml = load_yaml("rbs_arm", "config/kinematics.yaml")
    robot_description_kinematics = {"robot_description_kinematics": robot_description_kinematics}
    rviz_config_file = PathJoinSubstitution(
        [FindPackageShare("rbs_bringup"), "config", "rbs.rviz"]
    )
    robot_state_publisher = Node(
        package="robot_state_publisher",
        executable="robot_state_publisher",
        namespace=namespace,
        output="both",
-        # remappings=remappings,
+        remappings=remappings,
        parameters=[{"use_sim_time": use_sim_time}, robot_description],
    )
@ -121,6 +135,19 @@ def launch_setup(context, *args, **kwargs):
        ]
    )
    rviz = Node(
        package="rviz2",
        executable="rviz2",
        name="rviz2",
        output="log",
        arguments=["-d", rviz_config_file],
        parameters=[
            robot_description,
            robot_description_semantic,
            robot_description_kinematics
        ],
        condition=IfCondition(launch_rviz))
    control = IncludeLaunchDescription(
        PythonLaunchDescriptionSource([
            PathJoinSubstitution([
@ -209,9 +236,9 @@ def launch_setup(context, *args, **kwargs):
        control,
        moveit,
        skills,
-        task_planner,
+        # task_planner,
-        perception,
+        # perception,
-        # rviz
+        rviz
    ]
    return nodes_to_start
@ -230,7 +257,7 @@ def generate_launch_description():
    declared_arguments.append(
        DeclareLaunchArgument(
            "controllers_file",
-            default_value="rbs_arm_controllers_gazebosim.yaml",
+            default_value="rbs_arm0_controllers.yaml",
            description="YAML file with the controllers configuration.",
        )
    )
@ -310,7 +337,7 @@ def generate_launch_description():
    )
    declared_arguments.append(
        DeclareLaunchArgument("launch_rviz",
-                              default_value="true",
+                              default_value="false",
                              description="Launch RViz?")
    )
    declared_arguments.append(
@ -401,6 +428,29 @@ def generate_launch_description():
                              default_value="0.0",
                              description="Position of robot in world by Z")
    )
    # Orientation arguments; launch_setup forwards them to the xacro mappings
    # as "roll", "pitch" and "yaw".
    declared_arguments.append(
        DeclareLaunchArgument("roll",
                              default_value="0.0",
                              description="Roll angle of robot in world")
    )
    declared_arguments.append(
        DeclareLaunchArgument("pitch",
                              default_value="0.0",
                              description="Pitch angle of robot in world")
    )
    declared_arguments.append(
        DeclareLaunchArgument("yaw",
                              default_value="0.0",
                              description="Yaw angle of robot in world")
    )
    declared_arguments.append(
        DeclareLaunchArgument(
            "multi_robot",
            default_value="false",
            description="Flag if you use multi robot setup"
        )
    )
    return LaunchDescription(declared_arguments + [OpaqueFunction(function=launch_setup)])
--- a/rbs_bringup/launch/single_robot.launch.py
+++ b/rbs_bringup/launch/single_robot.launch.py
@ -76,6 +76,9 @@ def launch_setup(context, *args, **kwargs):
    )
    # controller_paramfile = configured_params.perform(context)
    # controller_paramfile = PathJoinSubstitution([
    #     FindPackageShare(robot_type), "config", "rbs_arm0_controllers.yaml"
    # ])
    # namespace = "/" + robot_name.perform(context) 
    namespace = ""
@ -102,7 +105,7 @@ def launch_setup(context, *args, **kwargs):
            "gripper_name": gripper_name,
            "controllers_file": controllers_file,
            "robot_type": robot_type,
-            "controllers_file": initial_joint_controllers_file_path,
+            # "controllers_file": controller_paramfile,
            "cartesian_controllers": cartesian_controllers,
            "description_package": description_package,
            "description_file": description_file,
@ -148,7 +151,7 @@ def generate_launch_description():
    declared_arguments.append(
        DeclareLaunchArgument(
            "controllers_file",
-            default_value="rbs_arm_controllers_gazebosim.yaml",
+            default_value="rbs_arm0_controllers.yaml",
            description="YAML file with the controllers configuration.",
        )
    )
@ -221,7 +224,7 @@ def generate_launch_description():
    )
    declared_arguments.append(
        DeclareLaunchArgument("with_gripper", 
-                              default_value="true", 
+                              default_value="false", 
                              description="With gripper or not?")
    )
    declared_arguments.append(
--- a/rbs_bringup/rbs_launch_utils/launch_common.py
+++ b/rbs_bringup/rbs_launch_utils/launch_common.py
@ -4,7 +4,7 @@ import math
 import os
 import sys
 import yaml
-from typing import Dict
+from typing import Any, Dict
 from ament_index_python.packages import get_package_share_directory
@ -24,7 +24,7 @@ def construct_angle_degrees(loader, node) -> float:
    """Utility function for converting degrees into radians from yaml."""
    return math.radians(construct_angle_radians(loader, node))
-def load_yaml(package_name: str, file_path: str) -> Dict:
+def load_yaml(package_name: str, file_path: str) -> dict[str, Any]:
    package_path = get_package_share_directory(package_name)
    absolute_file_path = os.path.join(package_path, file_path)
--- a/rbs_bt_executor/bt_trees/test_tree.xml
+++ b/rbs_bt_executor/bt_trees/test_tree.xml
@ -6,7 +6,7 @@
            <Action ID="EnvStarter" env_class="gz_enviroment::GzEnviroment" env_name="gz_enviroment"
                server_name="/env_manager/start_env" server_timeout="1000" workspace="{workspace}" />
            <SubTreePlus ID="WorkspaceInspection" __autoremap="1" goal_pose="{workspace}"
-                robot_name="rbs_arm" />
+                robot_name="ur_manipulator" />
        </Sequence>
    </BehaviorTree>
    <!-- ////////// -->
--- a/rbs_bt_executor/src/MoveToPoseArray.cpp
+++ b/rbs_bt_executor/src/MoveToPoseArray.cpp
@ -28,6 +28,8 @@ public:
    if (!target_pose_vec_.poses.empty()) {
      goal.robot_name = robot_name_;
      goal.target_pose = target_pose_vec_.poses.at(0);
      goal.end_effector_acceleration = 1.0;
      goal.end_effector_velocity = 1.0;
      target_pose_vec_.poses.erase(target_pose_vec_.poses.begin());
      setOutput<geometry_msgs::msg::PoseArray>("pose_vec_out",
--- a/rbs_simulation/launch/rbs_simulation.launch.py
+++ b/rbs_simulation/launch/rbs_simulation.launch.py
@ -1,561 +0,0 @@
 import os
 from launch import LaunchDescription, LaunchContext
 from launch.actions import (
    DeclareLaunchArgument,
    IncludeLaunchDescription,
    ExecuteProcess,
    OpaqueFunction
 )
 from ament_index_python.packages import get_package_share_directory
 from launch.conditions import IfCondition, UnlessCondition
 from launch.launch_description_sources import PythonLaunchDescriptionSource
 from launch.substitutions import Command, FindExecutable, LaunchConfiguration, PathJoinSubstitution
 from launch_ros.actions import Node
 from launch_ros.substitutions import FindPackageShare
 from ur_moveit_config.launch_common import load_yaml
 import xacro
 def generate_launch_description():
    declared_arguments = []
    # UR specific arguments
    declared_arguments.append(
        DeclareLaunchArgument(
            "rbs_robot_type",
            description="Type of robot by name",
            choices=["ur3", "ur3e", "ur5", "ur5e", "ur10", "ur10e", "ur16e"],
            default_value="ur5e",
        )
    )
    declared_arguments.append(
        DeclareLaunchArgument(
            "with_gripper",
            default_value="false",
            description="With gripper or not?",
        )
    )
    declared_arguments.append(
        DeclareLaunchArgument(
            "safety_limits",
            default_value="true",
            description="Enables the safety limits controller if true.",
        )
    )
    declared_arguments.append(
        DeclareLaunchArgument(
            "safety_pos_margin",
            default_value="0.15",
            description="The margin to lower and upper limits in the safety controller.",
        )
    )
    declared_arguments.append(
        DeclareLaunchArgument(
            "safety_k_position",
            default_value="20",
            description="k-position factor in the safety controller.",
        )
    )
    # General arguments
    declared_arguments.append(
        DeclareLaunchArgument(
            "runtime_config_package",
            default_value="ur_moveit_config",
            description='Package with the controller\'s configuration in "config" folder. \
        Usually the argument is not set, it enables use of a custom setup.',
        )
    )
    declared_arguments.append(
        DeclareLaunchArgument(
            "controllers_file",
            default_value="ur_controllers.yaml",
            description="YAML file with the controllers configuration.",
        )
    )
    declared_arguments.append(
        DeclareLaunchArgument(
            "controllers_with_gripper_file",
            default_value="ur_plus_gripper_controllers.yaml",
            description="YAML file with the UR + gripper_controller configuration.",
        )
    )
    declared_arguments.append(
        DeclareLaunchArgument(
            "description_package",
            default_value="ur_description",
            description="Description package with robot URDF/XACRO files. Usually the argument \
        is not set, it enables use of a custom description.",
        )
    )
    declared_arguments.append(
        DeclareLaunchArgument(
            "description_file",
            default_value="ur.urdf.xacro",
            description="URDF/XACRO description file with the robot.",
        )
    )
    declared_arguments.append(
        DeclareLaunchArgument(
            "tf_prefix",
            default_value='""',
            description="tf_prefix of the joint names, useful for \
        multi-robot setup. If changed than also joint names in the controllers' configuration \
        have to be updated.",
        )
    )
    declared_arguments.append(
        DeclareLaunchArgument(
            "start_joint_controller",
            default_value="false",
            description="Enable headless mode for robot control",
        )
    )
    declared_arguments.append(
        DeclareLaunchArgument(
            "initial_joint_controller",
            default_value="joint_trajectory_controller",
            description="Robot controller to start.",
        )
    )
    declared_arguments.append(
        DeclareLaunchArgument(
            "initial_gripper_controller",
            default_value="gripper_controller",
            description="Robot controller to start.",
        )
    )
    declared_arguments.append(
        DeclareLaunchArgument(
            "moveit_config_package",
            default_value="ur_moveit_config",
            description="MoveIt config package with robot SRDF/XACRO files. Usually the argument \
        is not set, it enables use of a custom moveit config.",
        )
    )
    declared_arguments.append(
        DeclareLaunchArgument(
            "moveit_config_file",
            default_value="ur.srdf.xacro",
            description="MoveIt SRDF/XACRO description file with the robot.",
        )
    )
    declared_arguments.append(
        DeclareLaunchArgument(
            "use_sim_time",
            default_value="true",
            description="Make MoveIt to use simulation time. This is needed for the trajectory planing in simulation.",
        )
    )
    declared_arguments.append(
        DeclareLaunchArgument("launch_rviz", default_value="true", description="Launch RViz?")
    )
    declared_arguments.append(
        DeclareLaunchArgument("sim_gazebo", default_value="false", description="Gazebo Simulation")
    )
    # Simulation backend switches; each value is forwarded to the robot xacro
    # under the same name ("sim_mujoco:=", "sim_fake:=").
    declared_arguments.append(
        DeclareLaunchArgument("sim_mujoco", default_value="true", description="MuJoCo Simulation")
    )
    declared_arguments.append(
        DeclareLaunchArgument("sim_fake", default_value="false", description="Fake Simulation")
    )
    # Initialize Arguments
    rbs_robot_type = LaunchConfiguration("rbs_robot_type")
    safety_limits = LaunchConfiguration("safety_limits")
    safety_pos_margin = LaunchConfiguration("safety_pos_margin")
    safety_k_position = LaunchConfiguration("safety_k_position")
    # General arguments
    runtime_config_package = LaunchConfiguration("runtime_config_package")
    with_gripper_condition = LaunchConfiguration("with_gripper")
    controllers_file = LaunchConfiguration("controllers_file")
    description_package = LaunchConfiguration("description_package")
    description_file = LaunchConfiguration("description_file")
    tf_prefix = LaunchConfiguration("tf_prefix")
    start_joint_controller = LaunchConfiguration("start_joint_controller")
    initial_joint_controller = LaunchConfiguration("initial_joint_controller")
    initial_gripper_controller = LaunchConfiguration("initial_gripper_controller")
    launch_rviz = LaunchConfiguration("launch_rviz")
    moveit_config_package = LaunchConfiguration("moveit_config_package")
    moveit_config_file = LaunchConfiguration("moveit_config_file")
    use_sim_time = LaunchConfiguration("use_sim_time")
    sim_gazebo = LaunchConfiguration("sim_gazebo")
    sim_mujoco = LaunchConfiguration("sim_mujoco")
    sim_fake = LaunchConfiguration("sim_fake")
    initial_joint_controllers_file_path = PathJoinSubstitution(
        [FindPackageShare(runtime_config_package), "config", controllers_file]
    )
    rviz_config_file = PathJoinSubstitution(
        [FindPackageShare(moveit_config_package), "rviz", "view_robot.rviz"]
    )
    world_config_file = PathJoinSubstitution(
        [FindPackageShare("rbs_simulation"), "worlds", "mir.sdf"]
    )
    mujoco_model = PathJoinSubstitution(
        [FindPackageShare("rbs_simulation"), "mujoco_model", "current_mj.xml"]
    )
    assemble_dir = os.path.join(
        get_package_share_directory("rbs_task_planner"), "example", "sdf_models"
    )
    points_params = load_yaml("rbs_bt_executor", "config/gripperPositions.yaml")
    robot_description_content = Command(
        [
            PathJoinSubstitution([FindExecutable(name="xacro")]),
            " ",
            PathJoinSubstitution(
                [FindPackageShare(description_package), "urdf", description_file]
            ),
            " ",
            "safety_limits:=", safety_limits, " ",
            "safety_pos_margin:=", safety_pos_margin, " ",
            "safety_k_position:=", safety_k_position, " ",
            "name:=", "ur", " ",
            "ur_type:=", rbs_robot_type, " ",
            "tf_prefix:=", tf_prefix, " ",
            "sim_mujoco:=", sim_mujoco, " ",
            "sim_gazebo:=", sim_gazebo, " ",
            "sim_fake:=", sim_fake, " ",
            "simulation_controllers:=", initial_joint_controllers_file_path, " ",
            "with_gripper:=", with_gripper_condition, " ",
            "mujoco_model:=", mujoco_model,
        ]
    )
    robot_description = {"robot_description": robot_description_content}
    control_node = Node(
        package="controller_manager",
        executable="ros2_control_node",
        parameters=[robot_description, initial_joint_controllers_file_path],
        output="both",
        remappings=[
            ('motion_control_handle/target_frame', 'target_frame'),
            ('cartesian_compliance_controller/target_frame', 'target_frame'),
            ('cartesian_compliance_controller/target_wrench', 'target_wrench'),
            ('cartesian_compliance_controller/ft_sensor_wrench', 'ft_sensor_wrench'),
            ]
    )
    robot_state_publisher_node = Node(
        package="robot_state_publisher",
        executable="robot_state_publisher",
        output="both",
        parameters=[{"use_sim_time": True}, robot_description],
    )
    joint_state_broadcaster_spawner = Node(
        package="controller_manager",
        executable="spawner",
        arguments=["joint_state_broadcaster", "-c", "/controller_manager"],
    )
    # There may be other controllers of the joints, but this is the initially-started one
    initial_joint_controller_spawner_started = Node(
        package="controller_manager",
        executable="spawner",
        arguments=[initial_joint_controller, "-c", "/controller_manager"],
        condition=IfCondition(start_joint_controller),
    )
    initial_joint_controller_spawner_stopped = Node(
        package="controller_manager",
        executable="spawner",
        arguments=[initial_joint_controller, "-c", "/controller_manager", "--inactive"],
        condition=UnlessCondition(start_joint_controller),
    )
    gripper_controller = ExecuteProcess(
        cmd=['ros2', 'control', 'load_controller', '--set-state', 'active',
             "gripper_controller"],
        output='screen',
        condition=IfCondition(with_gripper_condition)
    )
    cartesian_motion_controller_spawner = Node(
        package="controller_manager",
        executable="spawner",
        arguments=["cartesian_motion_controller", "--inactive", "-c", "/controller_manager"],
    )
    motion_control_handle_spawner = Node(
        package="controller_manager",
        executable="spawner",
        arguments=["motion_control_handle", "--inactive", "-c", "/controller_manager"],
    )
    cartesian_compliance_controller_spawner = Node(
        package="controller_manager",
        executable="spawner",
        arguments=["cartesian_compliance_controller", "--inactive", "-c", "/controller_manager"],
    )
    # Gazebo nodes
    gazebo = IncludeLaunchDescription(
            PythonLaunchDescriptionSource(
                [os.path.join(get_package_share_directory('ros_ign_gazebo'),
                              'launch', 'ign_gazebo.launch.py')]),
                launch_arguments=[('ign_args', [' -r ',world_config_file, " --physics-engine ignition-physics-dartsim-plugin --render-engine ogre2"])],
                condition=IfCondition(sim_gazebo))
    # Spawn robot
    gazebo_spawn_robot = Node(package='ros_ign_gazebo', executable='create',
                 arguments=[
                    '-name', rbs_robot_type,
                    '-x', '0.0',
                    '-z', '0.0',
                    '-y', '0.0',
                    '-topic', '/robot_description'],
                 output='screen',
                 condition=IfCondition(sim_gazebo))
    # MoveIt Configuration
    robot_description_semantic_content = Command(
        [
            PathJoinSubstitution([FindExecutable(name="xacro")]),
            " ",
            PathJoinSubstitution(
                [FindPackageShare(moveit_config_package), "srdf", moveit_config_file]
            ),
            " ",
            "name:=",
            "ur",
            " ",
            "tf_prefix:=",
            tf_prefix,
            " ",
            "with_gripper:=",
            with_gripper_condition
        ]
    )
    robot_description_semantic = {"robot_description_semantic": robot_description_semantic_content}
    robot_description_kinematics = PathJoinSubstitution(
        [FindPackageShare(moveit_config_package), "config", "kinematics.yaml"]
    )
    # Planning Configuration
    ompl_planning_pipeline_config = {
        "move_group": {
            "planning_plugin": "ompl_interface/OMPLPlanner",
            "request_adapters": """default_planner_request_adapters/AddTimeOptimalParameterization default_planner_request_adapters/FixWorkspaceBounds default_planner_request_adapters/FixStartStateBounds default_planner_request_adapters/FixStartStateCollision default_planner_request_adapters/FixStartStatePathConstraints""",
            "start_state_max_bounds_error": 0.1,
        }
    }
    ompl_planning_yaml = load_yaml("ur_moveit_config", "config/ompl_planning.yaml")
    ompl_planning_pipeline_config["move_group"].update(ompl_planning_yaml)
    controllers_yaml = load_yaml("ur_moveit_config", "config/controllers.yaml")
    moveit_controllers = {
        "moveit_simple_controller_manager": controllers_yaml,
        "moveit_controller_manager": "moveit_simple_controller_manager/MoveItSimpleControllerManager",
    }
    trajectory_execution = {
        "moveit_manage_controllers": True,
        "trajectory_execution.allowed_execution_duration_scaling": 100.0,
        "trajectory_execution.allowed_goal_duration_margin": 0.5,
        "trajectory_execution.allowed_start_tolerance": 0.01,
    }
    planning_scene_monitor_parameters = {
        "publish_planning_scene": True,
        "publish_geometry_updates": True,
        "publish_state_updates": True,
        "publish_transforms_updates": True,
    }
    move_group_node = Node(
        package="moveit_ros_move_group",
        executable="move_group",
        output="screen",
        parameters=[
            robot_description,
            robot_description_semantic,
            robot_description_kinematics,
            ompl_planning_pipeline_config,
            trajectory_execution,
            moveit_controllers,
            planning_scene_monitor_parameters,
            {"use_sim_time": use_sim_time},
        ],
    )
    rviz_node = Node(
        package="rviz2",
        executable="rviz2",
        name="rviz2",
        output="log",
        arguments=["-d", rviz_config_file],
        parameters=[
            robot_description,
            robot_description_semantic,
            robot_description_kinematics,
        ],
        condition=IfCondition(launch_rviz),
    )
    # TODO: Launch skill servers in other launch file
    move_topose_action_server = Node(
        package="rbs_skill_servers",
        executable="move_topose_action_server",
        parameters=[
            robot_description,
            robot_description_semantic,
            robot_description_kinematics,
            {"use_sim_time": use_sim_time},
        ]
    )
    gripper_control_node = Node(
        package="rbs_skill_servers",
        executable="gripper_control_action_server",
        parameters= [
            robot_description,
            robot_description_semantic,
            robot_description_kinematics,
            {"use_sim_time": use_sim_time},
        ],
        condition=IfCondition(with_gripper_condition)
    )
    move_cartesian_path_action_server = Node(
        package="rbs_skill_servers",
        executable="move_cartesian_path_action_server",
        parameters=[
            robot_description,
            robot_description_semantic,
            robot_description_kinematics,
            {"use_sim_time": use_sim_time},
        ]
    )
    move_joint_state_action_server = Node(
        package="rbs_skill_servers",
        executable="move_to_joint_states_action_server",
        parameters=[
            robot_description,
            robot_description_semantic,
            robot_description_kinematics,
            {"use_sim_time": use_sim_time},
        ]
    )
    grasp_pose_loader = Node(
        package="rbs_skill_servers",
        executable="pick_place_pose_loader_service_server",
        output="screen",
        emulate_tty=True,
        parameters=[
            points_params
        ]
    )
    assemble_state = Node(
        package="rbs_skill_servers",
        executable="assemble_state_service_server",
        output="screen",
        parameters=[
            {'assemble_tf_prefix': 'ASSEMBLE_'},
            {'assemble_dir': assemble_dir}
        ]
    )
    # Service server that receives the world file and the Gazebo model search
    # path as parameters. It is defined once; the node was previously
    # constructed three times with identical arguments and only the last
    # assignment was ever used.
    # NOTE: os.environ[...] raises KeyError when IGN_GAZEBO_RESOURCE_PATH is
    # not set in the launching environment.
    moveit_planning_scene_init = Node(
        package="rbs_skill_servers",
        executable="moveit_update_planning_scene_service_server",
        output="screen",
        parameters=[
            {'init_scene': world_config_file},
            {'models_paths': os.environ['IGN_GAZEBO_RESOURCE_PATH']}
        ]
    )
    # add_planning_scene_object = Node(
    #     package="rbs_skill_servers",
    #     executable="add_planning_scene_object_service",
    #     output="screen",
    #     parameters=[
    #         robot_description,
    #         robot_description_semantic,
    #         robot_description_kinematics,
    #         {"use_sim_time": use_sim_time},
    #     ]
    # )
    # remappings = [('/camera', '/camera/image'),
    #               ('/camera_info', '/camera/camera_info')]
    # # Bridge
    # bridge = Node(
    #     package='ros_gz_bridge',
    #     executable='parameter_bridge',
    #     arguments=[
    #         '/camera@sensor_msgs/msg/Image@gz.msgs.Image',
    #         '/camera_info@sensor_msgs/msg/CameraInfo@gz.msgs.CameraInfo',
    #         '/rgbd_camera/image@sensor_msgs/msg/Image@gz.msgs.Image',
    #         '/rgbd_camera/camera_info@sensor_msgs/msg/CameraInfo@gz.msgs.CameraInfo',
    #         '/rgbd_camera/depth_image@sensor_msgs/msg/Image@gz.msgs.Image',
    #         '/rgbd_camera/points@sensor_msgs/msg/PointCloud2@gz.msgs.PointCloudPacked'
    #     ],
    #     output='screen',
    #     remappings=remappings,
    # )
    # pc_filter = Node(
    #     package="rbs_perception",
    #     executable="pc_filter",
    #     output="screen",
    #     #tf_prefix=['xterm -e gdb -ex run --args'],
    # )
    grasp_marker = Node(
        package="rbs_perception",
        executable="grasp_marker_publish.py",
    )
    nodes_to_start = [
        control_node,
        robot_state_publisher_node,
        joint_state_broadcaster_spawner,
        rviz_node,
        initial_joint_controller_spawner_stopped,
        initial_joint_controller_spawner_started,
        gazebo,
        gazebo_spawn_robot,
        move_group_node,
        gripper_controller,
        gripper_control_node,
        move_topose_action_server,
        move_cartesian_path_action_server,
        move_joint_state_action_server,
        grasp_pose_loader,
        assemble_state,
        moveit_planning_scene_init,
        #add_planning_scene_object
    ]
    return LaunchDescription(declared_arguments + nodes_to_start)
--- a/rbs_simulation/launch/simulation_gazebo.launch.py
+++ b/rbs_simulation/launch/simulation_gazebo.launch.py
@ -18,7 +18,7 @@ def generate_launch_description():
    )
    # Selects which gz_sim include is started: the one guarded by
    # IfCondition(gazebo_gui) or the one guarded by UnlessCondition(gazebo_gui).
    declared_arguments.append(
        DeclareLaunchArgument("gazebo_gui",
-                              default_value="true",
+                              default_value="false",
                              description="Launch Gazebo GUI?")
    )
    declared_arguments.append(
@ -32,14 +32,15 @@ def generate_launch_description():
                              description="Launch env_manager?")
    )
    # "gazebo_world_filename" is still read through LaunchConfiguration in this
    # function, so its declaration is kept next to the new "rgbd_camera" one.
    declared_arguments.append(
        DeclareLaunchArgument("gazebo_world_filename",
                              default_value="",
                              description="Gazebo world file name")
    )
    # Enables the ros_gz_bridge node for the /rgbd_camera/* topics.
    declared_arguments.append(
+        DeclareLaunchArgument("rgbd_camera",
+                              default_value="true",
+                              description="Is the RGB-D camera used?")
    )
    sim_gazebo = LaunchConfiguration("sim_gazebo")
    gazebo_gui = LaunchConfiguration("gazebo_gui")
    debugger = LaunchConfiguration("debugger")
    rgbd_camera = LaunchConfiguration("rgbd_camera")
    launch_env_manager = LaunchConfiguration("launch_env_manager")
    gazebo_world_filename = LaunchConfiguration("gazebo_world_filename")
@ -51,7 +52,7 @@ def generate_launch_description():
                [os.path.join(get_package_share_directory('ros_gz_sim'),
                            'launch', 'gz_sim.launch.py')]),
            launch_arguments={
-                'gz_args': [' -r ',world_config_file, " -s"],
+                'gz_args': [' -s ', '-r ', world_config_file],
                "debugger": debugger,
            }.items(),
            condition=UnlessCondition(gazebo_gui))
@ -61,7 +62,7 @@ def generate_launch_description():
                [os.path.join(get_package_share_directory('ros_gz_sim'),
                            'launch', 'gz_sim.launch.py')]),
            launch_arguments={
-                'gz_args': [' -r ',world_config_file],
+                'gz_args': [' -r ', world_config_file],
                "debugger": debugger,
            }.items(),
            condition=IfCondition(gazebo_gui))
@ -73,11 +74,26 @@ def generate_launch_description():
        condition=IfCondition(launch_env_manager)
    )
    rgbd_sensor_bridge = Node(
        package="ros_gz_bridge",
        executable="parameter_bridge",
        arguments=[
            '/rgbd_camera/image@sensor_msgs/msg/Image@gz.msgs.Image',
            '/rgbd_camera/camera_info@sensor_msgs/msg/CameraInfo@gz.msgs.CameraInfo',
            '/rgbd_camera/depth_image@sensor_msgs/msg/Image@gz.msgs.Image',
            '/rgbd_camera/points@sensor_msgs/msg/PointCloud2@gz.msgs.PointCloudPacked'
        ],
        output='screen',
        condition=IfCondition(rgbd_camera)
    )
    clock_bridge = Node(
        package='ros_gz_bridge',
        executable='parameter_bridge',
-        arguments=['/clock@rosgraph_msgs/msg/Clock[ignition.msgs.Clock'],
+        arguments=[
            '/clock@rosgraph_msgs/msg/Clock[ignition.msgs.Clock'
        ],
        output='screen',
        condition=IfCondition(sim_gazebo))
@ -85,6 +101,7 @@ def generate_launch_description():
        gazebo,
        gazebo_server,
        clock_bridge,
        rgbd_sensor_bridge,
        env_manager
    ]
    return LaunchDescription(declared_arguments + nodes_to_start)
--- a/rbs_simulation/worlds/asm2.sdf
+++ b/rbs_simulation/worlds/asm2.sdf
@ -4,9 +4,18 @@
    <physics name='1ms' type='ignored'>
      <max_step_size>0.001</max_step_size>
      <real_time_factor>1.0</real_time_factor>
-      <real_time_update_rate>1000</real_time_update_rate>
+      <dart>
        <collision_detector>bullet</collision_detector>
        <solver>
          <solver_type>pgs</solver_type>
        </solver>
      </dart>
    </physics>
-    <plugin name='ignition::gazebo::systems::Physics' filename='ignition-gazebo-physics-system'/>
+    <plugin name='ignition::gazebo::systems::Physics' filename='ignition-gazebo-physics-system'>
      <engine>
        <filename>ignition-physics-dartsim-plugin</filename>
      </engine>
    </plugin>
    <plugin name='ignition::gazebo::systems::UserCommands' filename='ignition-gazebo-user-commands-system'/>
    <plugin name='ignition::gazebo::systems::SceneBroadcaster' filename='ignition-gazebo-scene-broadcaster-system'/>
    <plugin name='ignition::gazebo::systems::Contact' filename='ignition-gazebo-contact-system'/>
@ -94,15 +103,20 @@
      </link>
    </model>
    <!-- Manipulating objects -->
-			<!-- <include> -->
+			<include>
-			<!-- 	<name>board</name> -->
+				<name>board</name>
-			<!-- 	<uri>model://board</uri> -->
+				<uri>model://board</uri>
-   <!--      <pose>0.45 0.0 0.0 0.0 0.0 0.0</pose> -->
+        <pose>0.45 0.0 0.0 0.0 0.0 0.0</pose>
-			<!-- </include> -->
+			</include>
-			<!-- <include> -->
+			<include>
-			<!-- 	<name>bishop</name> -->
+				<name>bishop</name>
-			<!-- 	<uri>model://bishop</uri> -->
+				<uri>model://bishop</uri>
-   <!--      <pose>0.35 0.0 0.0 0.0 0.0 0.0</pose> -->
+        <pose>0.35 0.0 0.0 0.0 0.0 0.0</pose>
-			<!-- </include> -->
+			</include>
    <!-- <include> -->
    <!--   <name>bishop</name> -->
    <!--   <uri>model://box1</uri> -->
    <!--   <pose>0.45 0.0 0 0 0 0</pose> -->
    <!-- </include> -->
  </world>
 </sdf>
--- a/rbs_skill_servers/CMakeLists.txt
+++ b/rbs_skill_servers/CMakeLists.txt
@ -34,6 +34,7 @@ find_package(tinyxml2_vendor REQUIRED)
 find_package(TinyXML2 REQUIRED)
 find_package(Eigen3 3.3 REQUIRED)
 find_package(rbs_utils REQUIRED)
 find_package(moveit_servo REQUIRED)
 # Default to Fortress
 set(SDF_VER 12)
@ -79,6 +80,7 @@ set(deps
    moveit_ros_planning
    moveit_ros_planning_interface
    moveit_msgs
    moveit_servo
    geometry_msgs
    tf2_ros
    rclcpp_components
@ -133,6 +135,11 @@ add_executable(move_cartesian_path_action_server
               src/move_cartesian_path_action_server.cpp)
 ament_target_dependencies(move_cartesian_path_action_server ${deps})
 add_executable(servo_action_server
               src/moveit_servo_skill_server.cpp)
 ament_target_dependencies(servo_action_server ${deps})
 install(DIRECTORY include/ DESTINATION include)
 install(DIRECTORY launch config DESTINATION share/${PROJECT_NAME})
@ -143,6 +150,7 @@ install(
          pick_place_pose_loader
          move_to_joint_states_action_server
          move_cartesian_path_action_server
          servo_action_server
  ARCHIVE DESTINATION lib
  LIBRARY DESTINATION lib
  RUNTIME DESTINATION lib/${PROJECT_NAME})
--- a/rbs_skill_servers/launch/skills.launch.py
+++ b/rbs_skill_servers/launch/skills.launch.py
@ -26,6 +26,10 @@ def launch_setup(context, *args, **kwargs):
        "rbs_skill_servers", "config/gripperPositions.yaml"
    )
    kinematics_yaml = load_yaml("rbs_arm", "config/kinematics.yaml")
    robot_description_kinematics = {"robot_description_kinematics": kinematics_yaml}
    move_topose_action_server = Node(
        package="rbs_skill_servers",
        executable="move_topose_action_server",
@ -38,12 +42,6 @@ def launch_setup(context, *args, **kwargs):
        ]
    )
    move_to_pose = Node(
        package="rbs_skill_servers",
        executable="move_to_pose.py",
        namespace=namespace
    )
    gripper_control_node = Node(
        package="rbs_skill_servers",
        executable="gripper_control_action_server",
@ -69,8 +67,16 @@ def launch_setup(context, *args, **kwargs):
        ]
    )
-    # FIXME: The name of this node, "move_topose," 
+    cartesian_move_to_pose_action_server = Node(
-    # is intended to be different from the actual MoveToPose node.
+        package="rbs_skill_servers",
        executable="move_to_pose.py",
        namespace=namespace,
        parameters=[
            {"use_sim_time": use_sim_time},
            {"robot_name": namespace}
            ]
    )
    move_joint_state_action_server = Node(
        package="rbs_skill_servers",
        executable="move_to_joint_states_action_server",
@ -95,7 +101,7 @@ def launch_setup(context, *args, **kwargs):
        gripper_control_node,
        move_cartesian_path_action_server,
        move_joint_state_action_server,
-        move_to_pose,
+        cartesian_move_to_pose_action_server,
        # grasp_pose_loader
    ]
    return nodes_to_start
--- a/rbs_skill_servers/package.xml
+++ b/rbs_skill_servers/package.xml
@ -13,6 +13,7 @@
  <depend>moveit_core</depend>
  <depend>moveit_ros_planning</depend>
  <depend>moveit_ros_planning_interface</depend>
  <depend>moveit_servo</depend>
  <depend>moveit_msgs</depend>
  <depend>tf2_ros</depend>
  <depend>rclcpp_action</depend>
--- a/rbs_skill_servers/scripts/move_to_pose.py
+++ b/rbs_skill_servers/scripts/move_to_pose.py
@ -5,35 +5,40 @@ from rclpy.node import Node
 import numpy as np
 from rclpy.callback_groups import ReentrantCallbackGroup
 from rclpy.executors import MultiThreadedExecutor
-
+import math
 from geometry_msgs.msg import Pose, PoseStamped
 from rbs_skill_interfaces.action import MoveitSendPose
 from scipy.spatial.transform import Rotation as R
 from scipy.spatial.transform import Slerp
 class PoseSubscriber(Node):
    """Helper node that forwards the controller's current pose to a parent node.

    It subscribes to the non-namespaced
    "/cartesian_motion_controller/current_pose" topic and hands every
    PoseStamped message to ``parent.on_pose_callback``.
    """
    def __init__(self, parent=None):
        super().__init__('pose_subscriber')
        # NOTE(review): despite the None default, parent is dereferenced below,
        # so a real parent object must always be supplied.
        self.parent = parent
        # Queue depth 1; the callback runs in the parent's callback group.
        self._sub = self.create_subscription(PoseStamped,
                                             "/cartesian_motion_controller/current_pose",
                                             self.parent.on_pose_callback, 1,
                                             callback_group=self.parent._callback_group)
        self.get_logger().info('PoseSubscriber node initialized')
 class CartesianMoveToPose(Node):
    def __init__(self):
-        super().__init__('cartesian_move_to_pose')
+        super().__init__('cartesian_move_to_pose') # pyright: ignore[]
        self.declare_parameter("base_link", "base_link")
        self.declare_parameter("robot_name", "")
        self._callback_group = ReentrantCallbackGroup()
        self._action_server = ActionServer(
            self,
            MoveitSendPose,
            'cartesian_move_to_pose',
            self.execute_callback, callback_group=self._callback_group)
        # for multirobot setup where each robot name is a namespace
        self.robot_name: str = self.get_parameter("robot_name").get_parameter_value().string_value
        self.robot_name = self.robot_name.lstrip('/').rstrip('/')
        self.robot_name = f"/{self.robot_name}" if self.robot_name else ""
        self._pub = self.create_publisher(PoseStamped,
-                                          "/cartesian_motion_controller/target_frame", 1,
+                                          f"{self.robot_name}/cartesian_motion_controller/target_frame", 1,
                                          callback_group=self._callback_group)
        self.current_pose = None
        self.goal_tolerance = 0.05
        self.max_speed = 0.1
        self.max_acceleration = 0.05
        self.base_link = self.get_parameter("base_link").get_parameter_value().string_value
    def on_pose_callback(self, msg: PoseStamped):
        if isinstance(msg, PoseStamped):
@ -41,13 +46,22 @@ class CartesianMoveToPose(Node):
    def execute_callback(self, goal_handle):
        self.get_logger().debug(f"Executing goal {goal_handle.request.target_pose}")
-        tp = PoseStamped()
+        target_pose = goal_handle.request.target_pose
-        tp.pose = goal_handle.request.target_pose
+        start_pose = self.current_pose.pose if self.current_pose else None
        tp.header.stamp = self.get_clock().now().to_msg()
        tp.header.frame_id = "base_link"
-        while self.get_distance_to_target(tp.pose) >= self.goal_tolerance:
+        if start_pose is None:
            self.get_logger().error("Current pose is not available")
            goal_handle.abort()
            return MoveitSendPose.Result()
        trajectory = self.generate_trajectory(start_pose, target_pose)
        for point in trajectory:
            tp = PoseStamped()
            tp.pose = point
            tp.header.stamp = self.get_clock().now().to_msg()
            tp.header.frame_id = self.base_link
            self._pub.publish(tp)
            rclpy.spin_once(self, timeout_sec=0.1)
        goal_handle.succeed()
@ -55,6 +69,77 @@ class CartesianMoveToPose(Node):
        result.success = True
        return result
    def generate_trajectory(self, start_pose, target_pose):
        """Build the list of intermediate poses leading from start to target.

        Positions move along the straight line between the two poses and are
        timed with a trapezoidal velocity profile (accelerate, cruise,
        decelerate) bounded by ``self.max_speed`` and
        ``self.max_acceleration``. Moves too short to reach ``max_speed`` use
        a triangular profile instead. Orientations are interpolated with
        Slerp over the same duration. The profile is sampled 10 times per
        second of profile time, always including both end points.

        Args:
            start_pose: pose exposing position.{x,y,z} and orientation.{x,y,z,w}.
            target_pose: pose exposing the same fields.

        Returns:
            A list of Pose. Normally the first entry is the start pose and the
            last entry is the target pose. If start and target positions
            coincide, the list holds only the target pose (no time base
            exists to interpolate the orientation over).
        """
        start_position = np.array([
            start_pose.position.x,
            start_pose.position.y,
            start_pose.position.z
        ])
        target_position = np.array([
            target_pose.position.x,
            target_pose.position.y,
            target_pose.position.z
        ])
        start_orientation = R.from_quat([
            start_pose.orientation.x,
            start_pose.orientation.y,
            start_pose.orientation.z,
            start_pose.orientation.w
        ])
        target_orientation = R.from_quat([
            target_pose.orientation.x,
            target_pose.orientation.y,
            target_pose.orientation.z,
            target_pose.orientation.w
        ])
        def make_pose(position, quat):
            # Pack an xyz position and an xyzw quaternion into a Pose message.
            pose = Pose()
            pose.position.x = float(position[0])
            pose.position.y = float(position[1])
            pose.position.z = float(position[2])
            pose.orientation.x = float(quat[0])
            pose.orientation.y = float(quat[1])
            pose.orientation.z = float(quat[2])
            pose.orientation.w = float(quat[3])
            return pose
        distance = float(np.linalg.norm(target_position - start_position))
        max_speed = self.max_speed
        max_acceleration = self.max_acceleration
        t_acc = max_speed / max_acceleration
        d_acc = 0.5 * max_acceleration * t_acc**2
        peak_speed = max_speed
        if distance < 2 * d_acc:
            # Triangular profile: max_speed is never reached, so the ramp
            # time, ramp distance and peak speed all have to be recomputed.
            t_acc = math.sqrt(distance / max_acceleration)
            d_acc = 0.5 * distance
            peak_speed = max_acceleration * t_acc
            t_flat = 0.0
        else:
            t_flat = (distance - 2 * d_acc) / max_speed
        total_time = 2 * t_acc + t_flat
        if total_time <= 0.0:
            # Zero-length move: Slerp needs strictly increasing key times and
            # the position fraction would divide by zero.
            return [make_pose(target_position, target_orientation.as_quat())]
        # At least one interval so the target itself is always emitted.
        num_points = max(1, int(total_time * 10))
        times = np.linspace(0, total_time, num_points + 1)
        key_rots = R.from_quat([start_orientation.as_quat(), target_orientation.as_quat()])
        slerp = Slerp([0, total_time], key_rots)
        trajectory = []
        for t in times:
            if t < t_acc:
                travelled = 0.5 * max_acceleration * t**2
            elif t < t_acc + t_flat:
                travelled = d_acc + peak_speed * (t - t_acc)
            else:
                # Deceleration: start at peak speed and slow down.
                t_decel = t - t_acc - t_flat
                travelled = (d_acc + peak_speed * t_flat
                             + peak_speed * t_decel
                             - 0.5 * max_acceleration * t_decel**2)
            # Clamp against floating-point drift so the target is never overshot.
            fraction = min(max(travelled / distance, 0.0), 1.0)
            intermediate_position = start_position + fraction * (target_position - start_position)
            intermediate_quat = slerp([t])[0].as_quat()
            trajectory.append(make_pose(intermediate_position, intermediate_quat))
        return trajectory
    def get_distance_to_target(self, target_pose: Pose):
        if self.current_pose is None or self.current_pose.pose is None:
            self.get_logger().warn("Current pose is not available")
@ -65,36 +150,40 @@ class CartesianMoveToPose(Node):
        current_position = np.array([
            current_pose.position.x,
            current_pose.position.y,
-            current_pose.position.z,
+            current_pose.position.z
            current_pose.orientation.x,
            current_pose.orientation.y,
            current_pose.orientation.z
        ])
        target_position = np.array([
            target_pose.position.x,
            target_pose.position.y,
-            target_pose.position.z,
+            target_pose.position.z
            target_pose.orientation.x,
            target_pose.orientation.y,
            target_pose.orientation.z
        ])
        # Проверка на наличие значений в массивах координат
        if np.any(np.isnan(current_position)) or np.any(np.isnan(target_position)):
            self.get_logger().error("Invalid coordinates")
            return None
        # Вычисляем расстояние между текущей и целевой позициями
        distance = np.linalg.norm(current_position - target_position)
        return distance
 class PoseSubscriber(Node):
    """Node that relays a robot's current Cartesian pose to its parent.

    Subscribes to "<robot_name>/cartesian_motion_controller/current_pose"
    and passes each PoseStamped message to ``parent.on_pose_callback``.
    """
    def __init__(self, parent: CartesianMoveToPose, robot_name: str):
        super().__init__('pose_subscriber') # pyright: ignore[]
        self.parent = parent
        pose_topic = f"{robot_name}/cartesian_motion_controller/current_pose"
        # Queue depth 1; the callback shares the parent's callback group.
        self._sub = self.create_subscription(
            PoseStamped,
            pose_topic,
            parent.on_pose_callback,
            1,
            callback_group=parent._callback_group,
        )
        self.get_logger().info('PoseSubscriber node initialized')
 def main(args=None):
    rclpy.init(args=args)
    cartesian_move_to_pose = CartesianMoveToPose()
-    pose_subscriber = PoseSubscriber(parent=cartesian_move_to_pose)
+    pose_subscriber = PoseSubscriber(parent=cartesian_move_to_pose, 
                                     robot_name=cartesian_move_to_pose.robot_name)
    executor = MultiThreadedExecutor()
    executor.add_node(cartesian_move_to_pose)
--- a/rbs_skill_servers/scripts/test_cartesian_controller.py
+++ b/rbs_skill_servers/scripts/test_cartesian_controller.py
@ -2,30 +2,36 @@
 import rclpy
 from rclpy.node import Node
 import argparse
 from geometry_msgs.msg import PoseStamped
 class CartesianControllerPublisher(Node):
-    def __init__(self, robot_name: str):
+    def __init__(self, robot_name: str, poses: dict):
        super().__init__("cartesian_controller_pose_publisher")
        self.publisher_ = self.create_publisher(
            PoseStamped, 
            "/" + robot_name + "/cartesian_motion_controller/target_frame", 10)
        timer_period = 0.5  # seconds
        self.timer = self.create_timer(timer_period, self.timer_callback)
        self.robot_name = robot_name
        self.poses = poses
    def timer_callback(self):
        pose = self.poses.get(self.robot_name, {
            'position': {'x': 0.0, 'y': 0.0, 'z': 0.0},
            'orientation': {'x': 0.0, 'y': 0.0, 'z': 0.0, 'w': 1.0}
        })
        msg = PoseStamped()
        msg.header.stamp = self.get_clock().now().to_msg()
        msg.header.frame_id = "base_link"
-        msg.pose.position.x = 0.7
+        msg.pose.position.x = pose['position']['x']
-        msg.pose.position.y = 0.0
+        msg.pose.position.y = pose['position']['y']
-        msg.pose.position.z = 0.45
+        msg.pose.position.z = pose['position']['z']
-        msg.pose.orientation.x = 0.0
+        msg.pose.orientation.x = pose['orientation']['x']
-        msg.pose.orientation.y = 0.707
+        msg.pose.orientation.y = pose['orientation']['y']
-        msg.pose.orientation.z = 0.0
+        msg.pose.orientation.z = pose['orientation']['z']
-        msg.pose.orientation.w = 0.707
+        msg.pose.orientation.w = pose['orientation']['w']
        self.publisher_.publish(msg)
@ -36,7 +42,21 @@ def main(args=None):
    parser = argparse.ArgumentParser(description='ROS2 Minimal Publisher')
    parser.add_argument('--robot-name', type=str, default='arm0', help='Specify the robot name')
    args = parser.parse_args()
-    minimal_publisher = CartesianControllerPublisher(args.robot_name)
+
    # Define poses for each robot
    poses = {
        'arm2': {
            'position': {'x': -0.3, 'y': 0.0, 'z': 0.45},
            'orientation': {'x': 0.0, 'y': -0.707, 'z': 0.0, 'w': 0.707}
        },
        'arm1': {
            'position': {'x': 0.3, 'y': 0.0, 'z': 0.45},
            'orientation': {'x': 0.0, 'y': 0.707, 'z': 0.0, 'w': 0.707}
        }
        # Add more robots and their poses as needed
    }
    minimal_publisher = CartesianControllerPublisher(args.robot_name, poses)
    rclpy.spin(minimal_publisher)
--- a/rbs_skill_servers/scripts/test_cartesian_controller_single.py
+++ b/rbs_skill_servers/scripts/test_cartesian_controller_single.py
@ -0,0 +1,47 @@
 import rclpy
 from rclpy.node import Node
 import argparse
 from geometry_msgs.msg import PoseStamped
 class CartesianControllerPublisher(Node):
    """Test node that repeatedly publishes one fixed target pose.

    Every 0.5 s a PoseStamped in the "base_link" frame is published on
    "/cartesian_motion_controller/target_frame".
    """
    def __init__(self, robot_name: str):
        # robot_name is accepted but not used: the topic below is not namespaced.
        super().__init__("cartesian_controller_pose_publisher")
        target_topic = "/cartesian_motion_controller/target_frame"
        self.publisher_ = self.create_publisher(PoseStamped, target_topic, 10)
        period_sec = 0.5
        self.timer = self.create_timer(period_sec, self.timer_callback)
    def timer_callback(self):
        """Stamp the fixed target pose with the current time and publish it."""
        target = PoseStamped()
        target.header.stamp = self.get_clock().now().to_msg()
        target.header.frame_id = "base_link"
        position = target.pose.position
        position.x, position.y, position.z = 0.2, 0.2, 0.2
        orientation = target.pose.orientation
        orientation.x, orientation.y, orientation.z, orientation.w = 0.0, 1.0, 0.0, 0.0
        self.publisher_.publish(target)
 def main(args=None):
    """Start the test publisher and spin it until interrupted.

    Args:
        args: command-line arguments forwarded to rclpy.init().
    """
    parser = argparse.ArgumentParser(description='ROS2 Minimal Publisher')
    parser.add_argument('--robot-name', type=str, default='arm0', help='Specify the robot name')
    # parse_known_args() tolerates arguments injected by ROS tooling
    # (e.g. "--ros-args ..."), which parse_args() would reject with an error.
    cli_args, _ = parser.parse_known_args()
    rclpy.init(args=args)
    minimal_publisher = CartesianControllerPublisher(cli_args.robot_name)
    try:
        rclpy.spin(minimal_publisher)
    except KeyboardInterrupt:
        pass
    finally:
        # Release the node and the context even when spin() is interrupted.
        minimal_publisher.destroy_node()
        if rclpy.ok():
            rclpy.shutdown()
 # Run main() only when this file is executed as a script, not when imported.
 if __name__ == '__main__':
    main()
--- a/rbs_skill_servers/src/move_cartesian_path_action_server.cpp
+++ b/rbs_skill_servers/src/move_cartesian_path_action_server.cpp
@ -1,5 +1,7 @@
 #include <functional>
 #include <memory>
 #include <moveit/robot_trajectory/robot_trajectory.h>
 #include <moveit/trajectory_processing/time_parameterization.h>
 #include <thread>
 #include "rclcpp/rclcpp.hpp"
@ -19,6 +21,7 @@
 #include "moveit/move_group_interface/move_group_interface.h"
 #include "moveit/planning_interface/planning_interface.h"
 #include "moveit/robot_model_loader/robot_model_loader.h"
 #include "moveit/trajectory_processing/time_optimal_trajectory_generation.h"
 /*
 #include <tf2/LinearMath/Quaternion.h>
@ -122,23 +125,59 @@ private:
    std::vector<geometry_msgs::msg::Pose> waypoints;
    auto current_pose = move_group_interface.getCurrentPose();
    // waypoints.push_back(current_pose.pose);
    // geometry_msgs::msg::Pose start_pose = current_pose.pose;
    geometry_msgs::msg::Pose target_pose = goal->target_pose;
    // target_pose.position = goal->target_pose.position;
    // int num_waypoints = 100;
    // for (int i = 1; i <= num_waypoints; ++i) {
    //   geometry_msgs::msg::Pose intermediate_pose;
    //   double fraction = static_cast<double>(i) / (num_waypoints + 1);
    //
    //   intermediate_pose.position.x =
    //       start_pose.position.x +
    //       fraction * (target_pose.position.x - start_pose.position.x);
    //   intermediate_pose.position.y =
    //       start_pose.position.y +
    //       fraction * (target_pose.position.y - start_pose.position.y);
    //   intermediate_pose.position.z =
    //       start_pose.position.z +
    //       fraction * (target_pose.position.z - start_pose.position.z);
    //
    //   intermediate_pose.orientation = start_pose.orientation;
    //
    //   waypoints.push_back(intermediate_pose);
    // }
    waypoints.push_back(target_pose);
    RCLCPP_INFO(this->get_logger(), "New cartesian target pose [%f, %f, %f]",
                target_pose.position.x, target_pose.position.y,
                target_pose.position.z);
-    // waypoints.push_back(start_pose.pose);
+
    moveit_msgs::msg::RobotTrajectory trajectory;
    const double jump_threshold = 0.0;
    const double eef_step = 0.001;
    double fraction = move_group_interface.computeCartesianPath(
        waypoints, eef_step, jump_threshold, trajectory);
    robot_trajectory::RobotTrajectory rt(
        move_group_interface.getCurrentState()->getRobotModel(),
        goal->robot_name);
    rt.setRobotTrajectoryMsg(*move_group_interface.getCurrentState(), trajectory);
    trajectory_processing::TimeOptimalTrajectoryGeneration tp;
    bool su = tp.computeTimeStamps(rt);
    rt.getRobotTrajectoryMsg(trajectory);
    moveit::planning_interface::MoveGroupInterface::Plan plan;
    plan.trajectory_ = trajectory;
    if (fraction > 0) {
      RCLCPP_INFO(this->get_logger(), "Planning success");
      moveit::core::MoveItErrorCode execute_err_code =
-          move_group_interface.execute(trajectory);
+          move_group_interface.execute(plan);
      if (execute_err_code == moveit::core::MoveItErrorCode::SUCCESS) {
        goal_handle->succeed(result);
        RCLCPP_INFO(this->get_logger(), "Successfully executed action goal");
--- a/rbs_skill_servers/src/moveit_servo_skill_server.cpp
+++ b/rbs_skill_servers/src/moveit_servo_skill_server.cpp
@ -0,0 +1,232 @@
 #include <chrono>
 #include <functional>
 #include <memory>
 #include <stdexcept>
 #include <string>
 #include <thread>
 #include <geometry_msgs/msg/detail/pose_stamped__struct.hpp>
 #include <geometry_msgs/msg/detail/transform_stamped__struct.hpp>
 #include <rclcpp/publisher.hpp>
 #include <rclcpp/qos.hpp>
 #include <tf2/LinearMath/Transform.h>
 #include <tf2/convert.h>
 #include <tf2_geometry_msgs/tf2_geometry_msgs.hpp>
 #include "rclcpp/rclcpp.hpp"
 #include "rclcpp/timer.hpp"
 #include "rclcpp_components/register_node_macro.hpp"
 // action libs
 #include "rbs_skill_interfaces/action/moveit_send_pose.hpp"
 #include "rbs_skill_interfaces/msg/action_feedback_status_constants.hpp"
 #include "rclcpp_action/rclcpp_action.hpp"
 #include "geometry_msgs/msg/pose_stamped.hpp"
 #include "geometry_msgs/msg/quaternion.hpp"
 #include "geometry_msgs/msg/transform.hpp"
 // moveit libs
 #include <moveit_servo/make_shared_from_pool.h>
 #include <moveit_servo/pose_tracking.h>
 #include <moveit_servo/servo.h>
 #include <moveit_servo/servo_parameters.h>
 #include <moveit_servo/status_codes.h>
 namespace rbs_skill_actions {
 /// Subscribes to a servo status topic and logs every change of status.
 class StatusMonitor {
 public:
  /// @param node  node used to create the subscription and to log.
  /// @param topic name of the std_msgs/Int8 status topic to listen to.
  StatusMonitor(const rclcpp::Node::SharedPtr &node, const std::string &topic)
      : m_node(node) {
    sub_ = node->create_subscription<std_msgs::msg::Int8>(
        topic, rclcpp::SystemDefaultsQoS(),
        [this](const std_msgs::msg::Int8::ConstSharedPtr &msg) {
          statusCB(msg);
        });
  }
  // The subscription callback captures `this`, so a copy would leave the
  // callback pointing at the original object.
  StatusMonitor(const StatusMonitor &) = delete;
  StatusMonitor &operator=(const StatusMonitor &) = delete;
 private:
  rclcpp::Node::SharedPtr m_node;
  /// Logs the status name when the received code differs from the last one.
  void statusCB(const std_msgs::msg::Int8::ConstSharedPtr &msg) {
    const auto latest_status =
        static_cast<moveit_servo::StatusCode>(msg->data);
    if (latest_status == status_) {
      return;
    }
    status_ = latest_status;
    // Look the name up with find(): at() would throw inside the callback if
    // the topic ever carried a value that has no entry in the map.
    const auto entry = moveit_servo::SERVO_STATUS_CODE_MAP.find(status_);
    if (entry == moveit_servo::SERVO_STATUS_CODE_MAP.end()) {
      RCLCPP_WARN_STREAM(m_node->get_logger(),
                         "Servo status: unknown code "
                             << static_cast<int>(msg->data));
      return;
    }
    RCLCPP_INFO_STREAM(m_node->get_logger(),
                       "Servo status: " << entry->second);
  }
  moveit_servo::StatusCode status_ = moveit_servo::StatusCode::INVALID;
  rclcpp::Subscription<std_msgs::msg::Int8>::SharedPtr sub_;
 };
 class MoveServoActionServer : public rclcpp::Node {
 public:
  using MoveitSendPose = rbs_skill_interfaces::action::MoveitSendPose;
  // explicit MoveCartesianActionServer(const rclcpp::Node::SharedPtr& node)
  explicit MoveServoActionServer(const rclcpp::Node::SharedPtr &node)
      : Node("move_servo_action_server"), m_node(node) {
    auto servo_parameters =
        moveit_servo::ServoParameters::makeServoParameters(node);
    if (servo_parameters == nullptr) {
      RCLCPP_FATAL(node->get_logger(), "Could not get servo parameters!");
      // exit(EXIT_FAILURE);
    }
    // Load the planning scene monitor
    m_planning_scene_monitor =
        std::make_shared<planning_scene_monitor::PlanningSceneMonitor>(
            node, "robot_description");
    if (!m_planning_scene_monitor->getPlanningScene()) {
      RCLCPP_ERROR_STREAM(node->get_logger(),
                          "Error in setting up the PlanningSceneMonitor.");
    }
    m_planning_scene_monitor->providePlanningSceneService();
    m_planning_scene_monitor->startSceneMonitor();
    m_planning_scene_monitor->startWorldGeometryMonitor(
        planning_scene_monitor::PlanningSceneMonitor::
            DEFAULT_COLLISION_OBJECT_TOPIC,
        planning_scene_monitor::PlanningSceneMonitor::
            DEFAULT_PLANNING_SCENE_WORLD_TOPIC,
        false /* skip octomap monitor */);
    m_planning_scene_monitor->startStateMonitor("/joint_states");
    m_planning_scene_monitor->startPublishingPlanningScene(
        planning_scene_monitor::PlanningSceneMonitor::UPDATE_SCENE);
    // Wait for Planning Scene Monitor to setup
    if (!m_planning_scene_monitor->waitForCurrentRobotState(
            node->now(), 5.0 /* seconds */)) {
      RCLCPP_ERROR_STREAM(
          node->get_logger(),
          "Error waiting for current robot state in PlanningSceneMonitor.");
    }
    // Create the pose tracker
    m_tracker = std::make_shared<moveit_servo::PoseTracking>(
        node, servo_parameters, m_planning_scene_monitor);
    m_status_monitor = std::make_shared<StatusMonitor>(node, servo_parameters->status_topic);
  }
  void init() {
    m_action_server = rclcpp_action::create_server<MoveitSendPose>(
        m_node->get_node_base_interface(), m_node->get_node_clock_interface(),
        m_node->get_node_logging_interface(),
        m_node->get_node_waitables_interface(), "move_servo",
        std::bind(&MoveServoActionServer::goal_callback, this,
                  std::placeholders::_1, std::placeholders::_2),
        std::bind(&MoveServoActionServer::cancel_callback, this,
                  std::placeholders::_1),
        std::bind(&MoveServoActionServer::accepted_callback, this,
                  std::placeholders::_1));
    m_pose_pub = m_node->create_publisher<geometry_msgs::msg::PoseStamped>("target_pose", rclcpp::SystemDefaultsQoS());
  }
 private:
  rclcpp::Node::SharedPtr m_node;
  rclcpp_action::Server<MoveitSendPose>::SharedPtr m_action_server;
  rclcpp::Publisher<geometry_msgs::msg::PoseStamped>::SharedPtr m_pose_pub;
  planning_scene_monitor::PlanningSceneMonitorPtr m_planning_scene_monitor;
  moveit_servo::PoseTrackingPtr m_tracker;
  std::shared_ptr<StatusMonitor> m_status_monitor;
  using ServerGoalHandle = rclcpp_action::ServerGoalHandle<MoveitSendPose>;
  rclcpp_action::GoalResponse
  goal_callback(const rclcpp_action::GoalUUID &uuid,
                std::shared_ptr<const MoveitSendPose::Goal> goal) {
    RCLCPP_INFO(
        this->get_logger(),
        "Received goal request for robot [%s] with Pose [%f, %f, %f, %f, %f, "
        "%f, %f]",
        goal->robot_name.c_str(), goal->target_pose.position.x,
        goal->target_pose.position.y, goal->target_pose.position.z,
        goal->target_pose.orientation.x, goal->target_pose.orientation.y,
        goal->target_pose.orientation.z, goal->target_pose.orientation.w);
    (void)uuid;
    return rclcpp_action::GoalResponse::ACCEPT_AND_EXECUTE;
  }
  rclcpp_action::CancelResponse
  cancel_callback(const std::shared_ptr<ServerGoalHandle> goal_handle) {
    RCLCPP_INFO(this->get_logger(), "Received cancel request");
    (void)goal_handle;
    return rclcpp_action::CancelResponse::ACCEPT;
  }
  void accepted_callback(const std::shared_ptr<ServerGoalHandle> goal_handle) {
    using namespace std::placeholders;
    std::thread(std::bind(&MoveServoActionServer::execute, this, _1),
                goal_handle)
        .detach();
    // std::thread(
    //     [this, goal_handle]() {
    //         execute(goal_handle);
    //     }).detach();
  }
  void execute(const std::shared_ptr<ServerGoalHandle> goal_handle) {
    RCLCPP_INFO(this->get_logger(), "Executing action goal");
    const auto goal = goal_handle->get_goal();
    auto result = std::make_shared<MoveitSendPose::Result>();
    geometry_msgs::msg::TransformStamped current_ee_pose;
    m_tracker->getCommandFrameTransform(current_ee_pose);
    // Convert it to a Pose
    geometry_msgs::msg::PoseStamped target_pose;
    target_pose.header.frame_id = current_ee_pose.header.frame_id;
    target_pose.pose = goal->target_pose;
    // target_pose.pose.position.y = current_ee_pose.transform.translation.y;
    // target_pose.pose.position.z = current_ee_pose.transform.translation.z;
    // target_pose.pose.orientation = current_ee_pose.transform.rotation;
    // target_pose.pose.position.x += 0.1;
    m_tracker->resetTargetPose();
    target_pose.header.stamp = m_node->now();
    m_pose_pub->publish(target_pose);
    Eigen::Vector3d lin_tol{ 0.001, 0.001, 0.001 };
    double rot_tol = 0.01;
    // Run the pose tracking
    moveit_servo::PoseTrackingStatusCode tracking_status =
        m_tracker->moveToPose(lin_tol, rot_tol, 0.1 /* target pose timeout */);
    if (tracking_status == moveit_servo::PoseTrackingStatusCode::SUCCESS) {
      result->success = true;
      goal_handle->succeed(result);
      RCLCPP_INFO(this->get_logger(), "Pose tracking succeeded.");
    } else {
      result->success = false;
      goal_handle->abort(result);
      RCLCPP_INFO(this->get_logger(), "Pose tracking failed with status: %d", static_cast<int>(tracking_status));
    }
    RCLCPP_INFO_STREAM(m_node->get_logger(), "Pose tracker exited with status: "
                             << moveit_servo::POSE_TRACKING_STATUS_CODE_MAP.at(tracking_status));
  }
 }; // class MoveCartesianActionServer
 } // namespace rbs_skill_actions
 /// Entry point: creates the "move_servo" node, builds the action server on
 /// it, starts the action interface after a short delay and spins the node.
 int main(int argc, char **argv) {
  rclcpp::init(argc, argv);
  rclcpp::NodeOptions node_options;
  // node_options.automatically_declare_parameters_from_overrides(true);
  // NOTE(review): called without an argument this looks like the getter
  // overload, whose result is discarded; pass `true` if undeclared parameters
  // are really meant to be allowed — confirm against rclcpp::NodeOptions.
  node_options.allow_undeclared_parameters();
  auto node = rclcpp::Node::make_shared("move_servo", "", node_options);
  rbs_skill_actions::MoveServoActionServer server(node);
  // The action server is created 3 s after start-up, from a helper thread,
  // while the main thread is already spinning the node.
  std::thread run_server([&server]() {
    rclcpp::sleep_for(std::chrono::seconds(3));
    // Skip initialisation if the context was shut down during the delay.
    if (rclcpp::ok()) {
      server.init();
    }
  });
  rclcpp::spin(node);
  run_server.join();
  rclcpp::shutdown();
  return 0;
 }
--- a/Show more
+++ b/Show more
		`@ -0,0 +1,2 @@`
							`from .model_collection_randomizer import ModelCollectionRandomizer`
							`from .xacro2sdf import xacro2sdf`
		`@ -0,0 +1 @@`
							`from .manipulation import ManipulationGazeboEnvRandomizer`