Added rbs_gym package for RL & multi-robot launch setup

Ilya Uraev 2024-07-04 11:38:08 +00:00 committed by Igor Brylyov
parent f92670cd0d
commit b58307dea1
103 changed files with 15170 additions and 653 deletions

.gitignore

@@ -1,4 +1,6 @@
ref
*.blend1
*.pyc
*.vscode
**/tensorboard_logs/**
**/logs/**

env_manager/rbs_gym/CMakeLists.txt

@@ -0,0 +1,39 @@
cmake_minimum_required(VERSION 3.8)
project(rbs_gym)
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
add_compile_options(-Wall -Wextra -Wpedantic)
endif()
# find dependencies
find_package(ament_cmake REQUIRED)
# uncomment the following section in order to fill in
# further dependencies manually.
# find_package(<dependency> REQUIRED)
ament_python_install_package(${PROJECT_NAME})
install(PROGRAMS
scripts/train.py
scripts/spawner.py
scripts/velocity.py
scripts/test_agent.py
scripts/evaluate.py
DESTINATION lib/${PROJECT_NAME}
)
if(BUILD_TESTING)
find_package(ament_lint_auto REQUIRED)
# the following line skips the linter which checks for copyrights
# comment the line when a copyright and license is added to all source files
set(ament_cmake_copyright_FOUND TRUE)
# the following line skips cpplint (only works in a git repo)
# comment the line when this package is in a git repo and when
# a copyright and license is added to all source files
set(ament_cmake_cpplint_FOUND TRUE)
ament_lint_auto_find_test_dependencies()
endif()
install(DIRECTORY rbs_gym/envs/worlds launch DESTINATION share/${PROJECT_NAME})
ament_package()
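
Since the Python entry points are installed with install(PROGRAMS ...), each script must be executable and start with a shebang so it can be run via ros2 run rbs_gym train.py. A hypothetical minimal skeleton (the actual scripts in this commit will differ):

#!/usr/bin/env python3
# Hypothetical minimal entry point for an installed script such as scripts/train.py.
import rclpy

def main():
    rclpy.init()
    # ... set up the training environment and node here ...
    rclpy.shutdown()

if __name__ == "__main__":
    main()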

env_manager/rbs_gym/LICENSE

@@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.


@@ -0,0 +1,42 @@
# Reach
Reach-Gazebo-v0:
  policy: "MlpPolicy"
  policy_kwargs:
    n_critics: 2
    net_arch: [128, 64]
  n_timesteps: 200000
  buffer_size: 25000
  learning_starts: 5000
  batch_size: 512
  learning_rate: lin_0.0002
  gamma: 0.95
  tau: 0.001
  ent_coef: "auto_0.1"
  target_entropy: "auto"
  train_freq: [1, "episode"]
  gradient_steps: 100
  noise_type: "normal"
  noise_std: 0.025
  use_sde: False
  optimize_memory_usage: False

Reach-ColorImage-Gazebo-v0:
  policy: "CnnPolicy"
  policy_kwargs:
    n_critics: 2
    net_arch: [128, 128]
  n_timesteps: 50000
  buffer_size: 25000
  learning_starts: 5000
  batch_size: 32
  learning_rate: lin_0.0002
  gamma: 0.95
  tau: 0.0005
  ent_coef: "auto_0.1"
  target_entropy: "auto"
  train_freq: [1, "episode"]
  gradient_steps: 100
  noise_type: "normal"
  noise_std: 0.025
  use_sde: False
  optimize_memory_usage: False
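
These entries appear to follow the stable-baselines3 / rl-zoo hyperparameter convention: ent_coef: "auto_0.1" is a learned entropy coefficient initialized at 0.1, and the lin_ prefix conventionally denotes a linear learning-rate schedule. A minimal sketch, assuming exactly that convention, of loading one entry; the file name and stand-in environment are hypothetical:

import yaml
from stable_baselines3 import SAC

def linear_schedule(initial_value: float):
    # "lin_0.0002" decays the learning rate linearly from initial_value to zero
    return lambda progress_remaining: progress_remaining * initial_value

with open("sac.yaml") as f:  # hypothetical file name for the block above
    cfg = yaml.safe_load(f)["Reach-Gazebo-v0"]

lr = cfg["learning_rate"]
if isinstance(lr, str) and lr.startswith("lin_"):
    lr = linear_schedule(float(lr.removeprefix("lin_")))

model = SAC(
    cfg["policy"],
    "Pendulum-v1",  # stand-in continuous-control env; the real ID is Reach-Gazebo-v0
    learning_rate=lr,
    buffer_size=cfg["buffer_size"],
    learning_starts=cfg["learning_starts"],
    batch_size=cfg["batch_size"],
    gamma=cfg["gamma"],
    tau=cfg["tau"],
    ent_coef=cfg["ent_coef"],  # "auto_0.1": learned entropy coefficient, initial value 0.1
    train_freq=tuple(cfg["train_freq"]),
    gradient_steps=cfg["gradient_steps"],
    policy_kwargs=cfg["policy_kwargs"],
)
model.learn(total_timesteps=cfg["n_timesteps"])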


@@ -0,0 +1,39 @@
Reach-Gazebo-v0:
  policy: "MlpPolicy"
  policy_kwargs:
    n_critics: 2
    net_arch: [128, 64]
  n_timesteps: 200000
  buffer_size: 25000
  learning_starts: 5000
  batch_size: 512
  learning_rate: lin_0.0002
  gamma: 0.95
  tau: 0.001
  train_freq: [1, "episode"]
  gradient_steps: 100
  target_policy_noise: 0.1
  target_noise_clip: 0.2
  noise_type: "normal"
  noise_std: 0.025
  optimize_memory_usage: False

Reach-ColorImage-Gazebo-v0:
  policy: "CnnPolicy"
  policy_kwargs:
    n_critics: 2
    net_arch: [128, 128]
  n_timesteps: 50000
  buffer_size: 25000
  learning_starts: 5000
  batch_size: 32
  learning_rate: lin_0.0002
  gamma: 0.95
  tau: 0.0005
  train_freq: [1, "episode"]
  gradient_steps: 100
  target_policy_noise: 0.1
  target_noise_clip: 0.2
  noise_type: "normal"
  noise_std: 0.025
  optimize_memory_usage: True
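
target_policy_noise and target_noise_clip mark this block as a TD3-style config. noise_type / noise_std are not TD3 constructor arguments, so presumably (as in rl-zoo) they are converted into exploration action noise; a hedged sketch of that mapping:

import numpy as np
from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise

def make_action_noise(noise_type: str, noise_std: float, n_actions: int):
    # Maps the YAML keys onto stable-baselines3 noise objects (rl-zoo convention).
    mean = np.zeros(n_actions)
    sigma = noise_std * np.ones(n_actions)
    if noise_type == "normal":
        return NormalActionNoise(mean=mean, sigma=sigma)
    if noise_type == "ornstein-uhlenbeck":
        return OrnsteinUhlenbeckActionNoise(mean=mean, sigma=sigma)
    raise ValueError(f"unknown noise_type: {noise_type}")

# e.g. TD3(..., action_noise=make_action_noise("normal", 0.025, env.action_space.shape[0]))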


@@ -0,0 +1,46 @@
# Reach
Reach-Gazebo-v0:
  policy: "MlpPolicy"
  policy_kwargs:
    n_quantiles: 25
    n_critics: 2
    net_arch: [128, 64]
  n_timesteps: 200000
  buffer_size: 25000
  learning_starts: 5000
  batch_size: 512
  learning_rate: lin_0.0002
  gamma: 0.95
  tau: 0.001
  ent_coef: "auto_0.1"
  target_entropy: "auto"
  top_quantiles_to_drop_per_net: 2
  train_freq: [1, "episode"]
  gradient_steps: 100
  noise_type: "normal"
  noise_std: 0.025
  use_sde: False
  optimize_memory_usage: False

Reach-ColorImage-Gazebo-v0:
  policy: "CnnPolicy"
  policy_kwargs:
    n_quantiles: 25
    n_critics: 2
    net_arch: [128, 128]
  n_timesteps: 50000
  buffer_size: 25000
  learning_starts: 5000
  batch_size: 32
  learning_rate: lin_0.0002
  gamma: 0.95
  tau: 0.0005
  ent_coef: "auto_0.1"
  target_entropy: "auto"
  top_quantiles_to_drop_per_net: 2
  train_freq: [1, "episode"]
  gradient_steps: 100
  noise_type: "normal"
  noise_std: 0.025
  use_sde: False
  optimize_memory_usage: True
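
n_quantiles and top_quantiles_to_drop_per_net identify this block as a TQC configuration (the distributional SAC variant from sb3_contrib). A minimal sketch with a stand-in environment:

from sb3_contrib import TQC

model = TQC(
    "MlpPolicy",
    "Pendulum-v1",  # stand-in; the real env ID is Reach-Gazebo-v0
    top_quantiles_to_drop_per_net=2,
    policy_kwargs={"n_quantiles": 25, "n_critics": 2, "net_arch": [128, 64]},
)
model.learn(total_timesteps=200_000)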


@@ -0,0 +1,426 @@
from launch import LaunchContext, LaunchDescription
from launch.actions import (
DeclareLaunchArgument,
IncludeLaunchDescription,
OpaqueFunction,
SetEnvironmentVariable,
TimerAction
)
from launch.launch_description_sources import PythonLaunchDescriptionSource
from launch.substitutions import LaunchConfiguration, PathJoinSubstitution
from launch_ros.substitutions import FindPackageShare
from launch_ros.actions import Node
import os
from os import cpu_count
from ament_index_python.packages import get_package_share_directory
def launch_setup(context, *args, **kwargs):
# Initialize Arguments
robot_type = LaunchConfiguration("robot_type")
# General arguments
with_gripper_condition = LaunchConfiguration("with_gripper")
controllers_file = LaunchConfiguration("controllers_file")
cartesian_controllers = LaunchConfiguration("cartesian_controllers")
description_package = LaunchConfiguration("description_package")
description_file = LaunchConfiguration("description_file")
robot_name = LaunchConfiguration("robot_name")
start_joint_controller = LaunchConfiguration("start_joint_controller")
initial_joint_controller = LaunchConfiguration("initial_joint_controller")
launch_simulation = LaunchConfiguration("launch_sim")
launch_moveit = LaunchConfiguration("launch_moveit")
launch_task_planner = LaunchConfiguration("launch_task_planner")
launch_perception = LaunchConfiguration("launch_perception")
moveit_config_package = LaunchConfiguration("moveit_config_package")
moveit_config_file = LaunchConfiguration("moveit_config_file")
use_sim_time = LaunchConfiguration("use_sim_time")
sim_gazebo = LaunchConfiguration("sim_gazebo")
hardware = LaunchConfiguration("hardware")
env_manager = LaunchConfiguration("env_manager")
launch_controllers = LaunchConfiguration("launch_controllers")
gazebo_gui = LaunchConfiguration("gazebo_gui")
gripper_name = LaunchConfiguration("gripper_name")
# training arguments
env = LaunchConfiguration("env")
algo = LaunchConfiguration("algo")
num_threads = LaunchConfiguration("num_threads")
seed = LaunchConfiguration("seed")
log_folder = LaunchConfiguration("log_folder")
verbose = LaunchConfiguration("verbose")
# use_sim_time = LaunchConfiguration("use_sim_time")
log_level = LaunchConfiguration("log_level")
env_kwargs = LaunchConfiguration("env_kwargs")
n_episodes = LaunchConfiguration("n_episodes")
exp_id = LaunchConfiguration("exp_id")
load_best = LaunchConfiguration("load_best")
load_checkpoint = LaunchConfiguration("load_checkpoint")
stochastic = LaunchConfiguration("stochastic")
reward_log = LaunchConfiguration("reward_log")
norm_reward = LaunchConfiguration("norm_reward")
no_render = LaunchConfiguration("no_render")
initial_joint_controllers_file_path = os.path.join(
get_package_share_directory('rbs_arm'), 'config', 'rbs_arm0_controllers.yaml'
)
single_robot_setup = IncludeLaunchDescription(
PythonLaunchDescriptionSource([
PathJoinSubstitution([
FindPackageShare('rbs_bringup'),
"launch",
"rbs_robot.launch.py"
])
]),
launch_arguments={
"env_manager": env_manager,
"with_gripper": with_gripper_condition,
"gripper_name": gripper_name,
# "controllers_file": controllers_file,
"robot_type": robot_type,
"controllers_file": initial_joint_controllers_file_path,
"cartesian_controllers": cartesian_controllers,
"description_package": description_package,
"description_file": description_file,
"robot_name": robot_name,
"start_joint_controller": start_joint_controller,
"initial_joint_controller": initial_joint_controller,
"launch_simulation": launch_simulation,
"launch_moveit": launch_moveit,
"launch_task_planner": launch_task_planner,
"launch_perception": launch_perception,
"moveit_config_package": moveit_config_package,
"moveit_config_file": moveit_config_file,
"use_sim_time": use_sim_time,
"sim_gazebo": sim_gazebo,
"hardware": hardware,
"launch_controllers": launch_controllers,
# "gazebo_gui": gazebo_gui
}.items()
)
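# CLI arguments for the evaluation script; every LaunchConfiguration below is
# resolved to its string value when the launch description executes.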
args = [
"--env",
env,
"--env-kwargs",
env_kwargs,
"--algo",
algo,
"--seed",
seed,
"--num-threads",
num_threads,
"--n-episodes",
n_episodes,
"--log-folder",
log_folder,
"--exp-id",
exp_id,
"--load-best",
load_best,
"--load-checkpoint",
load_checkpoint,
"--stochastic",
stochastic,
"--reward-log",
reward_log,
"--norm-reward",
norm_reward,
"--no-render",
no_render,
"--verbose",
verbose,
"--ros-args",
"--log-level",
log_level,
]
rl_task = Node(
package="rbs_gym",
executable="evaluate.py",
output="log",
arguments=args,
parameters=[{"use_sim_time": use_sim_time}],
)
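# Start the robot control stack only after a fixed delay so the simulator
# and the RL node have time to come up first.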
delay_robot_control_stack = TimerAction(
period=10.0,
actions=[single_robot_setup]
)
nodes_to_start = [
# env,
rl_task,
delay_robot_control_stack
]
return nodes_to_start
def generate_launch_description():
declared_arguments = []
declared_arguments.append(
DeclareLaunchArgument(
"robot_type",
description="Type of robot by name",
choices=["rbs_arm","ur3", "ur3e", "ur5", "ur5e", "ur10", "ur10e", "ur16e"],
default_value="rbs_arm",
)
)
# General arguments
declared_arguments.append(
DeclareLaunchArgument(
"controllers_file",
default_value="rbs_arm_controllers_gazebosim.yaml",
description="YAML file with the controllers configuration.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"description_package",
default_value="rbs_arm",
description="Description package with robot URDF/XACRO files. Usually the argument \
is not set, it enables use of a custom description.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"description_file",
default_value="rbs_arm_modular.xacro",
description="URDF/XACRO description file with the robot.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"robot_name",
default_value="arm0",
description="Name for robot, used to apply namespace for specific robot in multirobot setup",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"start_joint_controller",
default_value="false",
description="Enable headless mode for robot control",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"initial_joint_controller",
default_value="joint_trajectory_controller",
description="Robot controller to start.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"moveit_config_package",
default_value="rbs_arm",
description="MoveIt config package with robot SRDF/XACRO files. Usually the argument \
is not set, it enables use of a custom moveit config.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"moveit_config_file",
default_value="rbs_arm.srdf.xacro",
description="MoveIt SRDF/XACRO description file with the robot.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"use_sim_time",
default_value="true",
description="Make MoveIt to use simulation time.\
This is needed for the trajectory planing in simulation.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"gripper_name",
default_value="rbs_gripper",
choices=["rbs_gripper", ""],
description="choose gripper by name (leave empty if hasn't)",
)
)
declared_arguments.append(
DeclareLaunchArgument("with_gripper",
default_value="true",
description="With gripper or not?")
)
declared_arguments.append(
DeclareLaunchArgument("sim_gazebo",
default_value="true",
description="Gazebo Simulation")
)
declared_arguments.append(
DeclareLaunchArgument("env_manager",
default_value="false",
description="Launch env_manager?")
)
declared_arguments.append(
DeclareLaunchArgument("launch_sim",
default_value="true",
description="Launch simulator (Gazebo)?\
Most general arg")
)
declared_arguments.append(
DeclareLaunchArgument("launch_moveit",
default_value="false",
description="Launch moveit?")
)
declared_arguments.append(
DeclareLaunchArgument("launch_perception",
default_value="false",
description="Launch perception?")
)
declared_arguments.append(
DeclareLaunchArgument("launch_task_planner",
default_value="false",
description="Launch task_planner?")
)
declared_arguments.append(
DeclareLaunchArgument("cartesian_controllers",
default_value="true",
description="Load cartesian\
controllers?")
)
declared_arguments.append(
DeclareLaunchArgument("hardware",
choices=["gazebo", "mock"],
default_value="gazebo",
description="Choose your harware_interface")
)
declared_arguments.append(
DeclareLaunchArgument("launch_controllers",
default_value="true",
description="Launch controllers?")
)
declared_arguments.append(
DeclareLaunchArgument("gazebo_gui",
default_value="true",
description="Launch gazebo with gui?")
)
# training arguments
declared_arguments.append(
DeclareLaunchArgument(
"env",
default_value="Reach-Gazebo-v0",
description="Environment ID",
))
declared_arguments.append(
DeclareLaunchArgument(
"env_kwargs",
default_value="",
description="Optional keyword argument to pass to the env constructor.",
))
declared_arguments.append(
DeclareLaunchArgument(
"vec_env",
default_value="dummy",
description="Type of VecEnv to use (dummy or subproc).",
))
# Algorithm and training
declared_arguments.append(
DeclareLaunchArgument(
"algo",
default_value="sac",
description="RL algorithm to use during the training.",
))
declared_arguments.append(
DeclareLaunchArgument(
"num_threads",
default_value="-1",
description="Number of threads for PyTorch (-1 to use default).",
))
# Random seed
declared_arguments.append(
DeclareLaunchArgument(
"seed",
default_value="84",
description="Random generator seed.",
))
# Logging
declared_arguments.append(
DeclareLaunchArgument(
"log_folder",
default_value="logs",
description="Path to the log directory.",
))
# Verbosity
declared_arguments.append(
DeclareLaunchArgument(
"verbose",
default_value="1",
description="Verbose mode (0: no output, 1: INFO).",
))
declared_arguments.append(
DeclareLaunchArgument(
"log_level",
default_value="error",
description="The level of logging that is applied to all ROS 2 nodes launched by this script.",
))
declared_arguments.append(
DeclareLaunchArgument(
"n_episodes",
default_value="200",
description="Number of evaluation episodes.",
))
declared_arguments.append(
DeclareLaunchArgument(
"exp_id",
default_value="0",
description="Experiment ID (default: 0: latest, -1: no exp folder).",
))
declared_arguments.append(
DeclareLaunchArgument(
"load_best",
default_value="false",
description="Load best model instead of last model if available.",
))
declared_arguments.append(
DeclareLaunchArgument(
"load_checkpoint",
default_value="0",
description="Load checkpoint instead of last model if available, you must pass the number of timesteps corresponding to it.",
))
declared_arguments.append(
DeclareLaunchArgument(
"stochastic",
default_value="false",
description="Use stochastic actions instead of deterministic.",
))
declared_arguments.append(
DeclareLaunchArgument(
"reward_log",
default_value="reward_logs",
description="Where to log reward.",
))
declared_arguments.append(
DeclareLaunchArgument(
"norm_reward",
default_value="false",
description="Normalize reward if applicable (trained with VecNormalize)",
))
declared_arguments.append(
DeclareLaunchArgument(
"no_render",
default_value="true",
description="Do not render the environment (useful for tests).",
))
env_variables = [
SetEnvironmentVariable(name="OMP_DYNAMIC", value="TRUE"),
SetEnvironmentVariable(name="OMP_NUM_THREADS", value=str(cpu_count() // 2))
]
return LaunchDescription(declared_arguments + [OpaqueFunction(function=launch_setup)] + env_variables)
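
A small, hedged sketch of running this launch description programmatically instead of through `ros2 launch`, useful for quick local checks:

from launch import LaunchService

ls = LaunchService()
ls.include_launch_description(generate_launch_description())
ls.run()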


@@ -0,0 +1,519 @@
from launch import LaunchDescription
from launch.actions import (
DeclareLaunchArgument,
IncludeLaunchDescription,
OpaqueFunction,
TimerAction
)
from launch.launch_description_sources import PythonLaunchDescriptionSource
from launch.substitutions import LaunchConfiguration, PathJoinSubstitution
from launch_ros.substitutions import FindPackageShare
from launch_ros.actions import Node
import os
from os import cpu_count
from ament_index_python.packages import get_package_share_directory
def launch_setup(context, *args, **kwargs):
# Initialize Arguments
robot_type = LaunchConfiguration("robot_type")
# General arguments
with_gripper_condition = LaunchConfiguration("with_gripper")
controllers_file = LaunchConfiguration("controllers_file")
cartesian_controllers = LaunchConfiguration("cartesian_controllers")
description_package = LaunchConfiguration("description_package")
description_file = LaunchConfiguration("description_file")
robot_name = LaunchConfiguration("robot_name")
start_joint_controller = LaunchConfiguration("start_joint_controller")
initial_joint_controller = LaunchConfiguration("initial_joint_controller")
launch_simulation = LaunchConfiguration("launch_sim")
launch_moveit = LaunchConfiguration("launch_moveit")
launch_task_planner = LaunchConfiguration("launch_task_planner")
launch_perception = LaunchConfiguration("launch_perception")
moveit_config_package = LaunchConfiguration("moveit_config_package")
moveit_config_file = LaunchConfiguration("moveit_config_file")
use_sim_time = LaunchConfiguration("use_sim_time")
sim_gazebo = LaunchConfiguration("sim_gazebo")
hardware = LaunchConfiguration("hardware")
env_manager = LaunchConfiguration("env_manager")
launch_controllers = LaunchConfiguration("launch_controllers")
gripper_name = LaunchConfiguration("gripper_name")
# training arguments
env = LaunchConfiguration("env")
env_kwargs = LaunchConfiguration("env_kwargs")
algo = LaunchConfiguration("algo")
hyperparams = LaunchConfiguration("hyperparams")
n_timesteps = LaunchConfiguration("n_timesteps")
num_threads = LaunchConfiguration("num_threads")
seed = LaunchConfiguration("seed")
preload_replay_buffer = LaunchConfiguration("preload_replay_buffer")
log_folder = LaunchConfiguration("log_folder")
tensorboard_log = LaunchConfiguration("tensorboard_log")
log_interval = LaunchConfiguration("log_interval")
uuid = LaunchConfiguration("uuid")
eval_episodes = LaunchConfiguration("eval_episodes")
verbose = LaunchConfiguration("verbose")
truncate_last_trajectory = LaunchConfiguration("truncate_last_trajectory")
log_level = LaunchConfiguration("log_level")
sampler = LaunchConfiguration("sampler")
pruner = LaunchConfiguration("pruner")
n_trials = LaunchConfiguration("n_trials")
n_startup_trials = LaunchConfiguration("n_startup_trials")
n_evaluations = LaunchConfiguration("n_evaluations")
n_jobs = LaunchConfiguration("n_jobs")
storage = LaunchConfiguration("storage")
study_name = LaunchConfiguration("study_name")
initial_joint_controllers_file_path = os.path.join(
get_package_share_directory('rbs_arm'), 'config', 'rbs_arm0_controllers.yaml'
)
single_robot_setup = IncludeLaunchDescription(
PythonLaunchDescriptionSource([
PathJoinSubstitution([
FindPackageShare('rbs_bringup'),
"launch",
"rbs_robot.launch.py"
])
]),
launch_arguments={
"env_manager": env_manager,
"with_gripper": with_gripper_condition,
"gripper_name": gripper_name,
"controllers_file": controllers_file,
"robot_type": robot_type,
"controllers_file": initial_joint_controllers_file_path,
"cartesian_controllers": cartesian_controllers,
"description_package": description_package,
"description_file": description_file,
"robot_name": robot_name,
"start_joint_controller": start_joint_controller,
"initial_joint_controller": initial_joint_controller,
"launch_simulation": launch_simulation,
"launch_moveit": launch_moveit,
"launch_task_planner": launch_task_planner,
"launch_perception": launch_perception,
"moveit_config_package": moveit_config_package,
"moveit_config_file": moveit_config_file,
"use_sim_time": use_sim_time,
"sim_gazebo": sim_gazebo,
"hardware": hardware,
"launch_controllers": launch_controllers,
# "gazebo_gui": gazebo_gui
}.items()
)
args = [
"--env",
env,
"--env-kwargs",
env_kwargs,
"--algo",
algo,
"--seed",
seed,
"--num-threads",
num_threads,
"--n-timesteps",
n_timesteps,
"--preload-replay-buffer",
preload_replay_buffer,
"--log-folder",
log_folder,
"--tensorboard-log",
tensorboard_log,
"--log-interval",
log_interval,
"--uuid",
uuid,
"--optimize-hyperparameters",
"True",
"--sampler",
sampler,
"--pruner",
pruner,
"--n-trials",
n_trials,
"--n-startup-trials",
n_startup_trials,
"--n-evaluations",
n_evaluations,
"--n-jobs",
n_jobs,
"--storage",
storage,
"--study-name",
study_name,
"--eval-episodes",
eval_episodes,
"--verbose",
verbose,
"--truncate-last-trajectory",
truncate_last_trajectory,
"--ros-args",
"--log-level",
log_level,
]
rl_task = Node(
package="rbs_gym",
executable="train.py",
output="log",
arguments=args,
parameters=[{"use_sim_time": use_sim_time}]
)
delay_robot_control_stack = TimerAction(
period=10.0,
actions=[single_robot_setup]
)
nodes_to_start = [
rl_task,
delay_robot_control_stack
]
return nodes_to_start
def generate_launch_description():
declared_arguments = []
declared_arguments.append(
DeclareLaunchArgument(
"robot_type",
description="Type of robot by name",
choices=["rbs_arm","ur3", "ur3e", "ur5", "ur5e", "ur10", "ur10e", "ur16e"],
default_value="rbs_arm",
)
)
# General arguments
declared_arguments.append(
DeclareLaunchArgument(
"controllers_file",
default_value="rbs_arm_controllers_gazebosim.yaml",
description="YAML file with the controllers configuration.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"description_package",
default_value="rbs_arm",
description="Description package with robot URDF/XACRO files. Usually the argument \
is not set, it enables use of a custom description.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"description_file",
default_value="rbs_arm_modular.xacro",
description="URDF/XACRO description file with the robot.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"robot_name",
default_value="arm0",
description="Name for robot, used to apply namespace for specific robot in multirobot setup",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"start_joint_controller",
default_value="false",
description="Enable headless mode for robot control",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"initial_joint_controller",
default_value="joint_trajectory_controller",
description="Robot controller to start.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"moveit_config_package",
default_value="rbs_arm",
description="MoveIt config package with robot SRDF/XACRO files. Usually the argument \
is not set, it enables use of a custom moveit config.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"moveit_config_file",
default_value="rbs_arm.srdf.xacro",
description="MoveIt SRDF/XACRO description file with the robot.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"use_sim_time",
default_value="true",
description="Make MoveIt to use simulation time.\
This is needed for the trajectory planing in simulation.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"gripper_name",
default_value="rbs_gripper",
choices=["rbs_gripper", ""],
description="choose gripper by name (leave empty if hasn't)",
)
)
declared_arguments.append(
DeclareLaunchArgument("with_gripper",
default_value="true",
description="With gripper or not?")
)
declared_arguments.append(
DeclareLaunchArgument("sim_gazebo",
default_value="true",
description="Gazebo Simulation")
)
declared_arguments.append(
DeclareLaunchArgument("env_manager",
default_value="false",
description="Launch env_manager?")
)
declared_arguments.append(
DeclareLaunchArgument("launch_sim",
default_value="true",
description="Launch simulator (Gazebo)?\
Most general arg")
)
declared_arguments.append(
DeclareLaunchArgument("launch_moveit",
default_value="false",
description="Launch moveit?")
)
declared_arguments.append(
DeclareLaunchArgument("launch_perception",
default_value="false",
description="Launch perception?")
)
declared_arguments.append(
DeclareLaunchArgument("launch_task_planner",
default_value="false",
description="Launch task_planner?")
)
declared_arguments.append(
DeclareLaunchArgument("cartesian_controllers",
default_value="true",
description="Load cartesian\
controllers?")
)
declared_arguments.append(
DeclareLaunchArgument("hardware",
choices=["gazebo", "mock"],
default_value="gazebo",
description="Choose your harware_interface")
)
declared_arguments.append(
DeclareLaunchArgument("launch_controllers",
default_value="true",
description="Launch controllers?")
)
declared_arguments.append(
DeclareLaunchArgument("gazebo_gui",
default_value="true",
description="Launch gazebo with gui?")
)
# training arguments
declared_arguments.append(
DeclareLaunchArgument(
"env",
default_value="Reach-Gazebo-v0",
description="Environment ID",
))
declared_arguments.append(
DeclareLaunchArgument(
"env_kwargs",
default_value="",
description="Optional keyword argument to pass to the env constructor.",
))
declared_arguments.append(
DeclareLaunchArgument(
"vec_env",
default_value="dummy",
description="Type of VecEnv to use (dummy or subproc).",
))
# Algorithm and training
declared_arguments.append(
DeclareLaunchArgument(
"algo",
default_value="sac",
description="RL algorithm to use during the training.",
))
declared_arguments.append(
DeclareLaunchArgument(
"n_timesteps",
default_value="-1",
description="Overwrite the number of timesteps.",
))
declared_arguments.append(
DeclareLaunchArgument(
"hyperparams",
default_value="",
description="Optional RL hyperparameter overwrite (e.g. learning_rate:0.01 train_freq:10).",
))
declared_arguments.append(
DeclareLaunchArgument(
"num_threads",
default_value="-1",
description="Number of threads for PyTorch (-1 to use default).",
))
# Continue training an already trained agent
declared_arguments.append(
DeclareLaunchArgument(
"trained_agent",
default_value="",
description="Path to a pretrained agent to continue training.",
))
# Random seed
declared_arguments.append(
DeclareLaunchArgument(
"seed",
default_value="84",
description="Random generator seed.",
))
# Saving of model
declared_arguments.append(
DeclareLaunchArgument(
"save_freq",
default_value="10000",
description="Save the model every n steps (if negative, no checkpoint).",
))
declared_arguments.append(
DeclareLaunchArgument(
"save_replay_buffer",
default_value="False",
description="Save the replay buffer too (when applicable).",
))
# Pre-load a replay buffer and start training on it
declared_arguments.append(
DeclareLaunchArgument(
"preload_replay_buffer",
default_value="",
description="Path to a replay buffer that should be preloaded before starting the training process.",
))
# Logging
declared_arguments.append(
DeclareLaunchArgument(
"log_folder",
default_value="logs",
description="Path to the log directory.",
))
declared_arguments.append(
DeclareLaunchArgument(
"tensorboard_log",
default_value="tensorboard_logs",
description="Tensorboard log dir.",
))
declared_arguments.append(
DeclareLaunchArgument(
"log_interval",
default_value="-1",
description="Override log interval (default: -1, no change).",
))
declared_arguments.append(
DeclareLaunchArgument(
"uuid",
default_value="False",
description="Ensure that the run has a unique ID.",
))
declared_arguments.append(
DeclareLaunchArgument(
"sampler",
default_value="tpe",
description="Sampler to use when optimizing hyperparameters (random, tpe or skopt).",
))
declared_arguments.append(
DeclareLaunchArgument(
"pruner",
default_value="median",
description="Pruner to use when optimizing hyperparameters (halving, median or none).",
))
declared_arguments.append(
DeclareLaunchArgument(
"n_trials",
default_value="10",
description="Number of trials for optimizing hyperparameters.",
))
declared_arguments.append(
DeclareLaunchArgument(
"n_startup_trials",
default_value="5",
description="Number of trials before using optuna sampler.",
))
declared_arguments.append(
DeclareLaunchArgument(
"n_evaluations",
default_value="2",
description="Number of evaluations for hyperparameter optimization.",
))
declared_arguments.append(
DeclareLaunchArgument(
"n_jobs",
default_value="1",
description="Number of parallel jobs when optimizing hyperparameters.",
))
declared_arguments.append(
DeclareLaunchArgument(
"storage",
default_value="",
description="Database storage path if distributed optimization should be used.",
))
declared_arguments.append(
DeclareLaunchArgument(
"study_name",
default_value="",
description="Study name for distributed optimization.",
))
# Evaluation
declared_arguments.append(
DeclareLaunchArgument(
"eval_freq",
default_value="-1",
description="Evaluate the agent every n steps (if negative, no evaluation).",
))
declared_arguments.append(
DeclareLaunchArgument(
"eval_episodes",
default_value="5",
description="Number of episodes to use for evaluation.",
))
# Verbosity
declared_arguments.append(
DeclareLaunchArgument(
"verbose",
default_value="1",
description="Verbose mode (0: no output, 1: INFO).",
))
# HER specifics
declared_arguments.append(
DeclareLaunchArgument(
"truncate_last_trajectory",
default_value="True",
description="When using HER with online sampling the last trajectory in the replay buffer will be truncated after) reloading the replay buffer."
)),
declared_arguments.append(
DeclareLaunchArgument(
"log_level",
default_value="error",
description="The level of logging that is applied to all ROS 2 nodes launched by this script.",
))
# env_variables = [
# SetEnvironmentVariable(name="OMP_DYNAMIC", value="TRUE"),
# SetEnvironmentVariable(name="OMP_NUM_THREADS", value=str(cpu_count() // 2))
# ]
return LaunchDescription(declared_arguments + [OpaqueFunction(function=launch_setup)])
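
The sampler, pruner, n_trials, n_startup_trials, storage and study_name arguments map naturally onto an Optuna study, which is presumably what train.py builds when --optimize-hyperparameters is set. A rough sketch under that assumption; function names here are illustrative, not the package's API:

import optuna
from optuna.pruners import MedianPruner, NopPruner
from optuna.samplers import RandomSampler, TPESampler

def create_study(sampler, pruner, n_startup_trials, storage, study_name):
    # Illustrative mapping of the launch arguments onto Optuna objects.
    samplers = {"tpe": TPESampler(n_startup_trials=n_startup_trials),
                "random": RandomSampler()}
    pruners = {"median": MedianPruner(n_startup_trials=n_startup_trials),
               "none": NopPruner()}
    return optuna.create_study(
        sampler=samplers[sampler],
        pruner=pruners[pruner],
        storage=storage or None,      # e.g. a database URL for distributed runs
        study_name=study_name or None,
        load_if_exists=bool(storage),
        direction="maximize",
    )

# study = create_study("tpe", "median", n_startup_trials=5, storage="", study_name="")
# study.optimize(objective_fn, n_trials=10, n_jobs=1)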


@@ -0,0 +1,415 @@
from launch import LaunchDescription
from launch.actions import (
DeclareLaunchArgument,
IncludeLaunchDescription,
OpaqueFunction,
SetEnvironmentVariable,
TimerAction
)
from launch.launch_description_sources import PythonLaunchDescriptionSource
from launch.substitutions import LaunchConfiguration, PathJoinSubstitution
from launch_ros.substitutions import FindPackageShare
from launch_ros.actions import Node
import os
from os import cpu_count
from ament_index_python.packages import get_package_share_directory
def launch_setup(context, *args, **kwargs):
# Initialize Arguments
robot_type = LaunchConfiguration("robot_type")
# General arguments
with_gripper_condition = LaunchConfiguration("with_gripper")
controllers_file = LaunchConfiguration("controllers_file")
cartesian_controllers = LaunchConfiguration("cartesian_controllers")
description_package = LaunchConfiguration("description_package")
description_file = LaunchConfiguration("description_file")
robot_name = LaunchConfiguration("robot_name")
start_joint_controller = LaunchConfiguration("start_joint_controller")
initial_joint_controller = LaunchConfiguration("initial_joint_controller")
launch_simulation = LaunchConfiguration("launch_sim")
launch_moveit = LaunchConfiguration("launch_moveit")
launch_task_planner = LaunchConfiguration("launch_task_planner")
launch_perception = LaunchConfiguration("launch_perception")
moveit_config_package = LaunchConfiguration("moveit_config_package")
moveit_config_file = LaunchConfiguration("moveit_config_file")
use_sim_time = LaunchConfiguration("use_sim_time")
sim_gazebo = LaunchConfiguration("sim_gazebo")
hardware = LaunchConfiguration("hardware")
env_manager = LaunchConfiguration("env_manager")
launch_controllers = LaunchConfiguration("launch_controllers")
gripper_name = LaunchConfiguration("gripper_name")
# training arguments
env = LaunchConfiguration("env")
log_level = LaunchConfiguration("log_level")
env_kwargs = LaunchConfiguration("env_kwargs")
initial_joint_controllers_file_path = os.path.join(
get_package_share_directory('rbs_arm'), 'config', 'rbs_arm0_controllers.yaml'
)
single_robot_setup = IncludeLaunchDescription(
PythonLaunchDescriptionSource([
PathJoinSubstitution([
FindPackageShare('rbs_bringup'),
"launch",
"rbs_robot.launch.py"
])
]),
launch_arguments={
"env_manager": env_manager,
"with_gripper": with_gripper_condition,
"gripper_name": gripper_name,
"controllers_file": controllers_file,
"robot_type": robot_type,
"controllers_file": initial_joint_controllers_file_path,
"cartesian_controllers": cartesian_controllers,
"description_package": description_package,
"description_file": description_file,
"robot_name": robot_name,
"start_joint_controller": start_joint_controller,
"initial_joint_controller": initial_joint_controller,
"launch_simulation": launch_simulation,
"launch_moveit": launch_moveit,
"launch_task_planner": launch_task_planner,
"launch_perception": launch_perception,
"moveit_config_package": moveit_config_package,
"moveit_config_file": moveit_config_file,
"use_sim_time": use_sim_time,
"sim_gazebo": sim_gazebo,
"hardware": hardware,
"launch_controllers": launch_controllers,
# "gazebo_gui": gazebo_gui
}.items()
)
args = [
"--env",
env,
"--env-kwargs",
env_kwargs,
"--ros-args",
"--log-level",
log_level,
]
rl_task = Node(
package="rbs_gym",
executable="test_agent.py",
output="log",
arguments=args,
parameters=[{"use_sim_time": True}]
)
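# Bridge Gazebo's /clock into ROS 2 ('[' makes the bridge one-way, Gazebo -> ROS),
# so that ROS nodes running with use_sim_time follow simulation time.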
clock_bridge = Node(
package='ros_gz_bridge',
executable='parameter_bridge',
arguments=['/clock@rosgraph_msgs/msg/Clock[ignition.msgs.Clock'],
output='screen')
delay_robot_control_stack = TimerAction(
period=10.0,
actions=[single_robot_setup]
)
nodes_to_start = [
# env,
rl_task,
clock_bridge,
delay_robot_control_stack
]
return nodes_to_start
def generate_launch_description():
declared_arguments = []
declared_arguments.append(
DeclareLaunchArgument(
"robot_type",
description="Type of robot by name",
choices=["rbs_arm","ur3", "ur3e", "ur5", "ur5e", "ur10", "ur10e", "ur16e"],
default_value="rbs_arm",
)
)
# General arguments
declared_arguments.append(
DeclareLaunchArgument(
"controllers_file",
default_value="rbs_arm_controllers_gazebosim.yaml",
description="YAML file with the controllers configuration.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"description_package",
default_value="rbs_arm",
description="Description package with robot URDF/XACRO files. Usually the argument \
is not set, it enables use of a custom description.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"description_file",
default_value="rbs_arm_modular.xacro",
description="URDF/XACRO description file with the robot.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"robot_name",
default_value="arm0",
description="Name for robot, used to apply namespace for specific robot in multirobot setup",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"start_joint_controller",
default_value="false",
description="Enable headless mode for robot control",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"initial_joint_controller",
default_value="joint_trajectory_controller",
description="Robot controller to start.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"moveit_config_package",
default_value="rbs_arm",
description="MoveIt config package with robot SRDF/XACRO files. Usually the argument \
is not set, it enables use of a custom moveit config.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"moveit_config_file",
default_value="rbs_arm.srdf.xacro",
description="MoveIt SRDF/XACRO description file with the robot.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"use_sim_time",
default_value="true",
description="Make MoveIt to use simulation time.\
This is needed for the trajectory planing in simulation.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"gripper_name",
default_value="rbs_gripper",
choices=["rbs_gripper", ""],
description="choose gripper by name (leave empty if hasn't)",
)
)
declared_arguments.append(
DeclareLaunchArgument("with_gripper",
default_value="true",
description="With gripper or not?")
)
declared_arguments.append(
DeclareLaunchArgument("sim_gazebo",
default_value="true",
description="Gazebo Simulation")
)
declared_arguments.append(
DeclareLaunchArgument("env_manager",
default_value="false",
description="Launch env_manager?")
)
declared_arguments.append(
DeclareLaunchArgument("launch_sim",
default_value="true",
description="Launch simulator (Gazebo)?\
Most general arg")
)
declared_arguments.append(
DeclareLaunchArgument("launch_moveit",
default_value="false",
description="Launch moveit?")
)
declared_arguments.append(
DeclareLaunchArgument("launch_perception",
default_value="false",
description="Launch perception?")
)
declared_arguments.append(
DeclareLaunchArgument("launch_task_planner",
default_value="false",
description="Launch task_planner?")
)
declared_arguments.append(
DeclareLaunchArgument("cartesian_controllers",
default_value="true",
description="Load cartesian\
controllers?")
)
declared_arguments.append(
DeclareLaunchArgument("hardware",
choices=["gazebo", "mock"],
default_value="gazebo",
description="Choose your harware_interface")
)
declared_arguments.append(
DeclareLaunchArgument("launch_controllers",
default_value="true",
description="Launch controllers?")
)
declared_arguments.append(
DeclareLaunchArgument("gazebo_gui",
default_value="true",
description="Launch gazebo with gui?")
)
# training arguments
declared_arguments.append(
DeclareLaunchArgument(
"env",
default_value="Reach-Gazebo-v0",
description="Environment ID",
))
declared_arguments.append(
DeclareLaunchArgument(
"env_kwargs",
default_value="",
description="Optional keyword argument to pass to the env constructor.",
))
declared_arguments.append(
DeclareLaunchArgument(
"vec_env",
default_value="dummy",
description="Type of VecEnv to use (dummy or subproc).",
))
# Algorithm and training
declared_arguments.append(
DeclareLaunchArgument(
"algo",
default_value="sac",
description="RL algorithm to use during the training.",
))
declared_arguments.append(
DeclareLaunchArgument(
"n_timesteps",
default_value="-1",
description="Overwrite the number of timesteps.",
))
declared_arguments.append(
DeclareLaunchArgument(
"hyperparams",
default_value="",
description="Optional RL hyperparameter overwrite (e.g. learning_rate:0.01 train_freq:10).",
))
declared_arguments.append(
DeclareLaunchArgument(
"num_threads",
default_value="-1",
description="Number of threads for PyTorch (-1 to use default).",
))
# Continue training an already trained agent
declared_arguments.append(
DeclareLaunchArgument(
"trained_agent",
default_value="",
description="Path to a pretrained agent to continue training.",
))
# Random seed
declared_arguments.append(
DeclareLaunchArgument(
"seed",
default_value="-1",
description="Random generator seed.",
))
# Saving of model
declared_arguments.append(
DeclareLaunchArgument(
"save_freq",
default_value="10000",
description="Save the model every n steps (if negative, no checkpoint).",
))
declared_arguments.append(
DeclareLaunchArgument(
"save_replay_buffer",
default_value="False",
description="Save the replay buffer too (when applicable).",
))
# Pre-load a replay buffer and start training on it
declared_arguments.append(
DeclareLaunchArgument(
"preload_replay_buffer",
default_value="",
description="Path to a replay buffer that should be preloaded before starting the training process.",
))
# Logging
declared_arguments.append(
DeclareLaunchArgument(
"log_folder",
default_value="logs",
description="Path to the log directory.",
))
declared_arguments.append(
DeclareLaunchArgument(
"tensorboard_log",
default_value="tensorboard_logs",
description="Tensorboard log dir.",
))
declared_arguments.append(
DeclareLaunchArgument(
"log_interval",
default_value="-1",
description="Override log interval (default: -1, no change).",
))
declared_arguments.append(
DeclareLaunchArgument(
"uuid",
default_value="False",
description="Ensure that the run has a unique ID.",
))
# Evaluation
declared_arguments.append(
DeclareLaunchArgument(
"eval_freq",
default_value="-1",
description="Evaluate the agent every n steps (if negative, no evaluation).",
))
declared_arguments.append(
DeclareLaunchArgument(
"eval_episodes",
default_value="5",
description="Number of episodes to use for evaluation.",
))
# Verbosity
declared_arguments.append(
DeclareLaunchArgument(
"verbose",
default_value="1",
description="Verbose mode (0: no output, 1: INFO).",
))
# HER specifics
declared_arguments.append(
DeclareLaunchArgument(
"truncate_last_trajectory",
default_value="True",
description="When using HER with online sampling the last trajectory in the replay buffer will be truncated after) reloading the replay buffer."
)),
declared_arguments.append(
DeclareLaunchArgument(
"log_level",
default_value="error",
description="The level of logging that is applied to all ROS 2 nodes launched by this script.",
))
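# Limit OpenMP to half the detected cores so PyTorch worker threads do not
# starve the simulator; OMP_DYNAMIC lets the runtime shrink the team further.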
env_variables = [
SetEnvironmentVariable(name="OMP_DYNAMIC", value="TRUE"),
SetEnvironmentVariable(name="OMP_NUM_THREADS", value=str(cpu_count() // 2))
]
return LaunchDescription(declared_arguments + [OpaqueFunction(function=launch_setup)] + env_variables)


@@ -0,0 +1,476 @@
from launch import LaunchDescription
from launch.actions import (
DeclareLaunchArgument,
IncludeLaunchDescription,
OpaqueFunction,
SetEnvironmentVariable,
TimerAction
)
from launch.launch_description_sources import PythonLaunchDescriptionSource
from launch.substitutions import LaunchConfiguration, PathJoinSubstitution
from launch_ros.substitutions import FindPackageShare
from launch_ros.actions import Node
import os
from os import cpu_count
from ament_index_python.packages import get_package_share_directory
def launch_setup(context, *args, **kwargs):
# Initialize Arguments
robot_type = LaunchConfiguration("robot_type")
# General arguments
with_gripper_condition = LaunchConfiguration("with_gripper")
controllers_file = LaunchConfiguration("controllers_file")
cartesian_controllers = LaunchConfiguration("cartesian_controllers")
description_package = LaunchConfiguration("description_package")
description_file = LaunchConfiguration("description_file")
robot_name = LaunchConfiguration("robot_name")
start_joint_controller = LaunchConfiguration("start_joint_controller")
initial_joint_controller = LaunchConfiguration("initial_joint_controller")
launch_simulation = LaunchConfiguration("launch_sim")
launch_moveit = LaunchConfiguration("launch_moveit")
launch_task_planner = LaunchConfiguration("launch_task_planner")
launch_perception = LaunchConfiguration("launch_perception")
moveit_config_package = LaunchConfiguration("moveit_config_package")
moveit_config_file = LaunchConfiguration("moveit_config_file")
use_sim_time = LaunchConfiguration("use_sim_time")
sim_gazebo = LaunchConfiguration("sim_gazebo")
hardware = LaunchConfiguration("hardware")
env_manager = LaunchConfiguration("env_manager")
launch_controllers = LaunchConfiguration("launch_controllers")
gripper_name = LaunchConfiguration("gripper_name")
# training arguments
env = LaunchConfiguration("env")
algo = LaunchConfiguration("algo")
hyperparams = LaunchConfiguration("hyperparams")
n_timesteps = LaunchConfiguration("n_timesteps")
num_threads = LaunchConfiguration("num_threads")
seed = LaunchConfiguration("seed")
trained_agent = LaunchConfiguration("trained_agent")
save_freq = LaunchConfiguration("save_freq")
save_replay_buffer = LaunchConfiguration("save_replay_buffer")
preload_replay_buffer = LaunchConfiguration("preload_replay_buffer")
log_folder = LaunchConfiguration("log_folder")
tensorboard_log = LaunchConfiguration("tensorboard_log")
log_interval = LaunchConfiguration("log_interval")
uuid = LaunchConfiguration("uuid")
eval_freq = LaunchConfiguration("eval_freq")
eval_episodes = LaunchConfiguration("eval_episodes")
verbose = LaunchConfiguration("verbose")
truncate_last_trajectory = LaunchConfiguration("truncate_last_trajectory")
log_level = LaunchConfiguration("log_level")
env_kwargs = LaunchConfiguration("env_kwargs")
track = LaunchConfiguration("track")
initial_joint_controllers_file_path = os.path.join(
get_package_share_directory('rbs_arm'), 'config', 'rbs_arm0_controllers.yaml'
)
single_robot_setup = IncludeLaunchDescription(
PythonLaunchDescriptionSource([
PathJoinSubstitution([
FindPackageShare('rbs_bringup'),
"launch",
"rbs_robot.launch.py"
])
]),
launch_arguments={
"env_manager": env_manager,
"with_gripper": with_gripper_condition,
"gripper_name": gripper_name,
"controllers_file": controllers_file,
"robot_type": robot_type,
"controllers_file": initial_joint_controllers_file_path,
"cartesian_controllers": cartesian_controllers,
"description_package": description_package,
"description_file": description_file,
"robot_name": robot_name,
"start_joint_controller": start_joint_controller,
"initial_joint_controller": initial_joint_controller,
"launch_simulation": launch_simulation,
"launch_moveit": launch_moveit,
"launch_task_planner": launch_task_planner,
"launch_perception": launch_perception,
"moveit_config_package": moveit_config_package,
"moveit_config_file": moveit_config_file,
"use_sim_time": use_sim_time,
"sim_gazebo": sim_gazebo,
"hardware": hardware,
"launch_controllers": launch_controllers,
# "gazebo_gui": gazebo_gui
}.items()
)
args = [
"--env",
env,
"--env-kwargs",
env_kwargs,
"--algo",
algo,
"--hyperparams",
hyperparams,
"--n-timesteps",
n_timesteps,
"--num-threads",
num_threads,
"--seed",
seed,
"--trained-agent",
trained_agent,
"--save-freq",
save_freq,
"--save-replay-buffer",
save_replay_buffer,
"--preload-replay-buffer",
preload_replay_buffer,
"--log-folder",
log_folder,
"--tensorboard-log",
tensorboard_log,
"--log-interval",
log_interval,
"--uuid",
uuid,
"--eval-freq",
eval_freq,
"--eval-episodes",
eval_episodes,
"--verbose",
verbose,
"--track",
track,
"--truncate-last-trajectory",
truncate_last_trajectory,
"--ros-args",
"--log-level",
log_level,
]
clock_bridge = Node(
package='ros_gz_bridge',
executable='parameter_bridge',
arguments=['/clock@rosgraph_msgs/msg/Clock[ignition.msgs.Clock'],
output='screen')
rl_task = Node(
package="rbs_gym",
executable="train.py",
output="log",
arguments=args,
parameters=[{"use_sim_time": True}]
)
delay_robot_control_stack = TimerAction(
period=20.0,
actions=[single_robot_setup]
)
nodes_to_start = [
# env,
rl_task,
clock_bridge,
delay_robot_control_stack
]
return nodes_to_start
def generate_launch_description():
declared_arguments = []
declared_arguments.append(
DeclareLaunchArgument(
"robot_type",
description="Type of robot by name",
choices=["rbs_arm","ur3", "ur3e", "ur5", "ur5e", "ur10", "ur10e", "ur16e"],
default_value="rbs_arm",
)
)
# General arguments
declared_arguments.append(
DeclareLaunchArgument(
"controllers_file",
default_value="rbs_arm_controllers_gazebosim.yaml",
description="YAML file with the controllers configuration.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"description_package",
default_value="rbs_arm",
description="Description package with robot URDF/XACRO files. Usually the argument \
is not set, it enables use of a custom description.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"description_file",
default_value="rbs_arm_modular.xacro",
description="URDF/XACRO description file with the robot.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"robot_name",
default_value="arm0",
description="Name for robot, used to apply namespace for specific robot in multirobot setup",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"start_joint_controller",
default_value="false",
description="Enable headless mode for robot control",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"initial_joint_controller",
default_value="joint_trajectory_controller",
description="Robot controller to start.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"moveit_config_package",
default_value="rbs_arm",
description="MoveIt config package with robot SRDF/XACRO files. Usually the argument \
is not set, it enables use of a custom moveit config.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"moveit_config_file",
default_value="rbs_arm.srdf.xacro",
description="MoveIt SRDF/XACRO description file with the robot.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"use_sim_time",
default_value="true",
description="Make MoveIt to use simulation time.\
This is needed for the trajectory planing in simulation.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"gripper_name",
default_value="rbs_gripper",
choices=["rbs_gripper", ""],
description="choose gripper by name (leave empty if hasn't)",
)
)
declared_arguments.append(
DeclareLaunchArgument("with_gripper",
default_value="true",
description="With gripper or not?")
)
declared_arguments.append(
DeclareLaunchArgument("sim_gazebo",
default_value="true",
description="Gazebo Simulation")
)
declared_arguments.append(
DeclareLaunchArgument("env_manager",
default_value="false",
description="Launch env_manager?")
)
declared_arguments.append(
DeclareLaunchArgument("launch_sim",
default_value="true",
description="Launch simulator (Gazebo)?\
Most general arg")
)
declared_arguments.append(
DeclareLaunchArgument("launch_moveit",
default_value="false",
description="Launch moveit?")
)
declared_arguments.append(
DeclareLaunchArgument("launch_perception",
default_value="false",
description="Launch perception?")
)
declared_arguments.append(
DeclareLaunchArgument("launch_task_planner",
default_value="false",
description="Launch task_planner?")
)
declared_arguments.append(
DeclareLaunchArgument("cartesian_controllers",
default_value="true",
description="Load cartesian\
controllers?")
)
declared_arguments.append(
DeclareLaunchArgument("hardware",
choices=["gazebo", "mock"],
default_value="gazebo",
description="Choose your harware_interface")
)
declared_arguments.append(
DeclareLaunchArgument("launch_controllers",
default_value="true",
description="Launch controllers?")
)
declared_arguments.append(
DeclareLaunchArgument("gazebo_gui",
default_value="true",
description="Launch gazebo with gui?")
)
# training arguments
declared_arguments.append(
DeclareLaunchArgument(
"env",
default_value="Reach-Gazebo-v0",
description="Environment ID",
))
declared_arguments.append(
DeclareLaunchArgument(
"env_kwargs",
default_value="",
description="Optional keyword argument to pass to the env constructor.",
))
declared_arguments.append(
DeclareLaunchArgument(
"vec_env",
default_value="dummy",
description="Type of VecEnv to use (dummy or subproc).",
))
# Algorithm and training
declared_arguments.append(
DeclareLaunchArgument(
"algo",
default_value="sac",
description="RL algorithm to use during the training.",
))
declared_arguments.append(
DeclareLaunchArgument(
"n_timesteps",
default_value="-1",
description="Overwrite the number of timesteps.",
))
declared_arguments.append(
DeclareLaunchArgument(
"hyperparams",
default_value="",
description="Optional RL hyperparameter overwrite (e.g. learning_rate:0.01 train_freq:10).",
))
declared_arguments.append(
DeclareLaunchArgument(
"num_threads",
default_value="-1",
description="Number of threads for PyTorch (-1 to use default).",
))
# Continue training an already trained agent
declared_arguments.append(
DeclareLaunchArgument(
"trained_agent",
default_value="",
description="Path to a pretrained agent to continue training.",
))
# Random seed
declared_arguments.append(
DeclareLaunchArgument(
"seed",
default_value="-1",
description="Random generator seed.",
))
# Saving of model
declared_arguments.append(
DeclareLaunchArgument(
"save_freq",
default_value="10000",
description="Save the model every n steps (if negative, no checkpoint).",
))
declared_arguments.append(
DeclareLaunchArgument(
"save_replay_buffer",
default_value="False",
description="Save the replay buffer too (when applicable).",
))
# Pre-load a replay buffer and start training on it
declared_arguments.append(
DeclareLaunchArgument(
"preload_replay_buffer",
default_value="",
description="Path to a replay buffer that should be preloaded before starting the training process.",
))
# Logging
declared_arguments.append(
DeclareLaunchArgument(
"log_folder",
default_value="logs",
description="Path to the log directory.",
))
declared_arguments.append(
DeclareLaunchArgument(
"tensorboard_log",
default_value="tensorboard_logs",
description="Tensorboard log dir.",
))
declared_arguments.append(
DeclareLaunchArgument(
"log_interval",
default_value="-1",
description="Override log interval (default: -1, no change).",
))
declared_arguments.append(
DeclareLaunchArgument(
"uuid",
default_value="False",
description="Ensure that the run has a unique ID.",
))
# Evaluation
declared_arguments.append(
DeclareLaunchArgument(
"eval_freq",
default_value="-1",
description="Evaluate the agent every n steps (if negative, no evaluation).",
))
declared_arguments.append(
DeclareLaunchArgument(
"eval_episodes",
default_value="5",
description="Number of episodes to use for evaluation.",
))
# Verbosity
declared_arguments.append(
DeclareLaunchArgument(
"verbose",
default_value="1",
description="Verbose mode (0: no output, 1: INFO).",
))
# HER specifics
declared_arguments.append(
DeclareLaunchArgument(
"truncate_last_trajectory",
default_value="True",
description="When using HER with online sampling the last trajectory in the replay buffer will be truncated after) reloading the replay buffer."
)),
declared_arguments.append(
DeclareLaunchArgument(
"log_level",
default_value="error",
description="The level of logging that is applied to all ROS 2 nodes launched by this script.",
))
declared_arguments.append(
DeclareLaunchArgument(
"track",
default_value="true",
description="The level of logging that is applied to all ROS 2 nodes launched by this script.",
))
env_variables = [
SetEnvironmentVariable(name="OMP_DYNAMIC", value="TRUE"),
SetEnvironmentVariable(name="OMP_NUM_THREADS", value=str(cpu_count() // 2))
]
return LaunchDescription(declared_arguments + [OpaqueFunction(function=launch_setup)] + env_variables)
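# Usage sketch (the installed launch file name is an assumption; substitute the
# actual name found under share/rbs_gym/launch):
#
#   ros2 launch rbs_gym train.launch.py env:=Reach-Gazebo-v0 algo:=sac \
#       n_timesteps:=100000 log_folder:=logs tensorboard_log:=tensorboard_logs
#
# Any of the declared arguments above can be overridden the same way,
# e.g. robot_type:=ur5e or hardware:=mock.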

View file

@ -0,0 +1,18 @@
<?xml version="1.0"?>
<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
<package format="3">
<name>rbs_gym</name>
<version>0.0.0</version>
<description>TODO: Package description</description>
<maintainer email="ur.narmak@gmail.com">bill-finger</maintainer>
<license>Apache-2.0</license>
<buildtool_depend>ament_cmake</buildtool_depend>
<test_depend>ament_lint_auto</test_depend>
<test_depend>ament_lint_common</test_depend>
<export>
<build_type>ament_cmake</build_type>
</export>
</package>

View file

View file

@ -0,0 +1,217 @@
# Note: The `open3d` and `stable_baselines3` modules must be imported prior to `gym_gz`
import open3d # isort:skip
import stable_baselines3 # isort:skip
# Note: If installed, `tensorflow` module must be imported before `gym_gz`/`scenario`
# Otherwise, protobuf version incompatibility will cause an error
try:
from importlib.util import find_spec
if find_spec("tensorflow") is not None:
import tensorflow
except Exception:
pass
from os import environ, path
from typing import Any, Dict, Tuple
import numpy as np
from ament_index_python.packages import get_package_share_directory
from gymnasium.envs.registration import register
from rbs_gym.utils.utils import str2bool
from . import tasks
######################
# Runtime Entrypoint #
######################
# Entrypoint for tasks (can be simulated or real)
RBS_ENVS_TASK_ENTRYPOINT: str = (
"gym_gz.runtimes.gazebo_runtime:GazeboRuntime"
)
###################
# Robot Specifics #
###################
# Default robot model to use in the tasks where robot can be static
RBS_ENVS_ROBOT_MODEL: str = "rbs_arm"
######################
# Datasets and paths #
######################
# Path to directory containing base SDF worlds
RBS_ENVS_WORLDS_DIR: str = path.join(
get_package_share_directory("rbs_gym"), "worlds"
)
###########
# Presets #
###########
# Gravity preset for Earth
GRAVITY_EARTH: Tuple[float, float, float] = (0.0, 0.0, -9.80665)
GRAVITY_EARTH_STD: Tuple[float, float, float] = (0.0, 0.0, 0.0232)
############################
# Additional Configuration #
############################
BROADCAST_GUI: bool = str2bool(
environ.get("RBS_ENVS_BROADCAST_INTERACTIVE_GUI", default=True)
)
#########
# Reach #
#########
REACH_MAX_EPISODE_STEPS: int = 100
REACH_KWARGS: Dict[str, Any] = {
"agent_rate": 4.0,
"robot_model": RBS_ENVS_ROBOT_MODEL,
"workspace_frame_id": "world",
"workspace_centre": (-0.45, 0.0, 0.35),
"workspace_volume": (0.7, 0.7, 0.7),
"ignore_new_actions_while_executing": False,
"use_servo": True,
"scaling_factor_translation": 8.0,
"scaling_factor_rotation": 3.0,
"restrict_position_goal_to_workspace": False,
"enable_gripper": False,
"sparse_reward": False,
"collision_reward": -10.0,
"act_quick_reward": -0.01,
"required_accuracy": 0.05,
"num_threads": 3,
}
REACH_KWARGS_SIM: Dict[str, Any] = {
"physics_rate": 1000.0,
"real_time_factor": float(np.finfo(np.float32).max),
"world": path.join(RBS_ENVS_WORLDS_DIR, "default.sdf"),
}
REACH_RANDOMIZER: str = "rbs_gym.envs.randomizers:ManipulationGazeboEnvRandomizer"
REACH_KWARGS_RANDOMIZER: Dict[str, Any] = {
"gravity": GRAVITY_EARTH,
"gravity_std": GRAVITY_EARTH_STD,
"plugin_scene_broadcaster": BROADCAST_GUI,
"plugin_user_commands": BROADCAST_GUI,
"plugin_sensors_render_engine": "ogre2",
"robot_random_pose": False,
"robot_random_joint_positions": True, # FIXME:
"robot_random_joint_positions_std": 0.2,
"robot_random_joint_positions_above_object_spawn": False,
"robot_random_joint_positions_above_object_spawn_elevation": 0.0,
"robot_random_joint_positions_above_object_spawn_xy_randomness": 0.2,
"terrain_enable": True,
"light_type": "sun",
"light_direction": (0.5, 0.4, -0.2),
"light_random_minmax_elevation": (-0.15, -0.5),
"light_distance": 1000.0,
"light_visual": False,
"light_radius": 25.0,
"light_model_rollouts_num": 1,
"object_enable": True,
"object_type": "sphere",
"objects_relative_to": "base_link",
"object_static": True,
"object_collision": False,
"object_visual": True,
"object_color": (0.0, 0.0, 1.0, 1.0),
"object_dimensions": [0.025, 0.025, 0.025],
"object_count": 1,
"object_spawn_position": (-0.4, 0, 0.3),
"object_random_pose": True,
"object_random_spawn_position_segments": [],
"object_random_spawn_volume": (0.2, 0.2, 0.2),
"object_models_rollouts_num": 0,
"underworld_collision_plane": False,
}
REACH_KWARGS_RANDOMIZER_CAMERA: Dict[str, Any] = {
"camera_enable": True,
"camera_width": 64,
"camera_height": 64,
"camera_update_rate": 1.2 * REACH_KWARGS["agent_rate"],
"camera_horizontal_fov": np.pi / 3.0,
"camera_vertical_fov": np.pi / 3.0,
"camera_noise_mean": 0.0,
"camera_noise_stddev": 0.001,
"camera_relative_to": "base_link",
"camera_spawn_position": (0.85, -0.4, 0.45),
"camera_spawn_quat_xyzw": (-0.0402991, -0.0166924, 0.9230002, 0.3823192),
"camera_random_pose_rollouts_num": 0,
"camera_random_pose_mode": "orbit",
"camera_random_pose_orbit_distance": 1.0,
"camera_random_pose_orbit_height_range": (0.1, 0.7),
"camera_random_pose_orbit_ignore_arc_behind_robot": np.pi / 8,
"camera_random_pose_select_position_options": [],
"camera_random_pose_focal_point_z_offset": 0.0,
}
# Task
register(
id="Reach-v0",
entry_point=RBS_ENVS_TASK_ENTRYPOINT,
max_episode_steps=REACH_MAX_EPISODE_STEPS,
kwargs={
"task_cls": tasks.Reach,
**REACH_KWARGS,
},
)
register(
id="Reach-ColorImage-v0",
entry_point=RBS_ENVS_TASK_ENTRYPOINT,
max_episode_steps=REACH_MAX_EPISODE_STEPS,
kwargs={
"task_cls": tasks.ReachColorImage,
**REACH_KWARGS,
},
)
register(
id="Reach-DepthImage-v0",
entry_point=RBS_ENVS_TASK_ENTRYPOINT,
max_episode_steps=REACH_MAX_EPISODE_STEPS,
kwargs={
"task_cls": tasks.ReachDepthImage,
**REACH_KWARGS,
},
)
# Gazebo wrapper
register(
id="Reach-Gazebo-v0",
entry_point=REACH_RANDOMIZER,
max_episode_steps=REACH_MAX_EPISODE_STEPS,
kwargs={
"env": "Reach-v0",
**REACH_KWARGS_SIM,
**REACH_KWARGS_RANDOMIZER,
"camera_enable": False,
},
)
register(
id="Reach-ColorImage-Gazebo-v0",
entry_point=REACH_RANDOMIZER,
max_episode_steps=REACH_MAX_EPISODE_STEPS,
kwargs={
"env": "Reach-ColorImage-v0",
**REACH_KWARGS_SIM,
**REACH_KWARGS_RANDOMIZER,
**REACH_KWARGS_RANDOMIZER_CAMERA,
"camera_type": "rgbd_camera",
"camera_publish_color": True,
},
)
register(
id="Reach-DepthImage-Gazebo-v0",
entry_point=REACH_RANDOMIZER,
max_episode_steps=REACH_MAX_EPISODE_STEPS,
kwargs={
"env": "Reach-DepthImage-v0",
**REACH_KWARGS_SIM,
**REACH_KWARGS_RANDOMIZER,
**REACH_KWARGS_RANDOMIZER_CAMERA,
"camera_type": "depth_camera",
"camera_publish_depth": True,
},
)
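# Usage sketch: the register() calls above expose the environments through the
# standard Gymnasium factory (assumes a sourced workspace and a Gazebo-capable
# setup):
#
#   import gymnasium as gym
#   import rbs_gym.envs  # noqa: F401 -- triggers the registrations above
#
#   env = gym.make("Reach-Gazebo-v0")
#   observation, info = env.reset(seed=42)
#   observation, reward, terminated, truncated, info = env.step(env.action_space.sample())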

View file

@ -0,0 +1,3 @@
from .cartesian_force_controller import CartesianForceController
from .grippper_controller import GripperController
from .joint_effort_controller import JointEffortController

View file

@ -0,0 +1,41 @@
import sys
from typing import Optional
import rclpy
from geometry_msgs.msg import WrenchStamped
from rclpy.node import Node
from rclpy.parameter import Parameter
class CartesianForceController:
def __init__(self, node, namespace: Optional[str] = "") -> None:
self.node = node
self.publisher = node.create_publisher(WrenchStamped,
namespace + "/cartesian_force_controller/target_wrench", 10)
self.timer = node.create_timer(0.1, self.timer_callback)
self.publish = False
self._target_wrench = WrenchStamped()
@property
def target_wrench(self) -> WrenchStamped:
return self._target_wrench
@target_wrench.setter
def target_wrench(self, wrench: WrenchStamped):
self._target_wrench = wrench
def timer_callback(self):
if self.publish:
self.publisher.publish(self._target_wrench)
class CartesianForceControllerStandalone(Node, CartesianForceController):
def __init__(self, node_name: str = "rbs_gym_controller", use_sim_time: bool = True):
try:
rclpy.init()
except Exception as e:
if not rclpy.ok():
sys.exit(f"ROS 2 context could not be initialised: {e}")
Node.__init__(self, node_name)
self.set_parameters(
[Parameter("use_sim_time", type_=Parameter.Type.BOOL, value=use_sim_time)]
)
CartesianForceController.__init__(self, node=self)
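# Usage sketch (assumes an external rclpy context and an active
# cartesian_force_controller; frame name and values are illustrative):
#
#   import rclpy
#   from geometry_msgs.msg import WrenchStamped
#
#   rclpy.init()
#   node = rclpy.create_node("force_demo")
#   controller = CartesianForceController(node)
#   wrench = WrenchStamped()
#   wrench.header.frame_id = "base_link"  # hypothetical frame
#   wrench.wrench.force.z = 5.0
#   controller.target_wrench = wrench
#   controller.publish = True  # the 10 Hz timer now publishes the wrench
#   rclpy.spin(node)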

View file

@ -0,0 +1,121 @@
import rclpy
from rclpy.node import Node
import numpy as np
import quaternion
from geometry_msgs.msg import Twist
from geometry_msgs.msg import PoseStamped
import tf2_ros
import sys
import time
import threading
import os
class VelocityController:
"""Convert Twist messages to PoseStamped
Use this node to integrate twist messages into a moving target pose in
Cartesian space. An initial TF lookup assures that the target pose always
starts at the robot's end-effector.
"""
def __init__(self, node: Node, topic_pose: str, topic_twist: str, base_frame: str, ee_frame: str):
self.node = node
self._frame_id = base_frame
self._end_effector = ee_frame
self.tf_buffer = tf2_ros.Buffer()
self.tf_listener = tf2_ros.TransformListener(self.tf_buffer, node)
self.rot = np.quaternion(0, 0, 0, 1)
self.pos = [0, 0, 0]
self.pub = node.create_publisher(PoseStamped, topic_pose, 3)
self.sub = node.create_subscription(Twist, topic_twist, self.twist_cb, 1)
self.last = time.time()
self.startup_done = False
period = 1.0 / node.declare_parameter("publishing_rate", 100).value
self.timer = node.create_timer(period, self.publish)
self.publish_it = False
self.thread = threading.Thread(target=self.startup, daemon=True)
self.thread.start()
def startup(self):
"""Make sure to start at the robot's current pose"""
# Wait until we entered spinning in the main thread.
time.sleep(1)
try:
start = self.tf_buffer.lookup_transform(
target_frame=self._frame_id,
source_frame=self._end_effector,
time=rclpy.time.Time(),
)
except (
tf2_ros.InvalidArgumentException,
tf2_ros.LookupException,
tf2_ros.ConnectivityException,
tf2_ros.ExtrapolationException,
) as e:
print(f"Startup failed: {e}")
os._exit(1)
self.pos[0] = start.transform.translation.x
self.pos[1] = start.transform.translation.y
self.pos[2] = start.transform.translation.z
self.rot.x = start.transform.rotation.x
self.rot.y = start.transform.rotation.y
self.rot.z = start.transform.rotation.z
self.rot.w = start.transform.rotation.w
self.startup_done = True
def twist_cb(self, data):
"""Numerically integrate twist message into a pose
Use self._frame_id as the reference frame for the navigation commands.
"""
now = time.time()
dt = now - self.last
self.last = now
# Position update
self.pos[0] += data.linear.x * dt
self.pos[1] += data.linear.y * dt
self.pos[2] += data.linear.z * dt
# Orientation update
wx = data.angular.x
wy = data.angular.y
wz = data.angular.z
_, q = quaternion.integrate_angular_velocity(
lambda _: (wx, wy, wz), 0, dt, self.rot
)
self.rot = q[-1] # the last one is after dt passed
def publish(self):
if not self.startup_done:
return
if not self.publish_it:
return
try:
msg = PoseStamped()
msg.header.stamp = self.node.get_clock().now().to_msg()
msg.header.frame_id = self._frame_id
msg.pose.position.x = self.pos[0]
msg.pose.position.y = self.pos[1]
msg.pose.position.z = self.pos[2]
msg.pose.orientation.x = self.rot.x
msg.pose.orientation.y = self.rot.y
msg.pose.orientation.z = self.rot.z
msg.pose.orientation.w = self.rot.w
self.pub.publish(msg)
except Exception:
# Swallow 'publish() to closed topic' error.
# This rarely happens on killing this node.
pass
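# Usage sketch (topic and frame names are assumptions; adapt them to the
# actual controller configuration and robot description):
#
#   import rclpy
#   rclpy.init()
#   node = rclpy.create_node("velocity_demo")
#   vc = VelocityController(
#       node,
#       topic_pose="/cartesian_motion_controller/target_frame",
#       topic_twist="/twist_cmd",
#       base_frame="base_link",
#       ee_frame="gripper_grasp_point",
#   )
#   vc.publish_it = True  # start publishing the integrated target pose
#   rclpy.spin(node)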

View file

@ -0,0 +1,46 @@
from typing import Optional
from control_msgs.action import GripperCommand
from rclpy.action import ActionClient
class GripperController:
def __init__(self, node,
open_position: Optional[float] = 0.0,
close_position: Optional[float] = 0.0,
max_effort: Optional[float] = 0.0,
namespace: Optional[str] = ""):
self.node = node
self._action_client = ActionClient(node, GripperCommand,
namespace + "/gripper_controller/gripper_cmd")
self._open_position = open_position
self._close_position = close_position
self._max_effort = max_effort
def open(self):
self.send_goal(self._open_position)
def close(self):
self.send_goal(self._close_position)
def send_goal(self, goal: float):
goal_msg = GripperCommand.Goal()
goal_msg._command.position = goal
goal_msg._command.max_effort = self._max_effort
self._action_client.wait_for_server()
self._send_goal_future = self._action_client.send_goal_async(goal_msg)
self._send_goal_future.add_done_callback(self.goal_response_callback)
def goal_response_callback(self, future):
goal_handle = future.result()
if not goal_handle.accepted:
self.node.get_logger().info('Goal rejected :(')
return
self.node.get_logger().info('Goal accepted :)')
self._get_result_future = goal_handle.get_result_async()
self._get_result_future.add_done_callback(self.get_result_callback)
def get_result_callback(self, future):
result = future.result().result
self.get_logger().info(f"Gripper position: {result.position}")

View file

@ -0,0 +1,25 @@
from typing import Optional
from std_msgs.msg import Float64MultiArray
class JointEffortController:
def __init__(self, node, namespace: Optional[str] = "") -> None:
self.node = node
self.publisher = node.create_publisher(Float64MultiArray,
namespace + "/joint_effort_controller/commands", 10)
# self.timer = node.create_timer(0.1, self.timer_callback)
# self.publish = True
self._effort_array = Float64MultiArray()
@property
def target_effort(self) -> Float64MultiArray:
return self._effort_array
@target_effort.setter
def target_effort(self, data: Float64MultiArray):
self._effort_array = data
# def timer_callback(self):
# if self.publish:
# self.publisher.publish(self._target_wrench)
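# Usage sketch: the controller only stores the commanded efforts, so the
# caller publishes them explicitly (7 joints assumed for illustration):
#
#   from std_msgs.msg import Float64MultiArray
#
#   controller = JointEffortController(node)
#   msg = Float64MultiArray()
#   msg.data = [0.0] * 7  # one effort value per controlled joint
#   controller.target_effort = msg
#   controller.publisher.publish(controller.target_effort)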

View file

@ -0,0 +1,5 @@
from .lights import *
from .objects import *
from .robots import *
from .sensors import *
from .terrains import *

View file

@ -0,0 +1,20 @@
from gym_gz.scenario.model_wrapper import ModelWrapper
from .random_sun import RandomSun
from .sun import Sun
def get_light_model_class(light_type: str) -> ModelWrapper:
# TODO: Refactor into enum
if "sun" == light_type:
return Sun
elif "random_sun" == light_type:
return RandomSun
def is_light_type_randomizable(light_type: str) -> bool:
if "random_sun" == light_type:
return True
return False
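# Example: resolving a light class by its configuration string before
# spawning it (given an existing scenario world):
#
#   light_cls = get_light_model_class("sun")            # -> Sun
#   can_randomize = is_light_type_randomizable("sun")   # -> False
#   light = light_cls(world=world, direction=(0.5, 0.4, -0.2))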

View file

@ -0,0 +1,158 @@
from typing import Optional, Tuple
import numpy as np
from gym_gz.scenario import model_wrapper
from gym_gz.utils.scenario import get_unique_model_name
from numpy.random import RandomState
from scenario import core as scenario
class RandomSun(model_wrapper.ModelWrapper):
def __init__(
self,
world: scenario.World,
name: str = "sun",
minmax_elevation: Tuple[float, float] = (-0.15, -0.65),
distance: float = 800.0,
visual: bool = True,
radius: float = 20.0,
color_minmax_r: Tuple[float, float] = (1.0, 1.0),
color_minmax_g: Tuple[float, float] = (1.0, 1.0),
color_minmax_b: Tuple[float, float] = (1.0, 1.0),
specular: float = 1.0,
attenuation_minmax_range: Tuple[float, float] = (750.0, 15000.0),
attenuation_minmax_constant: Tuple[float, float] = (0.5, 1.0),
attenuation_minmax_linear: Tuple[float, float] = (0.001, 0.1),
attenuation_minmax_quadratic: Tuple[float, float] = (0.0001, 0.01),
np_random: Optional[RandomState] = None,
**kwargs,
):
if np_random is None:
np_random = np.random.default_rng()
# Get a unique model name
model_name = get_unique_model_name(world, name)
# Get random yaw direction
direction = np_random.uniform(-1.0, 1.0, (2,))
# Normalize yaw direction
direction = direction / np.linalg.norm(direction)
# Get random elevation
direction = np.append(
direction,
np_random.uniform(minmax_elevation[0], minmax_elevation[1]),
)
# Normalize again
direction = direction / np.linalg.norm(direction)
# Initial pose
initial_pose = scenario.Pose(
(
-direction[0] * distance,
-direction[1] * distance,
-direction[2] * distance,
),
(1, 0, 0, 0),
)
# Create SDF string for the model
sdf = self.get_sdf(
model_name=model_name,
direction=direction,
visual=visual,
radius=radius,
color_minmax_r=color_minmax_r,
color_minmax_g=color_minmax_g,
color_minmax_b=color_minmax_b,
attenuation_minmax_range=attenuation_minmax_range,
attenuation_minmax_constant=attenuation_minmax_constant,
attenuation_minmax_linear=attenuation_minmax_linear,
attenuation_minmax_quadratic=attenuation_minmax_quadratic,
specular=specular,
np_random=np_random,
)
# Insert the model
ok_model = world.to_gazebo().insert_model_from_string(
sdf, initial_pose, model_name
)
if not ok_model:
raise RuntimeError("Failed to insert " + model_name)
# Get the model
model = world.get_model(model_name)
# Initialize base class
model_wrapper.ModelWrapper.__init__(self, model=model)
@classmethod
def get_sdf(
cls,
model_name: str,
direction: Tuple[float, float, float],
visual: bool,
radius: float,
color_minmax_r: Tuple[float, float],
color_minmax_g: Tuple[float, float],
color_minmax_b: Tuple[float, float],
attenuation_minmax_range: Tuple[float, float],
attenuation_minmax_constant: Tuple[float, float],
attenuation_minmax_linear: Tuple[float, float],
attenuation_minmax_quadratic: Tuple[float, float],
specular: float,
np_random: RandomState,
) -> str:
# Sample random values for parameters
color_r = np_random.uniform(color_minmax_r[0], color_minmax_r[1])
color_g = np_random.uniform(color_minmax_g[0], color_minmax_g[1])
color_b = np_random.uniform(color_minmax_b[0], color_minmax_b[1])
attenuation_range = np_random.uniform(
attenuation_minmax_range[0], attenuation_minmax_range[1]
)
attenuation_constant = np_random.uniform(
attenuation_minmax_constant[0], attenuation_minmax_constant[1]
)
attenuation_linear = np_random.uniform(
attenuation_minmax_linear[0], attenuation_minmax_linear[1]
)
attenuation_quadratic = np_random.uniform(
attenuation_minmax_quadratic[0], attenuation_minmax_quadratic[1]
)
return f'''<sdf version="1.9">
<model name="{model_name}">
<static>true</static>
<link name="{model_name}_link">
<light type="directional" name="{model_name}_light">
<direction>{direction[0]} {direction[1]} {direction[2]}</direction>
<attenuation>
<range>{attenuation_range}</range>
<constant>{attenuation_constant}</constant>
<linear>{attenuation_linear}</linear>
<quadratic>{attenuation_quadratic}</quadratic>
</attenuation>
<diffuse>{color_r} {color_g} {color_b} 1</diffuse>
<specular>{specular*color_r} {specular*color_g} {specular*color_b} 1</specular>
<cast_shadows>true</cast_shadows>
</light>
{
f"""
<visual name="{model_name}_visual">
<geometry>
<sphere>
<radius>{radius}</radius>
</sphere>
</geometry>
<material>
<emissive>{color_r} {color_g} {color_b} 1</emissive>
</material>
<cast_shadows>false</cast_shadows>
</visual>
""" if visual else ""
}
</link>
</model>
</sdf>'''

View file

@ -0,0 +1,119 @@
from typing import List, Tuple
import numpy as np
from gym_gz.scenario import model_wrapper
from gym_gz.utils.scenario import get_unique_model_name
from scenario import core as scenario
class Sun(model_wrapper.ModelWrapper):
def __init__(
self,
world: scenario.World,
name: str = "sun",
direction: Tuple[float, float, float] = (0.5, -0.25, -0.75),
color: List[float] = (1.0, 1.0, 1.0, 1.0),
distance: float = 800.0,
visual: bool = True,
radius: float = 20.0,
specular: float = 1.0,
attenuation_range: float = 10000.0,
attenuation_constant: float = 0.9,
attenuation_linear: float = 0.01,
attenuation_quadratic: float = 0.001,
**kwargs,
):
# Get a unique model name
model_name = get_unique_model_name(world, name)
# Normalize direction
direction = np.array(direction)
direction = direction / np.linalg.norm(direction)
# Initial pose
initial_pose = scenario.Pose(
(
-direction[0] * distance,
-direction[1] * distance,
-direction[2] * distance,
),
(1, 0, 0, 0),
)
# Create SDF string for the model
sdf = self.get_sdf(
model_name=model_name,
direction=direction,
color=color,
visual=visual,
radius=radius,
specular=specular,
attenuation_range=attenuation_range,
attenuation_constant=attenuation_constant,
attenuation_linear=attenuation_linear,
attenuation_quadratic=attenuation_quadratic,
)
# Insert the model
ok_model = world.to_gazebo().insert_model_from_string(
sdf, initial_pose, model_name
)
if not ok_model:
raise RuntimeError("Failed to insert " + model_name)
# Get the model
model = world.get_model(model_name)
# Initialize base class
model_wrapper.ModelWrapper.__init__(self, model=model)
@classmethod
def get_sdf(
cls,
model_name: str,
direction: Tuple[float, float, float],
color: Tuple[float, float, float, float],
visual: bool,
radius: float,
specular: float,
attenuation_range: float,
attenuation_constant: float,
attenuation_linear: float,
attenuation_quadratic: float,
) -> str:
return f'''<sdf version="1.9">
<model name="{model_name}">
<static>true</static>
<link name="{model_name}_link">
<light type="directional" name="{model_name}_light">
<direction>{direction[0]} {direction[1]} {direction[2]}</direction>
<attenuation>
<range>{attenuation_range}</range>
<constant>{attenuation_constant}</constant>
<linear>{attenuation_linear}</linear>
<quadratic>{attenuation_quadratic}</quadratic>
</attenuation>
<diffuse>{color[0]} {color[1]} {color[2]} 1</diffuse>
<specular>{specular*color[0]} {specular*color[1]} {specular*color[2]} 1</specular>
<cast_shadows>true</cast_shadows>
</light>
{
f"""
<visual name="{model_name}_visual">
<geometry>
<sphere>
<radius>{radius}</radius>
</sphere>
</geometry>
<material>
<emissive>{color[0]} {color[1]} {color[2]} 1</emissive>
</material>
<cast_shadows>false</cast_shadows>
</visual>
""" if visual else ""
}
</link>
</model>
</sdf>'''
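# Usage sketch (requires an initialized scenario Gazebo world):
#
#   sun = Sun(world, direction=(0.5, -0.25, -0.75), visual=False)
#
# The model is inserted into the world on construction; the returned wrapper
# can then be used like any other gym_gz ModelWrapper.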

View file

@ -0,0 +1,35 @@
from gym_gz.scenario.model_wrapper import ModelWrapper
from .primitives import Box, Cylinder, Plane, Sphere
from .random_lunar_rock import RandomLunarRock
from .random_object import RandomObject
from .random_primitive import RandomPrimitive
from .rock import Rock
def get_object_model_class(object_type: str) -> ModelWrapper:
# TODO: Refactor into enum
if "box" == object_type:
return Box
elif "sphere" == object_type:
return Sphere
elif "cylinder" == object_type:
return Cylinder
elif "random_primitive" == object_type:
return RandomPrimitive
elif "random_mesh" == object_type:
return RandomObject
elif "rock" == object_type:
return Rock
elif "random_lunar_rock" == object_type:
return RandomLunarRock
def is_object_type_randomizable(object_type: str) -> bool:
return (
"random_primitive" == object_type
or "random_mesh" == object_type
or "random_lunar_rock" == object_type
)
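# Example: the mapping above lets randomizers construct objects from a plain
# configuration string:
#
#   object_cls = get_object_model_class("sphere")       # -> Sphere
#   if is_object_type_randomizable("random_primitive"):
#       ...  # re-sample the model on each rollout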

View file

@ -0,0 +1,4 @@
from .box import Box
from .cylinder import Cylinder
from .plane import Plane
from .sphere import Sphere

View file

@ -0,0 +1,129 @@
from typing import List
from gym_gz.scenario import model_wrapper
from gym_gz.utils import misc
from gym_gz.utils.scenario import get_unique_model_name
from scenario import core as scenario
class Box(model_wrapper.ModelWrapper):
def __init__(
self,
world: scenario.World,
name: str = "box",
position: List[float] = (0, 0, 0),
orientation: List[float] = (1, 0, 0, 0),
size: List[float] = (0.05, 0.05, 0.05),
mass: float = 0.1,
static: bool = False,
collision: bool = True,
friction: float = 1.0,
visual: bool = True,
gui_only: bool = False,
color: List[float] = (0.8, 0.8, 0.8, 1.0),
**kwargs,
):
# Get a unique model name
model_name = get_unique_model_name(world, name)
# Initial pose
initial_pose = scenario.Pose(position, orientation)
# Create SDF string for the model
sdf = self.get_sdf(
model_name=model_name,
size=size,
mass=mass,
static=static,
collision=collision,
friction=friction,
visual=visual,
gui_only=gui_only,
color=color,
)
# Insert the model
ok_model = world.to_gazebo().insert_model_from_string(
sdf, initial_pose, model_name
)
if not ok_model:
raise RuntimeError("Failed to insert " + model_name)
# Get the model
model = world.get_model(model_name)
# Initialize base class
model_wrapper.ModelWrapper.__init__(self, model=model)
@classmethod
def get_sdf(
cls,
model_name: str,
size: List[float],
mass: float,
static: bool,
collision: bool,
friction: float,
visual: bool,
gui_only: bool,
color: List[float],
) -> str:
return f'''<sdf version="1.7">
<model name="{model_name}">
<static>{"true" if static else "false"}</static>
<link name="{model_name}_link">
{
f"""
<collision name="{model_name}_collision">
<geometry>
<box>
<size>{size[0]} {size[1]} {size[2]}</size>
</box>
</geometry>
<surface>
<friction>
<ode>
<mu>{friction}</mu>
<mu2>{friction}</mu2>
<fdir1>0 0 0</fdir1>
<slip1>0.0</slip1>
<slip2>0.0</slip2>
</ode>
</friction>
</surface>
</collision>
""" if collision else ""
}
{
f"""
<visual name="{model_name}_visual">
<geometry>
<box>
<size>{size[0]} {size[1]} {size[2]}</size>
</box>
</geometry>
<material>
<ambient>{color[0]} {color[1]} {color[2]} {color[3]}</ambient>
<diffuse>{color[0]} {color[1]} {color[2]} {color[3]}</diffuse>
<specular>{color[0]} {color[1]} {color[2]} {color[3]}</specular>
</material>
<transparency>{1.0-color[3]}</transparency>
{'<visibility_flags>1</visibility_flags> <cast_shadows>false</cast_shadows>' if gui_only else ''}
</visual>
""" if visual else ""
}
<inertial>
<mass>{mass}</mass>
<inertia>
<ixx>{(size[1]**2 + size[2]**2)*mass/12}</ixx>
<iyy>{(size[0]**2 + size[2]**2)*mass/12}</iyy>
<izz>{(size[0]**2 + size[1]**2)*mass/12}</izz>
<ixy>0.0</ixy>
<ixz>0.0</ixz>
<iyz>0.0</iyz>
</inertia>
</inertial>
</link>
</model>
</sdf>'''
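# Inertia check: for a solid cuboid of mass m and edge lengths (x, y, z),
# ixx = m*(y**2 + z**2)/12 (and cyclically for iyy/izz), matching the SDF
# template above. For the defaults (m=0.1 kg, 0.05 m cube):
#   ixx = 0.1 * (0.05**2 + 0.05**2) / 12 ≈ 4.17e-05 kg*m**2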

View file

@ -0,0 +1,137 @@
from typing import List
from gym_gz.scenario import model_wrapper
from gym_gz.utils import misc
from gym_gz.utils.scenario import get_unique_model_name
from scenario import core as scenario
class Cylinder(model_wrapper.ModelWrapper):
def __init__(
self,
world: scenario.World,
name: str = "cylinder",
position: List[float] = (0, 0, 0),
orientation: List[float] = (1, 0, 0, 0),
radius: float = 0.025,
length: float = 0.1,
mass: float = 0.1,
static: bool = False,
collision: bool = True,
friction: float = 1.0,
visual: bool = True,
gui_only: bool = False,
color: List[float] = (0.8, 0.8, 0.8, 1.0),
**kwargs,
):
# Get a unique model name
model_name = get_unique_model_name(world, name)
# Initial pose
initial_pose = scenario.Pose(position, orientation)
# Create SDF string for the model
sdf = self.get_sdf(
model_name=model_name,
radius=radius,
length=length,
mass=mass,
static=static,
collision=collision,
friction=friction,
visual=visual,
gui_only=gui_only,
color=color,
)
# Insert the model
ok_model = world.to_gazebo().insert_model_from_string(
sdf, initial_pose, model_name
)
if not ok_model:
raise RuntimeError("Failed to insert " + model_name)
# Get the model
model = world.get_model(model_name)
# Initialize base class
model_wrapper.ModelWrapper.__init__(self, model=model)
@classmethod
def get_sdf(
cls,
model_name: str,
radius: float,
length: float,
mass: float,
static: bool,
collision: bool,
friction: float,
visual: bool,
gui_only: bool,
color: List[float],
) -> str:
# Inertia is identical for xx and yy components, compute only once
inertia_xx_yy = (3 * radius**2 + length**2) * mass / 12
return f'''<sdf version="1.7">
<model name="{model_name}">
<static>{"true" if static else "false"}</static>
<link name="{model_name}_link">
{
f"""
<collision name="{model_name}_collision">
<geometry>
<cylinder>
<radius>{radius}</radius>
<length>{length}</length>
</cylinder>
</geometry>
<surface>
<friction>
<ode>
<mu>{friction}</mu>
<mu2>{friction}</mu2>
<fdir1>0 0 0</fdir1>
<slip1>0.0</slip1>
<slip2>0.0</slip2>
</ode>
</friction>
</surface>
</collision>
""" if collision else ""
}
{
f"""
<visual name="{model_name}_visual">
<geometry>
<cylinder>
<radius>{radius}</radius>
<length>{length}</length>
</cylinder>
</geometry>
<material>
<ambient>{color[0]} {color[1]} {color[2]} {color[3]}</ambient>
<diffuse>{color[0]} {color[1]} {color[2]} {color[3]}</diffuse>
<specular>{color[0]} {color[1]} {color[2]} {color[3]}</specular>
</material>
<transparency>{1.0-color[3]}</transparency>
{'<visibility_flags>1</visibility_flags> <cast_shadows>false</cast_shadows>' if gui_only else ''}
</visual>
""" if visual else ""
}
<inertial>
<mass>{mass}</mass>
<inertia>
<ixx>{inertia_xx_yy}</ixx>
<iyy>{inertia_xx_yy}</iyy>
<izz>{(mass*radius**2)/2}</izz>
<ixy>0.0</ixy>
<ixz>0.0</ixz>
<iyz>0.0</iyz>
</inertia>
</inertial>
</link>
</model>
</sdf>'''

View file

@ -0,0 +1,90 @@
from typing import List
from gym_gz.scenario import model_wrapper
from gym_gz.utils import misc
from gym_gz.utils.scenario import get_unique_model_name
from scenario import core as scenario
class Plane(model_wrapper.ModelWrapper):
def __init__(
self,
world: scenario.World,
name: str = "plane",
position: List[float] = (0, 0, 0),
orientation: List[float] = (1, 0, 0, 0),
size: List[float] = (1.0, 1.0),
direction: List[float] = (0.0, 0.0, 1.0),
collision: bool = True,
friction: float = 1.0,
visual: bool = True,
**kwargs,
):
# Get a unique model name
model_name = get_unique_model_name(world, name)
# Initial pose
initial_pose = scenario.Pose(position, orientation)
# Create SDF string for the model
sdf = f'''<sdf version="1.7">
<model name="{model_name}">
<static>true</static>
<link name="{model_name}_link">
{
f"""
<collision name="{model_name}_collision">
<geometry>
<plane>
<normal>{direction[0]} {direction[1]} {direction[2]}</normal>
<size>{size[0]} {size[1]}</size>
</plane>
</geometry>
<surface>
<friction>
<ode>
<mu>{friction}</mu>
<mu2>{friction}</mu2>
<fdir1>0 0 0</fdir1>
<slip1>0.0</slip1>
<slip2>0.0</slip2>
</ode>
</friction>
</surface>
</collision>
""" if collision else ""
}
{
f"""
<visual name="{model_name}_visual">
<geometry>
<plane>
<normal>{direction[0]} {direction[1]} {direction[2]}</normal>
<size>{size[0]} {size[1]}</size>
</plane>
</geometry>
<material>
<ambient>0.8 0.8 0.8 1</ambient>
<diffuse>0.8 0.8 0.8 1</diffuse>
<specular>0.8 0.8 0.8 1</specular>
</material>
</visual>
""" if visual else ""
}
</link>
</model>
</sdf>'''
# Insert the model
ok_model = world.to_gazebo().insert_model_from_string(
sdf, initial_pose, model_name
)
if not ok_model:
raise RuntimeError("Failed to insert " + model_name)
# Get the model
model = world.get_model(model_name)
# Initialize base class
model_wrapper.ModelWrapper.__init__(self, model=model)

View file

@ -0,0 +1,132 @@
from typing import List
from gym_gz.scenario import model_wrapper
from gym_gz.utils import misc
from gym_gz.utils.scenario import get_unique_model_name
from scenario import core as scenario
class Sphere(model_wrapper.ModelWrapper):
def __init__(
self,
world: scenario.World,
name: str = "sphere",
position: List[float] = (0, 0, 0),
orientation: List[float] = (1, 0, 0, 0),
radius: float = 0.025,
mass: float = 0.1,
static: bool = False,
collision: bool = True,
friction: float = 1.0,
visual: bool = True,
gui_only: bool = False,
color: List[float] = (0.8, 0.8, 0.8, 1.0),
**kwargs,
):
# Get a unique model name
model_name = get_unique_model_name(world, name)
# Initial pose
initial_pose = scenario.Pose(position, orientation)
# Create SDF string for the model
sdf = self.get_sdf(
model_name=model_name,
radius=radius,
mass=mass,
static=static,
collision=collision,
friction=friction,
visual=visual,
gui_only=gui_only,
color=color,
)
# Insert the model
ok_model = world.to_gazebo().insert_model_from_string(
sdf, initial_pose, model_name
)
if not ok_model:
raise RuntimeError("Failed to insert " + model_name)
# Get the model
model = world.get_model(model_name)
# Initialize base class
model_wrapper.ModelWrapper.__init__(self, model=model)
@classmethod
def get_sdf(
cls,
model_name: str,
radius: float,
mass: float,
static: bool,
collision: bool,
friction: float,
visual: bool,
gui_only: bool,
color: List[float],
) -> str:
# Inertia is identical for all axes
inertia_xx_yy_zz = (mass * radius**2) * 2 / 5
return f'''<sdf version="1.7">
<model name="{model_name}">
<static>{"true" if static else "false"}</static>
<link name="{model_name}_link">
{
f"""
<collision name="{model_name}_collision">
<geometry>
<sphere>
<radius>{radius}</radius>
</sphere>
</geometry>
<surface>
<friction>
<ode>
<mu>{friction}</mu>
<mu2>{friction}</mu2>
<fdir1>0 0 0</fdir1>
<slip1>0.0</slip1>
<slip2>0.0</slip2>
</ode>
</friction>
</surface>
</collision>
""" if collision else ""
}
{
f"""
<visual name="{model_name}_visual">
<geometry>
<sphere>
<radius>{radius}</radius>
</sphere>
</geometry>
<material>
<ambient>{color[0]} {color[1]} {color[2]} {color[3]}</ambient>
<diffuse>{color[0]} {color[1]} {color[2]} {color[3]}</diffuse>
<specular>{color[0]} {color[1]} {color[2]} {color[3]}</specular>
</material>
<transparency>{1.0-color[3]}</transparency>
{'<visibility_flags>1</visibility_flags> <cast_shadows>false</cast_shadows>' if gui_only else ''}
</visual>
""" if visual else ""
}
<inertial>
<mass>{mass}</mass>
<inertia>
<ixx>{inertia_xx_yy_zz}</ixx>
<iyy>{inertia_xx_yy_zz}</iyy>
<izz>{inertia_xx_yy_zz}</izz>
<ixy>0.0</ixy>
<ixz>0.0</ixz>
<iyz>0.0</iyz>
</inertia>
</inertial>
</link>
</model>
</sdf>'''

View file

@ -0,0 +1,57 @@
import os
from typing import List, Optional
import numpy as np
from gym_gz.scenario import model_wrapper
from gym_gz.utils.scenario import get_unique_model_name
from numpy.random import RandomState
from scenario import core as scenario
class RandomLunarRock(model_wrapper.ModelWrapper):
def __init__(
self,
world: scenario.World,
name: str = "rock",
position: List[float] = (0, 0, 0),
orientation: List[float] = (1, 0, 0, 0),
models_dir: Optional[str] = None,
np_random: Optional[RandomState] = None,
**kwargs,
):
if np_random is None:
np_random = np.random.default_rng()
# Get a unique model name
model_name = get_unique_model_name(world, name)
# Initial pose
initial_pose = scenario.Pose(position, orientation)
# Get path to all lunar rock models
if not models_dir:
models_dir = os.environ.get("SDF_PATH_LUNAR_ROCK", default="")
# Make sure the path exists
if not os.path.exists(models_dir):
raise ValueError(
f"Invalid path '{models_dir}' pointed by 'SDF_PATH_LUNAR_ROCK' environment variable."
)
# Select a single model at random
model_dir = np_random.choice(os.listdir(models_dir))
sdf_filepath = os.path.join(models_dir, model_dir, "model.sdf")
# Insert the model
ok_model = world.to_gazebo().insert_model_from_file(
sdf_filepath, initial_pose, model_name
)
if not ok_model:
raise RuntimeError("Failed to insert " + model_name)
# Get the model
model = world.get_model(model_name)
# Initialize base class
model_wrapper.ModelWrapper.__init__(self, model=model)

View file

@ -0,0 +1,60 @@
from typing import List, Optional
from gym_gz.scenario import model_wrapper
from gym_gz.utils.scenario import get_unique_model_name
from numpy.random import RandomState
from scenario import core as scenario
from rbs_gym.envs.models.utils import ModelCollectionRandomizer
class RandomObject(model_wrapper.ModelWrapper):
def __init__(
self,
world: scenario.World,
name: str = "object",
position: List[float] = (0, 0, 0),
orientation: List[float] = (1, 0, 0, 0),
model_paths: Optional[str] = None,
owner: str = "GoogleResearch",
collection: str = "Google Scanned Objects",
server: str = "https://fuel.ignitionrobotics.org",
server_version: str = "1.0",
unique_cache: bool = False,
reset_collection: bool = False,
np_random: Optional[RandomState] = None,
**kwargs,
):
# Get a unique model name
model_name = get_unique_model_name(world, name)
# Initial pose
initial_pose = scenario.Pose(position, orientation)
model_collection_randomizer = ModelCollectionRandomizer(
model_paths=model_paths,
owner=owner,
collection=collection,
server=server,
server_version=server_version,
unique_cache=unique_cache,
reset_collection=reset_collection,
np_random=np_random,
)
# Note: using default arguments here
modified_sdf_file = model_collection_randomizer.random_model()
# Insert the model
ok_model = world.to_gazebo().insert_model(
modified_sdf_file, initial_pose, model_name
)
if not ok_model:
raise RuntimeError("Failed to insert " + model_name)
# Get the model
model = world.get_model(model_name)
# Initialize base class
model_wrapper.ModelWrapper.__init__(self, model=model)

View file

@ -0,0 +1,124 @@
from typing import List, Optional, Union
import numpy as np
from gym_gz.scenario import model_wrapper
from gym_gz.utils import misc
from gym_gz.utils.scenario import get_unique_model_name
from numpy.random import RandomState
from scenario import core as scenario
from . import Box, Cylinder, Sphere
class RandomPrimitive(model_wrapper.ModelWrapper):
def __init__(
self,
world: scenario.World,
name: str = "primitive",
use_specific_primitive: Union[str, None] = None,
position: List[float] = (0, 0, 0),
orientation: List[float] = (1, 0, 0, 0),
static: bool = False,
collision: bool = True,
visual: bool = True,
gui_only: bool = False,
np_random: Optional[RandomState] = None,
**kwargs,
):
if np_random is None:
np_random = np.random.default_rng()
# Get a unique model name
model_name = get_unique_model_name(world, name)
# Initial pose
initial_pose = scenario.Pose(position, orientation)
# Create SDF string for the model
sdf = self.get_sdf(
model_name=model_name,
use_specific_primitive=use_specific_primitive,
static=static,
collision=collision,
visual=visual,
gui_only=gui_only,
np_random=np_random,
)
# Insert the model
ok_model = world.to_gazebo().insert_model_from_string(
sdf, initial_pose, model_name
)
if not ok_model:
raise RuntimeError("Failed to insert " + model_name)
# Get the model
model = world.get_model(model_name)
# Initialize base class
model_wrapper.ModelWrapper.__init__(self, model=model)
@classmethod
def get_sdf(
cls,
model_name: str,
use_specific_primitive: Union[str, None],
static: bool,
collision: bool,
visual: bool,
gui_only: bool,
np_random: RandomState,
) -> str:
if use_specific_primitive is not None:
primitive = use_specific_primitive
else:
primitive = np_random.choice(["box", "cylinder", "sphere"])
mass = np_random.uniform(0.05, 0.25)
friction = np_random.uniform(0.75, 1.5)
color = list(np_random.uniform(0.0, 1.0, (3,)))
color.append(1.0)
if "box" == primitive:
return Box.get_sdf(
model_name=model_name,
size=list(np_random.uniform(0.04, 0.06, (3,))),
mass=mass,
static=static,
collision=collision,
friction=friction,
visual=visual,
gui_only=gui_only,
color=color,
)
elif "cylinder" == primitive:
return Cylinder.get_sdf(
model_name=model_name,
radius=np_random.uniform(0.01, 0.0375),
length=np_random.uniform(0.025, 0.05),
mass=mass,
static=static,
collision=collision,
friction=friction,
visual=visual,
gui_only=gui_only,
color=color,
)
elif "sphere" == primitive:
return Sphere.get_sdf(
model_name=model_name,
radius=np_random.uniform(0.01, 0.0375),
mass=mass,
static=static,
collision=collision,
friction=friction,
visual=visual,
gui_only=gui_only,
color=color,
)
else:
raise TypeError(
f"Type '{use_specific_primitive}' is not a supported primitive. Please use 'box', 'cylinder' or 'sphere'."
)

View file

@ -0,0 +1,52 @@
from typing import List
from gym_gz.scenario import model_with_file, model_wrapper
from gym_gz.utils.scenario import get_unique_model_name
from scenario import core as scenario
from scenario import gazebo as scenario_gazebo
class Rock(model_wrapper.ModelWrapper, model_with_file.ModelWithFile):
def __init__(
self,
world: scenario.World,
name: str = "rock",
position: List[float] = (0, 0, 0),
orientation: List[float] = (1, 0, 0, 0),
model_file: str = None,
use_fuel: bool = True,
variant: int = 6,
**kwargs,
):
# Allow passing of custom model file as an argument
if model_file is None:
model_file = self.get_model_file(fuel=use_fuel, variant=variant)
# Get a unique model name
model_name = get_unique_model_name(world, name)
# Setup initial pose
initial_pose = scenario.Pose(position, orientation)
# Insert the model
ok_model = world.to_gazebo().insert_model_from_file(
model_file, initial_pose, model_name
)
if not ok_model:
raise RuntimeError("Failed to insert " + model_name)
# Get the model
model = world.get_model(model_name)
# Initialize base class
super().__init__(model=model)
@classmethod
def get_model_file(cls, fuel: bool = False, variant: int = 6) -> str:
if fuel:
return scenario_gazebo.get_model_file_from_fuel(
f"https://fuel.ignitionrobotics.org/1.0/OpenRobotics/models/Falling Rock {variant}"
)
else:
return "lunar_surface"

View file

@ -0,0 +1,15 @@
from gym_gz.scenario.model_wrapper import ModelWrapper
from .rbs_arm import RbsArm
# from .panda import Panda
# TODO: When adding a new robot, create abstract classes to simplify the process
def get_robot_model_class(robot_model: str) -> ModelWrapper:
# TODO: Refactor into enum
if "rbs_arm" == robot_model:
return RbsArm
# elif "panda" == robot_model:
# return Panda
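# Example: tasks resolve the robot wrapper from the configured model name:
#
#   robot_cls = get_robot_model_class("rbs_arm")        # -> RbsArm
#   robot = robot_cls(world=world,
#                     initial_arm_joint_positions=RbsArm.DEFAULT_ARM_JOINT_POSITIONS)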

View file

@ -0,0 +1,348 @@
from os import path
from typing import Any, Dict, List, Optional, Tuple
from ament_index_python.packages import get_package_share_directory
from gym_gz.scenario import model_with_file, model_wrapper
from gym_gz.utils.scenario import get_unique_model_name
from scenario import core as scenario
from scenario import gazebo as scenario_gazebo
from rbs_gym.envs.models.utils import xacro2sdf
class Panda(model_wrapper.ModelWrapper, model_with_file.ModelWithFile):
ROBOT_MODEL_NAME: str = "panda"
DEFAULT_PREFIX: str = "panda_"
__DESCRIPTION_PACKAGE = ROBOT_MODEL_NAME + "_description"
__DEFAULT_XACRO_FILE = path.join(
get_package_share_directory(__DESCRIPTION_PACKAGE),
"urdf",
ROBOT_MODEL_NAME + ".urdf.xacro",
)
__DEFAULT_XACRO_MAPPINGS: Dict[str, Any] = {
"name": ROBOT_MODEL_NAME,
"gripper": True,
"collision_arm": False,
"collision_gripper": True,
"ros2_control": True,
"ros2_control_plugin": "ign",
"ros2_control_command_interface": "effort",
"gazebo_preserve_fixed_joint": True,
}
__XACRO_MODEL_PATH_REMAP: Tuple[str, str] = (
__DESCRIPTION_PACKAGE,
ROBOT_MODEL_NAME,
)
DEFAULT_ARM_JOINT_POSITIONS: List[float] = (
0.0,
-0.7853981633974483,
0.0,
-2.356194490192345,
0.0,
1.5707963267948966,
0.7853981633974483,
)
OPEN_GRIPPER_JOINT_POSITIONS: List[float] = (
0.04,
0.04,
)
CLOSED_GRIPPER_JOINT_POSITIONS: List[float] = (
0.0,
0.0,
)
DEFAULT_GRIPPER_JOINT_POSITIONS: List[float] = OPEN_GRIPPER_JOINT_POSITIONS
BASE_LINK_Z_OFFSET: float = 0.0
def __init__(
self,
world: scenario.World,
name: str = ROBOT_MODEL_NAME,
position: List[float] = (0, 0, 0),
orientation: List[float] = (1, 0, 0, 0),
model_file: str = None,
use_fuel: bool = False,
use_xacro: bool = True,
xacro_file: str = __DEFAULT_XACRO_FILE,
xacro_mappings: Dict[str, Any] = __DEFAULT_XACRO_MAPPINGS,
initial_arm_joint_positions: List[float] = DEFAULT_ARM_JOINT_POSITIONS,
initial_gripper_joint_positions: List[float] = OPEN_GRIPPER_JOINT_POSITIONS,
**kwargs,
):
# Store params that are needed internally
self.__prefix = f"{name}_"
self.__initial_arm_joint_positions = initial_arm_joint_positions
self.__initial_gripper_joint_positions = initial_gripper_joint_positions
# Allow passing of custom model file as an argument
if model_file is None:
if use_xacro:
# Generate SDF from xacro
mappings = self.__DEFAULT_XACRO_MAPPINGS
mappings.update(kwargs)
mappings.update(xacro_mappings)
model_file = xacro2sdf(
input_file_path=xacro_file,
mappings=mappings,
model_path_remap=self.__XACRO_MODEL_PATH_REMAP,
)
else:
# Otherwise, use the default SDF file (local or fuel)
model_file = self.get_model_file(fuel=use_fuel)
# Get a unique model name
model_name = get_unique_model_name(world, name)
# Setup initial pose
initial_pose = scenario.Pose(position, orientation)
# Determine whether to insert from string or file
if use_xacro:
insert_fn = scenario_gazebo.World.insert_model_from_string
else:
insert_fn = scenario_gazebo.World.insert_model_from_file
# Insert the model
ok_model = insert_fn(world.to_gazebo(), model_file, initial_pose, model_name)
if not ok_model:
raise RuntimeError("Failed to insert " + model_name)
# Get the model
model = world.get_model(model_name)
# Set initial joint configuration
self.set_initial_joint_positions(model)
# Initialize base class
super().__init__(model=model)
def set_initial_joint_positions(self, model):
model = model.to_gazebo()
if not model.reset_joint_positions(
self.initial_arm_joint_positions, self.arm_joint_names
):
raise RuntimeError("Failed to set initial positions of arm's joints")
if not model.reset_joint_positions(
self.initial_gripper_joint_positions, self.gripper_joint_names
):
raise RuntimeError("Failed to set initial positions of gripper's joints")
@classmethod
def get_model_file(cls, fuel: bool = False) -> str:
if fuel:
# The Fuel variant is not yet supported; once it is, it should resolve via:
# scenario_gazebo.get_model_file_from_fuel(
#     "https://fuel.ignitionrobotics.org/1.0/AndrejOrsula/models/"
#     + cls.ROBOT_MODEL_NAME
# )
raise NotImplementedError
return cls.ROBOT_MODEL_NAME
# Meta information #
@property
def is_mobile(self) -> bool:
return False
# Prefix #
@property
def prefix(self) -> str:
return self.__prefix
# Joints #
@property
def joint_names(self) -> List[str]:
return self.move_base_joint_names + self.manipulator_joint_names
@property
def move_base_joint_names(self) -> List[str]:
return []
@property
def manipulator_joint_names(self) -> List[str]:
return self.arm_joint_names + self.gripper_joint_names
@classmethod
def get_arm_joint_names(cls, prefix: str = "") -> List[str]:
return [
prefix + "joint1",
prefix + "joint2",
prefix + "joint3",
prefix + "joint4",
prefix + "joint5",
prefix + "joint6",
prefix + "joint7",
]
@property
def arm_joint_names(self) -> List[str]:
return self.get_arm_joint_names(self.prefix)
@classmethod
def get_gripper_joint_names(cls, prefix: str = "") -> List[str]:
return [
prefix + "finger_joint1",
prefix + "finger_joint2",
]
@property
def gripper_joint_names(self) -> List[str]:
return self.get_gripper_joint_names(self.prefix)
@property
def move_base_joint_limits(self) -> Optional[List[Tuple[float, float]]]:
return None
@property
def arm_joint_limits(self) -> Optional[List[Tuple[float, float]]]:
return [
(-2.897246558310587, 2.897246558310587),
(-1.762782544514273, 1.762782544514273),
(-2.897246558310587, 2.897246558310587),
(-3.07177948351002, -0.06981317007977318),
(-2.897246558310587, 2.897246558310587),
(-0.0174532925199433, 3.752457891787809),
(-2.897246558310587, 2.897246558310587),
]
@property
def gripper_joint_limits(self) -> Optional[List[Tuple[float, float]]]:
return [
(0.0, 0.04),
(0.0, 0.04),
]
@property
def gripper_joints_close_towards_positive(self) -> bool:
return (
self.OPEN_GRIPPER_JOINT_POSITIONS[0]
< self.CLOSED_GRIPPER_JOINT_POSITIONS[0]
)
@property
def initial_arm_joint_positions(self) -> List[float]:
return self.__initial_arm_joint_positions
@property
def initial_gripper_joint_positions(self) -> List[float]:
return self.__initial_gripper_joint_positions
# Passive joints #
@property
def passive_joint_names(self) -> List[str]:
return self.manipulator_passive_joint_names + self.move_base_passive_joint_names
@property
def move_base_passive_joint_names(self) -> List[str]:
return []
@property
def manipulator_passive_joint_names(self) -> List[str]:
return self.arm_passive_joint_names + self.gripper_passive_joint_names
@property
def arm_passive_joint_names(self) -> List[str]:
return []
@property
def gripper_passive_joint_names(self) -> List[str]:
return []
# Links #
@classmethod
def get_robot_base_link_name(cls, prefix: str = "") -> str:
return cls.get_arm_base_link_name(prefix)
@property
def robot_base_link_name(self) -> str:
return self.get_robot_base_link_name(self.prefix)
@classmethod
def get_arm_base_link_name(cls, prefix: str = "") -> str:
# Same as `self.arm_link_names[0]`
return prefix + "link0"
@property
def arm_base_link_name(self) -> str:
return self.get_arm_base_link_name(self.prefix)
@classmethod
def get_ee_link_name(cls, prefix: str = "") -> str:
return prefix + "hand_tcp"
@property
def ee_link_name(self) -> str:
return self.get_ee_link_name(self.prefix)
@classmethod
def get_wheel_link_names(cls, prefix: str = "") -> List[str]:
return []
@property
def wheel_link_names(self) -> List[str]:
return self.get_wheel_link_names(self.prefix)
@classmethod
def get_arm_link_names(cls, prefix: str = "") -> List[str]:
return [
prefix + "link0",
prefix + "link1",
prefix + "link2",
prefix + "link3",
prefix + "link4",
prefix + "link5",
prefix + "link6",
prefix + "link7",
]
@property
def arm_link_names(self) -> List[str]:
return self.get_arm_link_names(self.prefix)
@classmethod
def get_gripper_link_names(cls, prefix: str = "") -> List[str]:
return [
prefix + "leftfinger",
prefix + "rightfinger",
]
@property
def gripper_link_names(self) -> List[str]:
return self.get_gripper_link_names(self.prefix)

View file

@ -0,0 +1,261 @@
from typing import Any, Dict, List, Optional, Tuple
from gym_gz.scenario import model_with_file, model_wrapper
from gym_gz.utils.scenario import get_unique_model_name
from scenario import gazebo as sgaz
import numpy as np
from scenario import core as scenario
from scenario import gazebo as scenario_gazebo
class RbsArm(model_wrapper.ModelWrapper, model_with_file.ModelWithFile):
DEFAULT_ARM_JOINT_POSITIONS: List[float] = (
0.0, 0.5, 3.14159, 1.5, 0.0, 1.4, 0.0,
)
OPEN_GRIPPER_JOINT_POSITIONS: List[float] = (
0.064,
0.064,
)
CLOSED_GRIPPER_JOINT_POSITIONS: List[float] = (
0.0,
0.0,
)
DEFAULT_GRIPPER_JOINT_POSITIONS: List[float] = OPEN_GRIPPER_JOINT_POSITIONS
BASE_LINK_Z_OFFSET: float = 0.0
DEFAULT_PREFIX: str = ""
ROBOT_MODEL_NAME: str = "rbs_arm"
def __init__(
self,
world: scenario.World,
name: str = "rbs_arm",
position: List[float] = (0.0, 0.0, 0.0),
orientation: List[float] = (1.0, 0, 0, 0),
model_file: Optional[str] = None,
use_fuel: bool = False,
use_xacro: bool = False,
xacro_file: str = "",
xacro_mappings: Dict[str, Any] = {},
initial_arm_joint_positions: List[float] = DEFAULT_ARM_JOINT_POSITIONS,
initial_gripper_joint_positions: List[float] = DEFAULT_GRIPPER_JOINT_POSITIONS,
**kwargs
):
self.__prefix = f"{name}_"
self.__initial_arm_joint_positions = initial_arm_joint_positions
self.__initial_gripper_joint_positions = initial_gripper_joint_positions
# Allow passing of a custom model file as an argument
if model_file is None:
model_file = self.get_model_file()
# Get a unique model name
model_name = get_unique_model_name(world, name)
# Setup initial pose
initial_pose = scenario.Pose(position, orientation)
# Determine whether to insert from string or file
if use_xacro:
insert_fn = scenario_gazebo.World.insert_model_from_string
else:
insert_fn = scenario_gazebo.World.insert_model_from_file
# Insert the model
ok_model = insert_fn(world.to_gazebo(), model_file, initial_pose, model_name)
if not ok_model:
raise RuntimeError("Failed to insert " + model_name)
# Get the model
model = world.get_model(model_name)
# Set initial joint configuration
self.set_initial_joint_positions(model)
# Initialize base class
super().__init__(model=model)
def set_initial_joint_positions(self, model):
model = model.to_gazebo()
if not model.reset_joint_positions(
self.initial_arm_joint_positions, self.arm_joint_names
):
raise RuntimeError("Failed to set initial positions of arm's joints")
if not model.reset_joint_positions(
self.initial_gripper_joint_positions, self.gripper_joint_names
):
raise RuntimeError("Failed to set initial positions of gripper's joints")
# Meta information #
@property
def is_mobile(self) -> bool:
return False
# Prefix #
@property
def prefix(self) -> str:
return self.__prefix
# Joints #
@property
def joint_names(self) -> List[str]:
return self.move_base_joint_names + self.manipulator_joint_names
@property
def move_base_joint_names(self) -> List[str]:
return []
@property
def manipulator_joint_names(self) -> List[str]:
return self.arm_joint_names + self.gripper_joint_names
@classmethod
def get_arm_joint_names(cls, prefix: str = "") -> List[str]:
return [
"fork0_link_joint",
"main0_link_joint",
"fork1_link_joint",
"main1_link_joint",
"fork2_link_joint",
"ee_link_joint",
"rbs_gripper_rot_base_joint",
]
@property
def arm_joint_names(self) -> List[str]:
return self.get_arm_joint_names(self.prefix)
@classmethod
def get_gripper_joint_names(cls, prefix: str = "") -> List[str]:
return [
"rbs_gripper_r_finger_joint",
"rbs_gripper_l_finger_joint"
]
@property
def gripper_joint_names(self) -> List[str]:
return self.get_gripper_joint_names(self.prefix)
@property
def arm_joint_limits(self) -> Optional[List[Tuple[float, float]]]:
return [
(-3.14159, 3.14159),
(-1.5708, 3.14159),
(-3.14159, 3.14159),
(-1.5708, 3.14159),
(-3.14159, 3.14159),
(-1.5708, 3.14159),
(-3.14159, 3.14159),
]
@property
def gripper_joint_limits(self) -> Optional[List[Tuple[float, float]]]:
return [
(0.0, 0.064),
(0.0, 0.064),
]
@property
def gripper_joints_close_towards_positive(self) -> bool:
return (
self.OPEN_GRIPPER_JOINT_POSITIONS[0]
< self.CLOSED_GRIPPER_JOINT_POSITIONS[0]
)
@property
def initial_arm_joint_positions(self) -> List[float]:
return self.__initial_arm_joint_positions
@property
def initial_gripper_joint_positions(self) -> List[float]:
return self.__initial_gripper_joint_positions
# Passive joints #
@property
def passive_joint_names(self) -> List[str]:
return self.manipulator_passive_joint_names + self.move_base_passive_joint_names
@property
def move_base_passive_joint_names(self) -> List[str]:
return []
@property
def manipulator_passive_joint_names(self) -> List[str]:
return self.arm_passive_joint_names + self.gripper_passive_joint_names
@property
def arm_passive_joint_names(self) -> List[str]:
return []
@property
def gripper_passive_joint_names(self) -> List[str]:
return []
# Links #
@classmethod
def get_robot_base_link_name(cls, prefix: str = "") -> str:
return cls.get_arm_base_link_name(prefix)
@property
def robot_base_link_name(self) -> str:
return self.get_robot_base_link_name(self.prefix)
@classmethod
def get_arm_base_link_name(cls, prefix: str = "") -> str:
# Note: Unlike `arm_link_names[0]`, this arm has a dedicated base link
return "base_link"
@property
def arm_base_link_name(self) -> str:
return self.get_arm_base_link_name(self.prefix)
@classmethod
def get_ee_link_name(cls, prefix: str = "") -> str:
return "gripper_grasp_point"
@property
def ee_link_name(self) -> str:
return self.get_ee_link_name(self.prefix)
@classmethod
def get_wheel_link_names(cls, prefix: str = "") -> List[str]:
return []
@property
def wheel_link_names(self) -> List[str]:
return self.get_wheel_link_names(self.prefix)
@classmethod
def get_arm_link_names(cls, prefix: str = "") -> List[str]:
    return [
        "fork0_link",
        "main0_link",
        "fork1_link",
        "main1_link",
        "fork2_link",
        "tool0",
        "ee_link",
        "rbs_gripper_rot_base_link",
    ]
@property
def arm_link_names(self) -> List[str]:
return self.get_arm_link_names(self.prefix)
@classmethod
def get_gripper_link_names(cls, prefix: str = "") -> List[str]:
return [
"rbs_gripper_l_finger_link",
"rbs_gripper_r_finger_link"
]
@property
def gripper_link_names(self) -> List[str]:
return self.get_gripper_link_names(self.prefix)
@classmethod
def get_model_file(cls) -> str:
return "/home/bill-finger/rbs_ws/current.urdf"

View file

@ -0,0 +1 @@
from .camera import Camera

View file

@ -0,0 +1,324 @@
import os
from threading import Thread
from typing import List, Optional, Union
from gym_gz.scenario import model_wrapper
from gym_gz.utils.scenario import get_unique_model_name
from scenario import core as scenario
from rbs_gym.envs.models.utils import ModelCollectionRandomizer
class Camera(model_wrapper.ModelWrapper):
def __init__(
self,
world: scenario.World,
name: Union[str, None] = None,
position: List[float] = (0, 0, 0),
orientation: List[float] = (1, 0, 0, 0),
static: bool = True,
camera_type: str = "rgbd_camera",
width: int = 212,
height: int = 120,
image_format: str = "R8G8B8",
update_rate: int = 15,
horizontal_fov: float = 1.567821,
vertical_fov: float = 1.022238,
clip_color: List[float] = (0.02, 1000.0),
clip_depth: List[float] = (0.02, 10.0),
noise_mean: Optional[float] = None,
noise_stddev: Optional[float] = None,
ros2_bridge_color: bool = False,
ros2_bridge_depth: bool = False,
ros2_bridge_points: bool = False,
visibility_mask: int = 0,
visual: Optional[str] = None,
# visual: Optional[str] = "intel_realsense_d435",
):
# Get a unique model name
if name is not None:
model_name = get_unique_model_name(world, name)
else:
model_name = get_unique_model_name(world, camera_type)
self._model_name = model_name
# Initial pose
initial_pose = scenario.Pose(position, orientation)
# Get resources for visual (if enabled)
if visual:
use_mesh: bool = False
if "intel_realsense_d435" == visual:
use_mesh = True
# Get path to the model and the important directories
model_path = ModelCollectionRandomizer.get_collection_paths(
owner="OpenRobotics",
collection="",
model_name="Intel RealSense D435",
)[0]
mesh_dir = os.path.join(model_path, "meshes")
texture_dir = os.path.join(model_path, "materials", "textures")
# Get path to the mesh
mesh_path_visual = os.path.join(mesh_dir, "realsense.dae")
# Make sure that it exists
if not os.path.exists(mesh_path_visual):
raise ValueError(
f"Visual mesh '{mesh_path_visual}' for Camera model is not a valid file."
)
# Find PBR textures
albedo_map = None
normal_map = None
roughness_map = None
metalness_map = None
if texture_dir:
# List all files
texture_files = os.listdir(texture_dir)
# Extract the appropriate files
for texture in texture_files:
texture_lower = texture.lower()
if "basecolor" in texture_lower or "albedo" in texture_lower:
albedo_map = os.path.join(texture_dir, texture)
elif "normal" in texture_lower:
normal_map = os.path.join(texture_dir, texture)
elif "roughness" in texture_lower:
roughness_map = os.path.join(texture_dir, texture)
elif (
"specular" in texture_lower or "metalness" in texture_lower
):
metalness_map = os.path.join(texture_dir, texture)
if not (albedo_map and normal_map and roughness_map and metalness_map):
raise ValueError("Not all PBR textures for the Camera model were found.")
# Create SDF string for the model
sdf = f'''<sdf version="1.9">
<model name="{model_name}">
<static>{static}</static>
<link name="{self.link_name}">
<sensor name="camera" type="{camera_type}">
<topic>{model_name}</topic>
<always_on>true</always_on>
<update_rate>{update_rate}</update_rate>
<camera name="{model_name}_camera">
<image>
<width>{width}</width>
<height>{height}</height>
<format>{image_format}</format>
</image>
<horizontal_fov>{horizontal_fov}</horizontal_fov>
<vertical_fov>{vertical_fov}</vertical_fov>
<clip>
<near>{clip_color[0]}</near>
<far>{clip_color[1]}</far>
</clip>
{
f"""<depth_camera>
<clip>
<near>{clip_depth[0]}</near>
<far>{clip_depth[1]}</far>
</clip>
</depth_camera>""" if "rgbd" in model_name else ""
}
{
f"""<noise>
<type>gaussian</type>
<mean>{noise_mean}</mean>
<stddev>{noise_stddev}</stddev>
</noise>""" if noise_mean is not None and noise_stddev is not None else ""
}
<visibility_mask>{visibility_mask}</visibility_mask>
</camera>
<visualize>true</visualize>
</sensor>
{
f"""
<visual name="{model_name}_visual_lens">
<pose>-0.01 0 0 0 1.5707963 0</pose>
<geometry>
<cylinder>
<radius>0.02</radius>
<length>0.02</length>
</cylinder>
</geometry>
<material>
<ambient>0.0 0.8 0.0</ambient>
<diffuse>0.0 0.8 0.0</diffuse>
<specular>0.0 0.8 0.0</specular>
</material>
</visual>
<visual name="{model_name}_visual_body">
<pose>-0.05 0 0 0 0 0</pose>
<geometry>
<box>
<size>0.06 0.05 0.05</size>
</box>
</geometry>
<material>
<ambient>0.0 0.8 0.0</ambient>
<diffuse>0.0 0.8 0.0</diffuse>
<specular>0.0 0.8 0.0</specular>
</material>
</visual>
""" if visual and not use_mesh else ""
}
{
f"""
<inertial>
<mass>0.0615752</mass>
<inertia>
<ixx>9.108e-05</ixx>
<ixy>0.0</ixy>
<ixz>0.0</ixz>
<iyy>2.51e-06</iyy>
<iyz>0.0</iyz>
<izz>8.931e-05</izz>
</inertia>
</inertial>
<visual name="{model_name}_visual">
<pose>0 0 0 0 0 1.5707963</pose>
<geometry>
<mesh>
<uri>{mesh_path_visual}</uri>
<submesh>
<name>RealSense</name>
<center>false</center>
</submesh>
</mesh>
</geometry>
<material>
<diffuse>1 1 1 1</diffuse>
<specular>1 1 1 1</specular>
<pbr>
<metal>
<albedo_map>{albedo_map}</albedo_map>
<normal_map>{normal_map}</normal_map>
<roughness_map>{roughness_map}</roughness_map>
<metalness_map>{metalness_map}</metalness_map>
</metal>
</pbr>
</material>
</visual>
""" if visual and use_mesh else ""
}
</link>
</model>
</sdf>'''
# Insert the model
ok_model = world.to_gazebo().insert_model_from_string(
sdf, initial_pose, model_name
)
if not ok_model:
raise RuntimeError("Failed to insert " + model_name)
# Get the model
model = world.get_model(model_name)
# Initialize base class
model_wrapper.ModelWrapper.__init__(self, model=model)
if ros2_bridge_color or ros2_bridge_depth or ros2_bridge_points:
self.__threads = []
if ros2_bridge_color:
self.__threads.append(
Thread(
target=self.construct_ros2_bridge,
args=(
self.color_topic,
"sensor_msgs/msg/Image",
"ignition.msgs.Image",
),
daemon=True,
)
)
if ros2_bridge_depth:
self.__threads.append(
Thread(
target=self.construct_ros2_bridge,
args=(
self.depth_topic,
"sensor_msgs/msg/Image",
"ignition.msgs.Image",
),
daemon=True,
)
)
if ros2_bridge_points:
self.__threads.append(
Thread(
target=self.construct_ros2_bridge,
args=(
self.points_topic,
"sensor_msgs/msg/PointCloud2",
"ignition.msgs.PointCloudPacked",
),
daemon=True,
)
)
for thread in self.__threads:
thread.start()
def __del__(self):
    # Note: name mangling turns `self.__threads` into `_Camera__threads`
    for thread in getattr(self, "_Camera__threads", []):
        thread.join()
@classmethod
def construct_ros2_bridge(cls, topic: str, ros_msg: str, ign_msg: str):
node_name = "parameter_bridge" + topic.replace("/", "_")
command = (
f"ros2 run ros_ign_bridge parameter_bridge {topic}@{ros_msg}[{ign_msg} "
+ f"--ros-args --remap __node:={node_name} --ros-args -p use_sim_time:=true"
)
os.system(command)
@classmethod
def get_frame_id(cls, model_name: str) -> str:
return f"{model_name}/{model_name}_link/camera"
@property
def frame_id(self) -> str:
return self.get_frame_id(self._model_name)
@classmethod
def get_color_topic(cls, model_name: str) -> str:
return f"/{model_name}/image" if "rgbd" in model_name else f"/{model_name}"
@property
def color_topic(self) -> str:
return self.get_color_topic(self._model_name)
@classmethod
def get_depth_topic(cls, model_name: str) -> str:
return (
f"/{model_name}/depth_image" if "rgbd" in model_name else f"/{model_name}"
)
@property
def depth_topic(self) -> str:
return self.get_depth_topic(self._model_name)
@classmethod
def get_points_topic(cls, model_name: str) -> str:
return f"/{model_name}/points"
@property
def points_topic(self) -> str:
return self.get_points_topic(self._model_name)
@classmethod
def get_link_name(cls, model_name: str) -> str:
return f"{model_name}_link"
@property
def link_name(self) -> str:
return self.get_link_name(self._model_name)
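# Usage sketch (illustrative): inserting an RGB-D camera and bridging its
# topics to ROS 2. Topic names follow get_color_topic()/get_depth_topic()
# above; the `world` handle comes from the caller's simulation.
#
#   camera = Camera(
#       world=world,
#       camera_type="rgbd_camera",
#       position=(1.0, 0.0, 0.5),
#       ros2_bridge_color=True,
#       ros2_bridge_depth=True,
#   )
#   print(camera.color_topic, camera.depth_topic, camera.frame_id)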

View file

@ -0,0 +1,27 @@
from typing import Type
from gym_gz.scenario.model_wrapper import ModelWrapper
from .ground import Ground
from .lunar_heightmap import LunarHeightmap
from .lunar_surface import LunarSurface
from .random_ground import RandomGround
from .random_lunar_surface import RandomLunarSurface
def get_terrain_model_class(terrain_type: str) -> Type[ModelWrapper]:
# TODO: Refactor into enum
if "flat" == terrain_type:
return Ground
elif "random_flat" == terrain_type:
return RandomGround
elif "lunar_heightmap" == terrain_type:
return LunarHeightmap
elif "lunar_surface" == terrain_type:
return LunarSurface
elif "random_lunar_surface" == terrain_type:
    return RandomLunarSurface
else:
    raise ValueError(f"Unsupported terrain type: '{terrain_type}'")
def is_terrain_type_randomizable(terrain_type: str) -> bool:
return "random_flat" == terrain_type or "random_lunar_surface" == terrain_type

View file

@ -0,0 +1,80 @@
from typing import List
from gym_gz.scenario import model_wrapper
from gym_gz.utils.scenario import get_unique_model_name
from scenario import core as scenario
class Ground(model_wrapper.ModelWrapper):
def __init__(
self,
world: scenario.World,
name: str = "ground",
position: List[float] = (0, 0, 0),
orientation: List[float] = (1, 0, 0, 0),
size: List[float] = (1.5, 1.5),
collision_thickness=0.05,
friction: float = 5.0,
**kwargs,
):
# Get a unique model name
model_name = get_unique_model_name(world, name)
# Initial pose
initial_pose = scenario.Pose(position, orientation)
# Create SDF string for the model
sdf = f"""<sdf version="1.9">
<model name="{model_name}">
<static>true</static>
<link name="{model_name}_link">
<collision name="{model_name}_collision">
<geometry>
<plane>
<normal>0 0 1</normal>
<size>{size[0]} {size[1]}</size>
</plane>
</geometry>
<surface>
<friction>
<ode>
<mu>{friction}</mu>
<mu2>{friction}</mu2>
<fdir1>0 0 0</fdir1>
<slip1>0.0</slip1>
<slip2>0.0</slip2>
</ode>
</friction>
</surface>
</collision>
<visual name="{model_name}_visual">
<geometry>
<plane>
<normal>0 0 1</normal>
<size>{size[0]} {size[1]}</size>
</plane>
</geometry>
<material>
<ambient>0.8 0.8 0.8 1</ambient>
<diffuse>0.8 0.8 0.8 1</diffuse>
<specular>0.8 0.8 0.8 1</specular>
</material>
</visual>
</link>
</model>
</sdf>"""
# Insert the model
ok_model = world.to_gazebo().insert_model_from_string(
sdf, initial_pose, model_name
)
if not ok_model:
raise RuntimeError("Failed to insert " + model_name)
# Get the model
model = world.get_model(model_name)
# Initialize base class
model_wrapper.ModelWrapper.__init__(self, model=model)

View file

@ -0,0 +1,52 @@
from typing import List, Optional
from gym_gz.scenario import model_with_file, model_wrapper
from gym_gz.utils.scenario import get_unique_model_name
from scenario import core as scenario
from scenario import gazebo as scenario_gazebo
class LunarHeightmap(model_wrapper.ModelWrapper, model_with_file.ModelWithFile):
def __init__(
self,
world: scenario.World,
name: str = "lunar_heightmap",
position: List[float] = (0, 0, 0),
orientation: List[float] = (1, 0, 0, 0),
model_file: Optional[str] = None,
use_fuel: bool = False,
**kwargs,
):
# Allow passing of custom model file as an argument
if model_file is None:
model_file = self.get_model_file(fuel=use_fuel)
# Get a unique model name
model_name = get_unique_model_name(world, name)
# Setup initial pose
initial_pose = scenario.Pose(position, orientation)
# Insert the model
ok_model = world.to_gazebo().insert_model_from_file(
model_file, initial_pose, model_name
)
if not ok_model:
raise RuntimeError("Failed to insert " + model_name)
# Get the model
model = world.get_model(model_name)
# Initialize base class
super().__init__(model=model)
@classmethod
def get_model_file(cls, fuel: bool = False) -> str:
    if fuel:
        # The heightmap is not available on Fuel yet; the intended URI is
        # kept below for reference
        # return scenario_gazebo.get_model_file_from_fuel(
        #     "https://fuel.ignitionrobotics.org/1.0/AndrejOrsula/models/lunar_heightmap"
        # )
        raise NotImplementedError
    return "lunar_heightmap"

View file

@ -0,0 +1,53 @@
from typing import List, Optional
from gym_gz.scenario import model_with_file, model_wrapper
from gym_gz.utils.scenario import get_unique_model_name
from scenario import core as scenario
from scenario import gazebo as scenario_gazebo
class LunarSurface(model_wrapper.ModelWrapper, model_with_file.ModelWithFile):
def __init__(
self,
world: scenario.World,
name: str = "lunar_surface",
position: List[float] = (0, 0, 0),
orientation: List[float] = (1, 0, 0, 0),
model_file: Optional[str] = None,
use_fuel: bool = False,
variant: str = "tycho",
**kwargs,
):
# Allow passing of custom model file as an argument
if model_file is None:
model_file = self.get_model_file(fuel=use_fuel, variant=variant)
# Get a unique model name
model_name = get_unique_model_name(world, name)
# Setup initial pose
initial_pose = scenario.Pose(position, orientation)
# Insert the model
ok_model = world.to_gazebo().insert_model_from_file(
model_file, initial_pose, model_name
)
if not ok_model:
raise RuntimeError("Failed to insert " + model_name)
# Get the model
model = world.get_model(model_name)
# Initialize base class
super().__init__(model=model)
@classmethod
def get_model_file(cls, fuel: bool = False, variant: str = "tycho") -> str:
    if fuel:
        # The surface models are not available on Fuel yet; the intended URI
        # is kept below for reference
        # return scenario_gazebo.get_model_file_from_fuel(
        #     f"https://fuel.ignitionrobotics.org/1.0/AndrejOrsula/models/lunar_surface_{variant}"
        # )
        raise NotImplementedError
    return f"lunar_surface_{variant}"

View file

@ -0,0 +1,135 @@
import os
from typing import List, Optional
import numpy as np
from gym_gz.scenario import model_wrapper
from gym_gz.utils.scenario import get_unique_model_name
from numpy.random import RandomState
from scenario import core as scenario
class RandomGround(model_wrapper.ModelWrapper):
def __init__(
self,
world: scenario.World,
name: str = "random_ground",
position: List[float] = (0, 0, 0),
orientation: List[float] = (1, 0, 0, 0),
size: List[float] = (1.0, 1.0),
collision_thickness: float = 0.05,
friction: float = 5.0,
texture_dir: Optional[str] = None,
np_random: Optional[RandomState] = None,
**kwargs,
):
if np_random is None:
np_random = np.random.default_rng()
# Get a unique model name
model_name = get_unique_model_name(world, name)
# Initial pose
initial_pose = scenario.Pose(position, orientation)
# Get textures from ENV variable if not directly specified
if not texture_dir:
texture_dir = os.environ.get("TEXTURE_DIRS", default="")
# Find random PBR texture
albedo_map = None
normal_map = None
roughness_map = None
metalness_map = None
if texture_dir:
    if ":" in texture_dir:
        # Support multiple colon-separated texture directories
        textures = []
        for d in texture_dir.split(":"):
            textures.extend([os.path.join(d, f) for f in os.listdir(d)])
    else:
        # Get list of the available textures (as full paths)
        textures = [os.path.join(texture_dir, f) for f in os.listdir(texture_dir)]
    # Choose a random texture from these
    random_texture_dir = str(np_random.choice(textures))
    # List all files
    texture_files = os.listdir(random_texture_dir)
# Extract the appropriate files
for texture in texture_files:
texture_lower = texture.lower()
if "color" in texture_lower or "albedo" in texture_lower:
albedo_map = os.path.join(random_texture_dir, texture)
elif "normal" in texture_lower:
normal_map = os.path.join(random_texture_dir, texture)
elif "roughness" in texture_lower:
roughness_map = os.path.join(random_texture_dir, texture)
elif "specular" in texture_lower or "metalness" in texture_lower:
metalness_map = os.path.join(random_texture_dir, texture)
# Create SDF string for the model
sdf = f"""<sdf version="1.9">
<model name="{model_name}">
<static>true</static>
<link name="{model_name}_link">
<collision name="{model_name}_collision">
<geometry>
<plane>
<normal>0 0 1</normal>
<size>{size[0]} {size[1]}</size>
</plane>
</geometry>
<surface>
<friction>
<ode>
<mu>{friction}</mu>
<mu2>{friction}</mu2>
<fdir1>0 0 0</fdir1>
<slip1>0.0</slip1>
<slip2>0.0</slip2>
</ode>
</friction>
</surface>
</collision>
<visual name="{model_name}_visual">
<geometry>
<plane>
<normal>0 0 1</normal>
<size>{size[0]} {size[1]}</size>
</plane>
</geometry>
<material>
<ambient>1 1 1 1</ambient>
<diffuse>1 1 1 1</diffuse>
<specular>1 1 1 1</specular>
<pbr>
<metal>
{"<albedo_map>%s</albedo_map>"
% albedo_map if albedo_map is not None else ""}
{"<normal_map>%s</normal_map>"
% normal_map if normal_map is not None else ""}
{"<roughness_map>%s</roughness_map>"
% roughness_map if roughness_map is not None else ""}
{"<metalness_map>%s</metalness_map>"
% metalness_map if metalness_map is not None else ""}
</metal>
</pbr>
</material>
</visual>
</link>
</model>
</sdf>"""
# Insert the model
ok_model = world.to_gazebo().insert_model_from_string(
sdf, initial_pose, model_name
)
if not ok_model:
raise RuntimeError("Failed to insert " + model_name)
# Get the model
model = world.get_model(model_name)
# Initialize base class
model_wrapper.ModelWrapper.__init__(self, model=model)
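# Usage sketch (illustrative): pointing the ground randomizer at one or more
# PBR texture directories via the colon-separated TEXTURE_DIRS variable that
# is parsed above (paths below are placeholders).
#
#   os.environ["TEXTURE_DIRS"] = "/opt/textures/set_a:/opt/textures/set_b"
#   ground = RandomGround(world=world, size=(2.0, 2.0))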

View file

@ -0,0 +1,57 @@
import os
from typing import List, Optional
import numpy as np
from gym_gz.scenario import model_wrapper
from gym_gz.utils.scenario import get_unique_model_name
from numpy.random import RandomState
from scenario import core as scenario
class RandomLunarSurface(model_wrapper.ModelWrapper):
def __init__(
self,
world: scenario.World,
name: str = "lunar_surface",
position: List[float] = (0, 0, 0),
orientation: List[float] = (1, 0, 0, 0),
models_dir: Optional[str] = None,
np_random: Optional[RandomState] = None,
**kwargs,
):
if np_random is None:
np_random = np.random.default_rng()
# Get a unique model name
model_name = get_unique_model_name(world, name)
# Setup initial pose
initial_pose = scenario.Pose(position, orientation)
# Get path to all lunar surface models
if not models_dir:
models_dir = os.environ.get("SDF_PATH_LUNAR_SURFACE", default="")
# Make sure the path exists
if not os.path.exists(models_dir):
raise ValueError(
f"Invalid path '{models_dir}' pointed by 'SDF_PATH_LUNAR_SURFACE' environment variable."
)
# Select a single model at random
model_dir = str(np_random.choice(os.listdir(models_dir)))
sdf_filepath = os.path.join(models_dir, model_dir, "model.sdf")
# Insert the model
ok_model = world.to_gazebo().insert_model_from_file(
sdf_filepath, initial_pose, model_name
)
if not ok_model:
raise RuntimeError("Failed to insert " + model_name)
# Get the model
model = world.get_model(model_name)
# Initialize base class
model_wrapper.ModelWrapper.__init__(self, model=model)

View file

@ -0,0 +1,2 @@
from .model_collection_randomizer import ModelCollectionRandomizer
from .xacro2sdf import xacro2sdf

View file

@ -0,0 +1,745 @@
import glob
import os
from typing import List, Optional, Tuple
import numpy as np
import trimesh
from gym_gz.utils import logger
from numpy.random import RandomState
from pcg_gazebo.parsers import parse_sdf
from pcg_gazebo.parsers.sdf import create_sdf_element
from scenario import gazebo as scenario_gazebo
# Note: only models with mesh geometry are supported
class ModelCollectionRandomizer:
_class_model_paths = None
__sdf_base_name = "model.sdf"
__configured_sdf_base_name = "model_modified.sdf"
__blacklisted_base_name = "BLACKLISTED"
__collision_mesh_dir = "meshes/collision/"
__collision_mesh_file_type = "stl"
__original_scale_base_name = "original_scale.txt"
def __init__(
self,
model_paths=None,
owner="GoogleResearch",
collection="Google Scanned Objects",
server="https://fuel.ignitionrobotics.org",
server_version="1.0",
unique_cache=False,
reset_collection=False,
enable_blacklisting=True,
np_random: Optional[RandomState] = None,
):
# If enabled, the newly created objects of this class will use its own individual cache
# for model paths and must discover/download them on its own
self._unique_cache = unique_cache
# Flag that determines if models that cannot be used are blacklisted
self._enable_blacklisting = enable_blacklisting
# If enabled, the class-level cache used to store model paths among instances will be reset
if reset_collection and not self._unique_cache:
    type(self)._class_model_paths = None
# Get file paths to all models from
# a) the `model_paths` arg
# b) the local cache (if `owner` already has models there, i.e. `collection` was downloaded before)
# c) the Fuel collection (if `owner` has no models in the local cache)
if model_paths is not None:
# Use arg
if self._unique_cache:
self._model_paths = model_paths
else:
    # Store on the class so that the cache is shared among instances
    type(self)._class_model_paths = model_paths
else:
# Use local cache or Fuel
if self._unique_cache:
self._model_paths = self.get_collection_paths(
owner=owner,
collection=collection,
server=server,
server_version=server_version,
)
elif self._class_model_paths is None:
    # Executed only once, unless the paths are reset with the `reset_collection` arg
    type(self)._class_model_paths = self.get_collection_paths(
        owner=owner,
        collection=collection,
        server=server,
        server_version=server_version,
    )
# Initialise rng (with a seed, if desired)
if np_random is not None:
self.np_random = np_random
else:
self.np_random = np.random.default_rng()
@classmethod
def get_collection_paths(
cls,
owner="GoogleResearch",
collection="Google Scanned Objects",
server="https://fuel.ignitionrobotics.org",
server_version="1.0",
model_name: str = "",
) -> List[str]:
# First check the local cache (for performance)
# Note: This unfortunately does not check if models belong to the specified collection
# TODO: Make sure models belong to the collection if sampled from local cache
model_paths = scenario_gazebo.get_local_cache_model_paths(
owner=owner, name=model_name
)
if len(model_paths) > 0:
return model_paths
# Else download the models from Fuel and then try again
if collection:
download_uri = "%s/%s/%s/collections/%s" % (
server,
server_version,
owner,
collection,
)
elif model_name:
    download_uri = "%s/%s/%s/models/%s" % (
        server,
        server_version,
        owner,
        model_name,
    )
else:
    raise ValueError("Either `collection` or `model_name` must be specified")
download_command = 'ign fuel download -v 3 -t model -j %s -u "%s"' % (
os.cpu_count(),
download_uri,
)
os.system(download_command)
model_paths = scenario_gazebo.get_local_cache_model_paths(
owner=owner, name=model_name
)
if 0 == len(model_paths):
    logger.error(
        f'URI "{download_uri}" is not valid and does not contain any models '
        "that are owned by the owner of the collection"
    )
return model_paths
def random_model(
self,
min_scale=0.125,
max_scale=0.175,
min_mass=0.05,
max_mass=0.25,
min_friction=0.75,
max_friction=1.5,
decimation_fraction_of_visual=0.25,
decimation_min_faces=40,
decimation_max_faces=200,
max_faces=40000,
max_vertices=None,
component_min_faces_fraction=0.1,
component_max_volume_fraction=0.35,
fix_mtl_texture_paths=True,
skip_blacklisted=True,
return_sdf_path=True,
) -> str:
# Loop until a model is found, checked for validity, configured and returned
# If any of these steps fail, sample another model and try again
# Note: Due to this behaviour, the function could stall if all models are invalid
# TODO: Add a simple timeout to random sampling of valid model (# of attempts or time-based)
while True:
# Get path to a random model from the collection
model_path = self.get_random_model_path()
# Check if the model is already blacklisted and skip if desired
if skip_blacklisted and self.is_blacklisted(model_path):
continue
# Check if the model is already configured
if self.is_configured(model_path):
# If so, break the loop
break
# Process the model and break loop only if it is valid
if self.process_model(
model_path,
decimation_fraction_of_visual=decimation_fraction_of_visual,
decimation_min_faces=decimation_min_faces,
decimation_max_faces=decimation_max_faces,
max_faces=max_faces,
max_vertices=max_vertices,
component_min_faces_fraction=component_min_faces_fraction,
component_max_volume_fraction=component_max_volume_fraction,
fix_mtl_texture_paths=fix_mtl_texture_paths,
):
break
# Apply randomization
self.randomize_configured_model(
model_path,
min_scale=min_scale,
max_scale=max_scale,
min_friction=min_friction,
max_friction=max_friction,
min_mass=min_mass,
max_mass=max_mass,
)
if return_sdf_path:
# Return path to the configured SDF file
return self.get_configured_sdf_path(model_path)
else:
# Return path to the model directory
return model_path
def process_all_models(
self,
decimation_fraction_of_visual=0.025,
decimation_min_faces=8,
decimation_max_faces=400,
max_faces=40000,
max_vertices=None,
component_min_faces_fraction=0.1,
component_max_volume_fraction=0.35,
fix_mtl_texture_paths=True,
):
if self._unique_cache:
model_paths = self._model_paths
else:
model_paths = self._class_model_paths
blacklist_model_counter = 0
for i in range(len(model_paths)):
if not self.process_model(
model_paths[i],
decimation_fraction_of_visual=decimation_fraction_of_visual,
decimation_min_faces=decimation_min_faces,
decimation_max_faces=decimation_max_faces,
max_faces=max_faces,
max_vertices=max_vertices,
component_min_faces_fraction=component_min_faces_fraction,
component_max_volume_fraction=component_max_volume_fraction,
fix_mtl_texture_paths=fix_mtl_texture_paths,
):
blacklist_model_counter += 1
print('Processed model %i/%i "%s"' % (i + 1, len(model_paths), model_paths[i]))
print("Number of blacklisted models: %i" % blacklist_model_counter)
def process_model(
self,
model_path,
decimation_fraction_of_visual=0.25,
decimation_min_faces=40,
decimation_max_faces=200,
max_faces=40000,
max_vertices=None,
component_min_faces_fraction=0.1,
component_max_volume_fraction=0.35,
fix_mtl_texture_paths=True,
) -> bool:
# Parse the SDF of the model
sdf = parse_sdf(self.get_sdf_path(model_path))
# Process the model(s) contained in the SDF
for model in sdf.models:
# Process the link(s) of each model
for link in model.links:
# Get rid of the existing collisions prior to simplifying it
link.collisions.clear()
# Values for the total inertial properties of current link
# These values will be updated for each body that the link contains
total_mass = 0.0
total_inertia = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
common_centre_of_mass = [0.0, 0.0, 0.0]
# Go through the visuals and process them
for visual in link.visuals:
# Get path to the mesh of the link's visual
mesh_path = self.get_mesh_path(model_path, visual)
# If desired, fix texture path in 'mtl' files for '.obj' mesh format
if fix_mtl_texture_paths:
self.fix_mtl_texture_paths(
model_path, mesh_path, model.attributes["name"]
)
# Load the mesh (without materials)
mesh = trimesh.load(mesh_path, force="mesh", skip_materials=True)
# Check if model has too much geometry (blacklist if needed)
if not self.check_excessive_geometry(
mesh, model_path, max_faces=max_faces, max_vertices=max_vertices
):
return False
# Check if model has disconnected geometry/components (blacklist if needed)
if not self.check_disconnected_components(
mesh,
model_path,
component_min_faces_fraction=component_min_faces_fraction,
component_max_volume_fraction=component_max_volume_fraction,
):
return False
# Compute inertial properties for this mesh
(
total_mass,
total_inertia,
common_centre_of_mass,
) = self.sum_inertial_properties(
mesh, total_mass, total_inertia, common_centre_of_mass
)
# Add decimated collision geometry to the SDF
self.add_collision(
mesh,
link,
model_path,
fraction_of_visual=decimation_fraction_of_visual,
min_faces=decimation_min_faces,
max_faces=decimation_max_faces,
)
# Write original scale (size) into the SDF
# This is used for later reference during randomization (for scale limits)
self.write_original_scale(mesh, model_path)
# Make sure the link has valid inertial properties (blacklist if needed)
if not self.check_inertial_properties(
model_path, total_mass, total_inertia
):
return False
# Write inertial properties to the SDF of the link
self.write_inertial_properties(
link, total_mass, total_inertia, common_centre_of_mass
)
# Write the configured SDF into a file
sdf.export_xml(self.get_configured_sdf_path(model_path))
return True
def add_collision(
self,
mesh,
link,
model_path,
fraction_of_visual=0.05,
min_faces=8,
max_faces=750,
friction=1.0,
):
# Determine the name and path of the collision geometry
collision_name = (
link.attributes["name"] + "_collision_" + str(len(link.collisions))
)
collision_mesh_path = self.get_collision_mesh_path(model_path, collision_name)
# Determine number of faces to keep after the decimation
face_count = min(
max(fraction_of_visual * len(mesh.faces), min_faces), max_faces
)
# Simplify mesh via decimation
collision_mesh = mesh.simplify_quadratic_decimation(face_count)
# Export the collision mesh to the appropriate location
os.makedirs(os.path.dirname(collision_mesh_path), exist_ok=True)
collision_mesh.export(
collision_mesh_path, file_type=self.__collision_mesh_file_type
)
# Create collision SDF element
collision = create_sdf_element("collision")
# Add collision geometry to the SDF
collision.geometry.mesh = create_sdf_element("mesh")
collision.geometry.mesh.uri = os.path.relpath(
collision_mesh_path, start=model_path
)
# Add surface friction to the SDF of collision (default to 1 and randomize later)
collision.surface = create_sdf_element("surface")
collision.surface.friction = create_sdf_element("friction", "surface")
collision.surface.friction.ode = create_sdf_element("ode", "collision")
collision.surface.friction.ode.mu = friction
collision.surface.friction.ode.mu2 = friction
# Add it to the SDF of the link
collision_name = os.path.basename(collision_mesh_path).split(".")[0]
link.add_collision(collision_name, collision)
def sum_inertial_properties(
    self, mesh, total_mass, total_inertia, common_centre_of_mass, density=1.0
) -> Tuple[float, np.ndarray, List[float]]:
    # An arbitrary density is used here
    # The mass will be randomized once it is fully computed for a link
    mesh.density = density
    # Tmp variable to store the mass of all previous geometry, used to determine centre of mass
    mass_of_others = total_mass
    # For each additional mesh, add the mass and inertia
    # Note: The inertia contributions are summed without the parallel-axis
    # term, which is an approximation for links composed of multiple meshes
    total_mass += mesh.mass
    total_inertia = np.asarray(total_inertia) + mesh.moment_inertia
    # Compute a common centre of mass between all previous geometry and the new mesh
    common_centre_of_mass = [
        (mass_of_others * common_centre_of_mass[i] + mesh.mass * mesh.center_mass[i])
        / total_mass
        for i in range(3)
    ]
    return total_mass, total_inertia, common_centre_of_mass
def randomize_configured_model(
self,
model_path,
min_scale=0.05,
max_scale=0.25,
min_mass=0.1,
max_mass=3.0,
min_friction=0.75,
max_friction=1.5,
):
# Get path to the configured SDF file
configured_sdf_path = self.get_configured_sdf_path(model_path)
# Parse the configured SDF that needs to be randomized
sdf = parse_sdf(configured_sdf_path)
# Process the model(s) contained in the SDF
for model in sdf.models:
# Process the link(s) of each model
for link in model.links:
# Randomize scale of the link
self.randomize_scale(
model_path, link, min_scale=min_scale, max_scale=max_scale
)
# Randomize inertial properties of the link
self.randomize_inertial(link, min_mass=min_mass, max_mass=max_mass)
# Randomize friction of the link
self.randomize_friction(
link, min_friction=min_friction, max_friction=max_friction
)
# Overwrite the configured SDF file with randomized values
sdf.export_xml(configured_sdf_path)
def randomize_scale(self, model_path, link, min_scale=0.05, max_scale=0.25) -> bool:
    # Note: This function currently supports only scaling of links with a single mesh geometry
    if len(link.visuals) > 1:
        return False
# Get a random scale for the size of mesh
random_scale = self.np_random.uniform(min_scale, max_scale)
# Determine a scale factor that will result in such scale for the size of mesh
original_mesh_scale = self.read_original_scale(model_path)
scale_factor = random_scale / original_mesh_scale
# Determine scale factor for inertial properties based on random scale and current scale
current_scale = link.visuals[0].geometry.mesh.scale.value[0]
inertial_scale_factor = scale_factor / current_scale
# Write scale factor into SDF for visual and collision geometry
link.visuals[0].geometry.mesh.scale = [scale_factor] * 3
link.collisions[0].geometry.mesh.scale = [scale_factor] * 3
# Recompute inertial properties according to the scale
link.inertial.pose.x *= inertial_scale_factor
link.inertial.pose.y *= inertial_scale_factor
link.inertial.pose.z *= inertial_scale_factor
# Mass is scaled n^3
link.mass = link.mass.value * inertial_scale_factor**3
# Inertia is scaled n^5
inertial_scale_factor_n5 = inertial_scale_factor**5
link.inertia.ixx = link.inertia.ixx.value * inertial_scale_factor_n5
link.inertia.iyy = link.inertia.iyy.value * inertial_scale_factor_n5
link.inertia.izz = link.inertia.izz.value * inertial_scale_factor_n5
link.inertia.ixy = link.inertia.ixy.value * inertial_scale_factor_n5
link.inertia.ixz = link.inertia.ixz.value * inertial_scale_factor_n5
link.inertia.iyz = link.inertia.iyz.value * inertial_scale_factor_n5
return True
def randomize_inertial(self, link, min_mass=0.1, max_mass=3.0):
random_mass = self.np_random.uniform(min_mass, max_mass)
mass_scale_factor = random_mass / link.mass.value
link.mass = random_mass
link.inertia.ixx = link.inertia.ixx.value * mass_scale_factor
link.inertia.iyy = link.inertia.iyy.value * mass_scale_factor
link.inertia.izz = link.inertia.izz.value * mass_scale_factor
link.inertia.ixy = link.inertia.ixy.value * mass_scale_factor
link.inertia.ixz = link.inertia.ixz.value * mass_scale_factor
link.inertia.iyz = link.inertia.iyz.value * mass_scale_factor
def randomize_friction(self, link, min_friction=0.75, max_friction=1.5):
for collision in link.collisions:
random_friction = self.np_random.uniform(min_friction, max_friction)
collision.surface.friction.ode.mu = random_friction
collision.surface.friction.ode.mu2 = random_friction
def write_inertial_properties(self, link, mass, inertia, centre_of_mass):
link.mass = mass
link.inertia.ixx = inertia[0][0]
link.inertia.iyy = inertia[1][1]
link.inertia.izz = inertia[2][2]
link.inertia.ixy = inertia[0][1]
link.inertia.ixz = inertia[0][2]
link.inertia.iyz = inertia[1][2]
link.inertial.pose = [
centre_of_mass[0],
centre_of_mass[1],
centre_of_mass[2],
0.0,
0.0,
0.0,
]
def write_original_scale(self, mesh, model_path):
    with open(self.get_original_scale_path(model_path), "w") as file:
        file.write(str(mesh.scale))
def read_original_scale(self, model_path) -> float:
    with open(self.get_original_scale_path(model_path), "r") as file:
        original_scale = file.read()
    return float(original_scale)
def check_excessive_geometry(
self, mesh, model_path, max_faces=40000, max_vertices=None
) -> bool:
if max_faces is not None:
num_faces = len(mesh.faces)
if num_faces > max_faces:
self.blacklist_model(
model_path, reason="Excessive geometry (%d faces)" % num_faces
)
return False
if max_vertices is not None:
num_vertices = len(mesh.vertices)
if num_vertices > max_vertices:
self.blacklist_model(
model_path, reason="Excessive geometry (%d vertices)" % num_vertices
)
return False
return True
def check_disconnected_components(
self,
mesh,
model_path,
component_min_faces_fraction=0.05,
component_max_volume_fraction=0.1,
) -> bool:
# Get a list of all connected components inside the mesh
# Consider only components that contain at least `component_min_faces_fraction` of the faces
min_faces = round(component_min_faces_fraction * len(mesh.faces))
connected_components = trimesh.graph.connected_components(
mesh.face_adjacency, min_len=min_faces
)
# If more than one component was detected, also consider the relative volume of the meshes
if len(connected_components) > 1:
total_volume = mesh.volume
large_component_counter = 0
for component in connected_components:
submesh = mesh.copy()
mask = np.zeros(len(mesh.faces), dtype=bool)
mask[component] = True
submesh.update_faces(mask)
volume_fraction = submesh.volume / total_volume
if volume_fraction > component_max_volume_fraction:
large_component_counter += 1
if large_component_counter > 1:
self.blacklist_model(
model_path,
reason="Disconnected components (%d instances)"
% len(connected_components),
)
return False
return True
def check_inertial_properties(self, model_path, mass, inertia) -> bool:
if (
mass < 1e-10
or inertia[0][0] < 1e-10
or inertia[1][1] < 1e-10
or inertia[2][2] < 1e-10
):
self.blacklist_model(model_path, reason="Invalid inertial properties")
return False
return True
def get_random_model_path(self) -> str:
if self._unique_cache:
return self.np_random.choice(self._model_paths)
else:
return self.np_random.choice(self._class_model_paths)
def get_collision_mesh_path(self, model_path, collision_name) -> str:
return os.path.join(
model_path,
self.__collision_mesh_dir,
collision_name + "." + self.__collision_mesh_file_type,
)
def get_sdf_path(self, model_path) -> str:
return os.path.join(model_path, self.__sdf_base_name)
def get_configured_sdf_path(self, model_path) -> str:
return os.path.join(model_path, self.__configured_sdf_base_name)
def get_blacklisted_path(self, model_path) -> str:
return os.path.join(model_path, self.__blacklisted_base_name)
def get_mesh_path(self, model_path, visual_or_collision) -> str:
# TODO: This might need fixing for certain collections/models
mesh_uri = visual_or_collision.geometry.mesh.uri.value
return os.path.join(model_path, mesh_uri)
def get_original_scale_path(self, model_path) -> str:
return os.path.join(model_path, self.__original_scale_base_name)
def blacklist_model(self, model_path, reason="Unknown"):
    if self._enable_blacklisting:
        with open(self.get_blacklisted_path(model_path), "w") as bl_file:
            bl_file.write(reason)
logger.warn(
'%s model "%s". Reason: %s.'
% (
"Blacklisting" if self._enable_blacklisting else "Skipping",
model_path,
reason,
)
)
def is_blacklisted(self, model_path) -> bool:
return os.path.isfile(self.get_blacklisted_path(model_path))
def is_configured(self, model_path) -> bool:
return os.path.isfile(self.get_configured_sdf_path(model_path))
def fix_mtl_texture_paths(self, model_path, mesh_path, model_name):
    # Only `.obj` meshes reference MTL material libraries
    if mesh_path.endswith(".obj"):
        # Find all textures located in the model path, used later for relative linking
        texture_files = glob.glob(
            os.path.join(model_path, "**", "textures", "*.*"), recursive=True
        )
# Find location of mtl file, if any
mtllib_file = None
with open(mesh_path, "r") as file:
for line in file:
if "mtllib" in line:
mtllib_file = line.split(" ")[-1].strip()
break
if mtllib_file is not None:
    mtllib_file = os.path.join(os.path.dirname(mesh_path), mtllib_file)
    with open(mtllib_file, "r") as fin:
        data = fin.read()
for line in data.splitlines():
if "map_" in line:
# Find the name of the texture/map in the mtl
map_file = line.split(" ")[-1].strip()
# Find the first match of the texture/map file
for texture_file in texture_files:
if os.path.basename(
texture_file
) == map_file or os.path.basename(
texture_file
) == os.path.basename(
map_file
):
# Make the file unique to the model (unless it already is)
if model_name in texture_file:
new_texture_file_name = texture_file
else:
new_texture_file_name = texture_file.replace(
map_file, model_name + "_" + map_file
)
os.rename(texture_file, new_texture_file_name)
# Apply the correct relative path
data = data.replace(
map_file,
os.path.relpath(
new_texture_file_name,
start=os.path.dirname(mesh_path),
),
)
break
# Write back the corrected data
with open(mtllib_file, "w") as fout:
    fout.write(data)
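# Usage sketch (illustrative): sampling a processed and randomized model from
# the default Google Scanned Objects collection, then inserting it into a
# world with the scenario API used elsewhere in this package. `world` and
# `pose` are assumptions about the caller's setup.
#
#   randomizer = ModelCollectionRandomizer()
#   sdf_path = randomizer.random_model(min_scale=0.1, max_scale=0.2)
#   world.to_gazebo().insert_model_from_file(sdf_path, pose, "object_0")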

View file

@ -0,0 +1,37 @@
import subprocess
import tempfile
from typing import Dict, Optional, Tuple
import xacro
def xacro2sdf(
input_file_path: str, mappings: Dict, model_path_remap: Optional[Tuple[str, str]]
) -> str:
"""Convert xacro (URDF variant) with given arguments to SDF and return as a string."""
# Convert all values in mappings to strings (without mutating the caller's dict)
mappings = {key: str(value) for key, value in mappings.items()}
# Convert xacro to URDF
urdf_xml = xacro.process(input_file_name=input_file_path, mappings=mappings)
# Create temporary file for URDF (`ign sdf -p` accepts only files)
with tempfile.NamedTemporaryFile() as tmp_urdf:
with open(tmp_urdf.name, "w") as urdf_file:
urdf_file.write(urdf_xml)
# Convert to SDF
result = subprocess.run(
["ign", "sdf", "-p", tmp_urdf.name], stdout=subprocess.PIPE
)
sdf_xml = result.stdout.decode("utf-8")
# Remap package name to model name, such that meshes can be located by Ignition
if model_path_remap is not None:
sdf_xml = sdf_xml.replace(model_path_remap[0], model_path_remap[1])
# Return as string
return sdf_xml
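# Usage sketch: converting a xacro description to SDF and remapping the
# `package://` prefix so that Ignition can resolve mesh URIs. The file name
# and the remap pair below are placeholders.
if __name__ == "__main__":
    sdf_xml = xacro2sdf(
        input_file_path="robot.urdf.xacro",
        mappings={"use_gripper": True},
        model_path_remap=("package://robot_description", "model://robot"),
    )
    print(sdf_xml[:250])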

View file

@ -0,0 +1,4 @@
from .camera_subscriber import CameraSubscriber, CameraSubscriberStandalone
from .twist_subscriber import TwistSubscriber
from .joint_states import JointStates
# from .octree import OctreeCreator

View file

@ -0,0 +1,118 @@
import sys
from threading import Lock, Thread
from typing import Optional, Union
import rclpy
from rclpy.callback_groups import CallbackGroup
from rclpy.executors import SingleThreadedExecutor
from rclpy.node import Node
from rclpy.parameter import Parameter
from rclpy.qos import (
QoSDurabilityPolicy,
QoSHistoryPolicy,
QoSProfile,
QoSReliabilityPolicy,
)
from sensor_msgs.msg import Image, PointCloud2
class CameraSubscriber:
def __init__(
self,
node: Node,
topic: str,
is_point_cloud: bool,
callback_group: Optional[CallbackGroup] = None,
):
self._node = node
# Prepare the subscriber
if is_point_cloud:
camera_msg_type = PointCloud2
else:
camera_msg_type = Image
self.__observation = camera_msg_type()
self._node.create_subscription(
msg_type=camera_msg_type,
topic=topic,
callback=self.observation_callback,
qos_profile=QoSProfile(
reliability=QoSReliabilityPolicy.RELIABLE,
durability=QoSDurabilityPolicy.VOLATILE,
history=QoSHistoryPolicy.KEEP_LAST,
depth=1,
),
callback_group=callback_group,
)
self.__observation_mutex = Lock()
self.__new_observation_available = False
def observation_callback(self, msg):
"""
Callback for getting observation.
"""
self.__observation_mutex.acquire()
self.__observation = msg
self.__new_observation_available = True
self._node.get_logger().debug("New observation received.")
self.__observation_mutex.release()
def get_observation(self) -> Union[PointCloud2, Image]:
"""
Get the last received observation.
"""
self.__observation_mutex.acquire()
observation = self.__observation
self.__observation_mutex.release()
return observation
def reset_new_observation_checker(self):
"""
Reset checker of new observations, i.e. `self.new_observation_available()`
"""
self.__observation_mutex.acquire()
self.__new_observation_available = False
self.__observation_mutex.release()
@property
def new_observation_available(self):
"""
Check if new observation is available since `self.reset_new_observation_checker()` was called
"""
return self.__new_observation_available
class CameraSubscriberStandalone(Node, CameraSubscriber):
def __init__(
self,
topic: str,
is_point_cloud: bool,
node_name: str = "rbs_gym_camera_sub",
use_sim_time: bool = True,
):
try:
rclpy.init()
except Exception as e:
if not rclpy.ok():
sys.exit(f"ROS 2 context could not be initialised: {e}")
Node.__init__(self, node_name)
self.set_parameters(
[Parameter("use_sim_time", type_=Parameter.Type.BOOL, value=use_sim_time)]
)
CameraSubscriber.__init__(
self, node=self, topic=topic, is_point_cloud=is_point_cloud
)
# Spin the node in a separate thread
self._executor = SingleThreadedExecutor()
self._executor.add_node(self)
self._executor_thread = Thread(target=self._executor.spin, daemon=True, args=())
self._executor_thread.start()
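# Usage sketch (illustrative): the standalone subscriber spins in its own
# executor thread, so the latest image can be polled synchronously, e.g.
# from an environment's step() function. The topic name is a placeholder.
#
#   sub = CameraSubscriberStandalone(topic="/rgbd_camera/image",
#                                    is_point_cloud=False)
#   image_msg = sub.get_observation()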

View file

@ -0,0 +1,107 @@
from array import array
from threading import Lock
from typing import Optional
from rclpy.callback_groups import CallbackGroup
from rclpy.node import Node
from rclpy.qos import (
QoSDurabilityPolicy,
QoSHistoryPolicy,
QoSProfile,
QoSReliabilityPolicy,
)
from sensor_msgs.msg import JointState
class JointStates:
def __init__(
self,
node: Node,
topic: str,
callback_group: Optional[CallbackGroup] = None,
):
self._node = node
self.__observation = JointState()
self._node.create_subscription(
msg_type=JointState,
topic=topic,
callback=self.observation_callback,
qos_profile=QoSProfile(
reliability=QoSReliabilityPolicy.RELIABLE,
durability=QoSDurabilityPolicy.VOLATILE,
history=QoSHistoryPolicy.KEEP_LAST,
depth=1,
),
callback_group=callback_group,
)
self.__observation_mutex = Lock()
self.__new_observation_available = False
def observation_callback(self, msg):
"""
Callback for getting observation.
"""
self.__observation_mutex.acquire()
self.__observation = msg
self.__new_observation_available = True
self._node.get_logger().debug("New observation received.")
self.__observation_mutex.release()
def get_observation(self) -> JointState:
"""
Get the last received observation.
"""
self.__observation_mutex.acquire()
observation = self.__observation
self.__observation_mutex.release()
return observation
def get_positions(self) -> array:
"""
Get the last recorded observation position
"""
self.__observation_mutex.acquire()
observation = self.__observation.position
self.__observation_mutex.release()
return observation
def get_velocities(self) -> array:
"""
Get the last recorded observation velocity
"""
self.__observation_mutex.acquire()
observation = self.__observation.velocity
self.__observation_mutex.release()
return observation
def get_efforts(self) -> array:
"""
Get the last recorded observation effort
"""
self.__observation_mutex.acquire()
observation = self.__observation.effort
self.__observation_mutex.release()
return observation
def reset_new_observation_checker(self):
"""
Reset checker of new observations, i.e. `self.new_observation_available()`
"""
self.__observation_mutex.acquire()
self.__new_observation_available = False
self.__observation_mutex.release()
@property
def new_observation_available(self):
"""
Check if new observation is available since `self.reset_new_observation_checker()` was called
"""
return self.__new_observation_available

View file

@ -0,0 +1,211 @@
from typing import List, Tuple
import numpy as np
import ocnn
import open3d
import torch
from rclpy.node import Node
from sensor_msgs.msg import PointCloud2
from rbs_gym.envs.utils import Tf2Listener, conversions
class OctreeCreator:
def __init__(
self,
node: Node,
tf2_listener: Tf2Listener,
reference_frame_id: str,
min_bound: Tuple[float, float, float] = (-1.0, -1.0, -1.0),
max_bound: Tuple[float, float, float] = (1.0, 1.0, 1.0),
include_color: bool = False,
# Note: For efficiency, the first channel of RGB is used for intensity
include_intensity: bool = False,
depth: int = 4,
full_depth: int = 2,
adaptive: bool = False,
adp_depth: int = 4,
normals_radius: float = 0.05,
normals_max_nn: int = 10,
node_dis: bool = True,
node_feature: bool = False,
split_label: bool = False,
th_normal: float = 0.1,
th_distance: float = 2.0,
extrapolate: bool = False,
save_pts: bool = False,
key2xyz: bool = False,
debug_draw: bool = False,
debug_write_octree: bool = False,
):
self._node = node
# Listener of tf2 transforms is shared with the owner
self.__tf2_listener = tf2_listener
# Parameters
self._reference_frame_id = reference_frame_id
self._min_bound = min_bound
self._max_bound = max_bound
self._include_color = include_color
self._include_intensity = include_intensity
self._normals_radius = normals_radius
self._normals_max_nn = normals_max_nn
self._debug_draw = debug_draw
self._debug_write_octree = debug_write_octree
# Create a converter between points and octree
self._points_to_octree = ocnn.Points2Octree(
depth=depth,
full_depth=full_depth,
node_dis=node_dis,
node_feature=node_feature,
split_label=split_label,
adaptive=adaptive,
adp_depth=adp_depth,
th_normal=th_normal,
th_distance=th_distance,
extrapolate=extrapolate,
save_pts=save_pts,
key2xyz=key2xyz,
bb_min=min_bound,
bb_max=max_bound,
)
def __call__(self, ros_point_cloud2: PointCloud2) -> torch.Tensor:
# Convert to Open3D PointCloud
open3d_point_cloud = conversions.pointcloud2_to_open3d(
ros_point_cloud2=ros_point_cloud2,
include_color=self._include_color,
include_intensity=self._include_intensity,
)
# Preprocess point cloud (transform to robot frame, crop to workspace and estimate normals)
open3d_point_cloud = self.preprocess_point_cloud(
open3d_point_cloud=open3d_point_cloud,
camera_frame_id=ros_point_cloud2.header.frame_id,
reference_frame_id=self._reference_frame_id,
min_bound=self._min_bound,
max_bound=self._max_bound,
normals_radius=self._normals_radius,
normals_max_nn=self._normals_max_nn,
)
# Draw if needed
if self._debug_draw:
open3d.visualization.draw_geometries(
[
open3d_point_cloud,
open3d.geometry.TriangleMesh.create_coordinate_frame(
size=0.2, origin=[0.0, 0.0, 0.0]
),
],
point_show_normal=True,
)
# Construct octree from such point cloud
octree = self.construct_octree(
open3d_point_cloud,
include_color=self._include_color,
include_intensity=self._include_intensity,
)
# Write if needed
if self._debug_write_octree:
ocnn.write_octree(octree, "octree.octree")
return octree
def preprocess_point_cloud(
self,
open3d_point_cloud: open3d.geometry.PointCloud,
camera_frame_id: str,
reference_frame_id: str,
min_bound: List[float],
max_bound: List[float],
normals_radius: float,
normals_max_nn: int,
) -> open3d.geometry.PointCloud:
# Check if point cloud has any points
if not open3d_point_cloud.has_points():
self._node.get_logger().warn(
"Point cloud has no points. Pre-processing skipped."
)
return open3d_point_cloud
# Get transformation from camera to robot and use it to transform the point
# cloud into the robot's base coordinate frame
# Note: Default to identity so that `transform_mat` is defined even when the
# frames already match (it is also used below to orient the normals)
transform_mat = np.eye(4)
if camera_frame_id != reference_frame_id:
    transform = self.__tf2_listener.lookup_transform_sync(
        target_frame=reference_frame_id, source_frame=camera_frame_id
    )
    transform_mat = conversions.transform_to_matrix(transform=transform)
    open3d_point_cloud = open3d_point_cloud.transform(transform_mat)
# Crop point cloud to include only the workspace
open3d_point_cloud = open3d_point_cloud.crop(
bounding_box=open3d.geometry.AxisAlignedBoundingBox(
min_bound=min_bound, max_bound=max_bound
)
)
# Check if any points remain in the area after cropping
if not open3d_point_cloud.has_points():
self._node.get_logger().warn(
"Point cloud has no points after cropping it to the workspace volume."
)
return open3d_point_cloud
# Estimate normal vector for each cloud point and orient these towards the camera
open3d_point_cloud.estimate_normals(
search_param=open3d.geometry.KDTreeSearchParamHybrid(
radius=normals_radius, max_nn=normals_max_nn
),
fast_normal_computation=True,
)
open3d_point_cloud.orient_normals_towards_camera_location(
camera_location=transform_mat[0:3, 3]
)
return open3d_point_cloud
def construct_octree(
self,
open3d_point_cloud: open3d.geometry.PointCloud,
include_color: bool,
include_intensity: bool,
) -> torch.Tensor:
# In case the point cloud has no points, add a single point
# This is a workaround because I was not able to create an empty octree without getting a segfault
# TODO: Figure out a better way of making an empty octree (it does not occur if setup correctly, so probably not worth it)
if not open3d_point_cloud.has_points():
open3d_point_cloud.points.append(
(
(self._min_bound[0] + self._max_bound[0]) / 2,
(self._min_bound[1] + self._max_bound[1]) / 2,
(self._min_bound[2] + self._max_bound[2]) / 2,
)
)
open3d_point_cloud.normals.append((0.0, 0.0, 0.0))
if include_color or include_intensity:
open3d_point_cloud.colors.append((0.0, 0.0, 0.0))
# Convert open3d point cloud into octree points
octree_points = conversions.open3d_point_cloud_to_octree_points(
open3d_point_cloud=open3d_point_cloud,
include_color=include_color,
include_intensity=include_intensity,
)
# Convert octree points into 1D Tensor (via ndarray)
# Note: Copy of points here is necessary as ndarray would otherwise be immutable
octree_points_ndarray = np.frombuffer(np.copy(octree_points.buffer()), np.uint8)
octree_points_tensor = torch.from_numpy(octree_points_ndarray)
# Finally, create an octree from the points
return self._points_to_octree(octree_points_tensor)
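# Usage sketch (illustrative): `node` and `tf2_listener` are assumptions
# about the owner's setup (an rclpy Node and rbs_gym's Tf2Listener).
#
#   octree_creator = OctreeCreator(node=node,
#                                  tf2_listener=tf2_listener,
#                                  reference_frame_id="base_link")
#   octree = octree_creator(point_cloud_msg)  # -> torch.Tensor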

View file

@ -0,0 +1,81 @@
from threading import Lock
from typing import Optional
from rclpy.callback_groups import CallbackGroup
from rclpy.node import Node
from rclpy.qos import (
    QoSDurabilityPolicy,
    QoSHistoryPolicy,
    QoSProfile,
    QoSReliabilityPolicy,
)
from geometry_msgs.msg import TwistStamped
class TwistSubscriber:
def __init__(
self,
node: Node,
topic: str,
callback_group: Optional[CallbackGroup] = None,
):
self._node = node
self.__observation = TwistStamped()
self._node.create_subscription(
msg_type=TwistStamped,
topic=topic,
callback=self.observation_callback,
qos_profile=QoSProfile(
reliability=QoSReliabilityPolicy.RELIABLE,
durability=QoSDurabilityPolicy.VOLATILE,
history=QoSHistoryPolicy.KEEP_LAST,
depth=1,
),
callback_group=callback_group,
)
self.__observation_mutex = Lock()
self.__new_observation_available = False
def observation_callback(self, msg):
"""
Callback for getting observation.
"""
self.__observation_mutex.acquire()
self.__observation = msg
self.__new_observation_available = True
self._node.get_logger().debug("New observation received.")
self.__observation_mutex.release()
def get_observation(self) -> TwistStamped:
"""
Get the last received observation.
"""
self.__observation_mutex.acquire()
observation = self.__observation
self.__observation_mutex.release()
return observation
def reset_new_observation_checker(self):
"""
Reset checker of new observations, i.e. `self.new_observation_available()`
"""
self.__observation_mutex.acquire()
self.__new_observation_available = False
self.__observation_mutex.release()
@property
def new_observation_available(self):
"""
Check if new observation is available since `self.reset_new_observation_checker()` was called
"""
return self.__new_observation_available

View file

@ -0,0 +1 @@
from .manipulation import ManipulationGazeboEnvRandomizer

File diff suppressed because it is too large

View file

@ -0,0 +1,4 @@
from .curriculums import *
# from .grasp import *
# from .grasp_planetary import *
from .reach import *

View file

@ -0,0 +1 @@
from .grasp import GraspCurriculum

View file

@ -0,0 +1,700 @@
from __future__ import annotations
import enum
import itertools
import math
from collections import deque
from typing import Callable, Deque, Dict, Optional, Tuple, Type
import numpy as np
from gym_gz.base.task import Task
from gym_gz.utils.typing import Reward
INFO_MEAN_STEP_KEY: str = "__mean_step__"
INFO_MEAN_EPISODE_KEY: str = "__mean_episode__"
@enum.unique
class CurriculumStage(enum.Enum):
"""
Ordered enum that represents stages of a curriculum for RL task.
"""
@classmethod
def first(cls) -> CurriculumStage:
return cls(1)
@classmethod
def last(cls) -> CurriculumStage:
return cls(len(cls))
def next(self) -> Optional[CurriculumStage]:
next_value = self.value + 1
if next_value > self.last().value:
return None
else:
# Look up the next member via the enum class (members themselves are not callable)
return type(self)(next_value)
def previous(self) -> Optional[CurriculumStage]:
previous_value = self.value - 1
if previous_value < self.first().value:
return None
else:
return type(self)(previous_value)
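# Example (illustrative): a concrete two-stage curriculum and how the ordered
# helpers behave. The enum below is hypothetical.
#
#   @enum.unique
#   class ExampleStage(CurriculumStage):
#       REACH = 1
#       GRASP = 2
#
#   ExampleStage.first()        # -> ExampleStage.REACH
#   ExampleStage.REACH.next()   # -> ExampleStage.GRASP
#   ExampleStage.GRASP.next()   # -> None (already the last stage)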
class StageRewardCurriculum:
"""
Curriculum that begins to compute rewards for a stage once all previous stages are complete.
"""
PERSISTENT_ID: str = "PERSISTENT"
INFO_CURRICULUM_PREFIX: str = "curriculum/"
def __init__(
self,
curriculum_stage: Type[CurriculumStage],
stage_reward_multiplier: float,
dense_reward: bool = False,
**kwargs,
):
if 0 == len(curriculum_stage):
raise TypeError(f"{curriculum_stage} has length of 0")
self.__use_dense_reward = dense_reward
if self.__use_dense_reward:
raise ValueError(
"Dense reward is currently not implemented for any curriculum"
)
# Setup internals
self._stage_type = curriculum_stage
self._stage_reward_functions: Dict[curriculum_stage, Callable] = {
curriculum_stage(stage): getattr(self, f"get_reward_{stage.name}")
for stage in iter(curriculum_stage)
}
self.__stage_reward_multipliers: Dict[curriculum_stage, float] = {
curriculum_stage(stage): stage_reward_multiplier ** (stage.value - 1)
for stage in iter(curriculum_stage)
}
self.stages_completed_this_episode: Dict[curriculum_stage, bool] = {
curriculum_stage(stage): False for stage in iter(curriculum_stage)
}
self.__stages_rewards_this_episode: Dict[curriculum_stage, float] = {
curriculum_stage(stage): 0.0 for stage in iter(curriculum_stage)
}
self.__stages_rewards_this_episode[self.PERSISTENT_ID] = 0.0
self.__episode_succeeded: bool = False
self.__episode_failed: bool = False
def get_reward(self, only_last_stage: bool = False, **kwargs) -> Reward:
reward = 0.0
# Determine the stage at which to start computing reward [performance - done stages give no reward]
if only_last_stage:
first_stage_to_process = self._stage_type.last()
else:
# Default to the last stage so the variable is always bound (e.g. if every stage is already complete)
first_stage_to_process = self._stage_type.last()
for stage in iter(self._stage_type):
if not self.stages_completed_this_episode[stage]:
first_stage_to_process = stage
break
# Iterate over all stages that might need to be processed
for stage in range(first_stage_to_process.value, len(self._stage_type) + 1):
stage = self._stage_type(stage)
# Compute reward for the current stage
stage_reward = self._stage_reward_functions[stage](**kwargs)
# Multiply by the reward multiplier
stage_reward *= self.__stage_reward_multipliers[stage]
# Add to the total step reward
reward += stage_reward
# Add reward to the list for info
self.__stages_rewards_this_episode[stage] += stage_reward
# Break if stage is not yet completed [performance - next stages won't give any reward]
if not self.stages_completed_this_episode[stage]:
break
# If the last stage is complete, the episode has succeeded
self.__episode_succeeded = self.stages_completed_this_episode[
self._stage_type.last()
]
if self.__episode_succeeded:
return reward
# Add persistent reward that is added regardless of the episode (unless task already succeeded)
persistent_reward = self.get_persistent_reward(**kwargs)
# Add to the total step reward
reward += persistent_reward
# Add reward to the list for info
self.__stages_rewards_this_episode[self.PERSISTENT_ID] += persistent_reward
return reward
def is_done(self) -> bool:
if self.__episode_succeeded:
# The episode ended with success
self.on_episode_success()
return True
elif self.__episode_failed:
# The episode ended due to failure
self.on_episode_failure()
return True
else:
# Otherwise, the episode is not yet done
return False
def get_info(self) -> Dict:
# Whether the episode succeeded
info = {
"is_success": self.__episode_succeeded,
}
# What stage was reached during this episode so far
for stage in iter(self._stage_type):
reached_stage = stage
if not self.stages_completed_this_episode[stage]:
break
info.update(
{
f"{self.INFO_CURRICULUM_PREFIX}{INFO_MEAN_EPISODE_KEY}ep_reached_stage_mean": reached_stage.value,
}
)
# Rewards for the individual stages
info.update(
{
f"{self.INFO_CURRICULUM_PREFIX}{INFO_MEAN_EPISODE_KEY}ep_rew_mean_{stage.value}_{stage.name.lower()}": self.__stages_rewards_this_episode[
stage
]
for stage in iter(self._stage_type)
}
)
info.update(
{
f"{self.INFO_CURRICULUM_PREFIX}{INFO_MEAN_EPISODE_KEY}ep_rew_mean_{self.PERSISTENT_ID.lower()}": self.__stages_rewards_this_episode[
self.PERSISTENT_ID
]
}
)
return info
def reset_task(self):
if not (self.__episode_succeeded or self.__episode_failed):
# The episode ended due to timeout
self.on_episode_timeout()
# Reset internals
self.stages_completed_this_episode = dict.fromkeys(
self.stages_completed_this_episode, False
)
self.__stages_rewards_this_episode = dict.fromkeys(
self.__stages_rewards_this_episode, 0.0
)
self.__stages_rewards_this_episode[self.PERSISTENT_ID] = 0.0
self.__episode_succeeded = False
self.__episode_failed = False
@property
def episode_succeeded(self) -> bool:
return self.__episode_succeeded
@episode_succeeded.setter
def episode_succeeded(self, value: bool):
self.__episode_succeeded = value
@property
def episode_failed(self) -> bool:
return self.__episode_failed
@episode_failed.setter
def episode_failed(self, value: bool):
self.__episode_failed = value
@property
def use_dense_reward(self) -> bool:
return self.__use_dense_reward
def get_persistent_reward(self, **kwargs) -> float:
"""
Virtual method.
"""
reward = 0.0
return reward
def on_episode_success(self):
"""
Virtual method.
"""
pass
def on_episode_failure(self):
"""
Virtual method.
"""
pass
def on_episode_timeout(self):
"""
Virtual method.
"""
pass
class SuccessRateImpl:
"""
Moving average over the success rate of the last N episodes.
Acts as a cumulative mean until N samples are collected, then as an exponential moving average with weight (N - 1) / N.
"""
INFO_CURRICULUM_PREFIX: str = "curriculum/"
def __init__(
self,
initial_success_rate: float = 0.0,
rolling_average_n: int = 100,
**kwargs,
):
self.__success_rate = initial_success_rate
self.__rolling_average_n = rolling_average_n
# Setup internals
self.__previous_success_rate_weight: int = 0
self.__collected_samples: int = 0
def get_info(self) -> Dict:
info = {
f"{self.INFO_CURRICULUM_PREFIX}_success_rate": self.__success_rate,
}
return info
def update_success_rate(self, is_success: bool):
# Until `rolling_average_n` is reached, use number of collected samples during computations
if self.__collected_samples < self.__rolling_average_n:
self.__previous_success_rate_weight = self.__collected_samples
self.__collected_samples += 1
self.__success_rate = (
self.__previous_success_rate_weight * self.__success_rate
+ float(is_success)
) / self.__collected_samples
@property
def success_rate(self) -> float:
return self.__success_rate
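# Worked example (illustrative): with rolling_average_n=100 and an initial
# success rate of 0.0, the update acts as a cumulative mean for the first 100
# episodes and as an exponential moving average afterwards:
#
#   episode 1 (success): (0 * 0.0 + 1.0) / 1 = 1.0
#   episode 2 (failure): (1 * 1.0 + 0.0) / 2 = 0.5
#   episode > 100:       (99 * rate + is_success) / 100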
class WorkspaceScaleCurriculum:
"""
Curriculum that increases the workspace size as the success rate increases.
"""
INFO_CURRICULUM_PREFIX: str = "curriculum/"
def __init__(
self,
task: Task,
success_rate_impl: SuccessRateImpl,
min_workspace_scale: float,
max_workspace_volume: Tuple[float, float, float],
max_workspace_scale_success_rate_threshold: float,
**kwargs,
):
self.__task = task
self.__success_rate_impl = success_rate_impl
self.__min_workspace_scale = min_workspace_scale
self.__max_workspace_volume = max_workspace_volume
self.__max_workspace_scale_success_rate_threshold = (
max_workspace_scale_success_rate_threshold
)
# Initialise the scale so that `get_info()` is valid even before the first reset
self.__workspace_scale = min_workspace_scale
def get_info(self) -> Dict:
info = {
f"{self.INFO_CURRICULUM_PREFIX}{INFO_MEAN_EPISODE_KEY}workspace_scale": self.__workspace_scale,
}
return info
def reset_task(self):
# Update workspace size
self.__update_workspace_size()
def __update_workspace_size(self):
self.__workspace_scale = min(
1.0,
max(
self.__min_workspace_scale,
self.__success_rate_impl.success_rate
/ self.__max_workspace_scale_success_rate_threshold,
),
)
workspace_volume_new = (
self.__workspace_scale * self.__max_workspace_volume[0],
self.__workspace_scale * self.__max_workspace_volume[1],
# Z workspace is currently kept the same on purpose
self.__max_workspace_volume[2],
)
workspace_volume_half_new = (
workspace_volume_new[0] / 2,
workspace_volume_new[1] / 2,
workspace_volume_new[2] / 2,
)
workspace_min_bound_new = (
self.__task.workspace_centre[0] - workspace_volume_half_new[0],
self.__task.workspace_centre[1] - workspace_volume_half_new[1],
self.__task.workspace_centre[2] - workspace_volume_half_new[2],
)
workspace_max_bound_new = (
self.__task.workspace_centre[0] + workspace_volume_half_new[0],
self.__task.workspace_centre[1] + workspace_volume_half_new[1],
self.__task.workspace_centre[2] + workspace_volume_half_new[2],
)
self.__task.add_task_parameter_overrides(
{
"workspace_volume": workspace_volume_new,
"workspace_min_bound": workspace_min_bound_new,
"workspace_max_bound": workspace_max_bound_new,
}
)
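# Worked example (illustrative): with min_workspace_scale=0.1,
# max_workspace_scale_success_rate_threshold=0.6 and a current success rate of
# 0.3, the scale becomes min(1.0, max(0.1, 0.3 / 0.6)) = 0.5, i.e. the X/Y
# extents of the workspace are half of their maximum while Z stays unchanged.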
class ObjectSpawnVolumeScaleCurriculum:
"""
Curriculum that increases the object spawn volume as the success rate increases.
"""
INFO_CURRICULUM_PREFIX: str = "curriculum/"
def __init__(
self,
task: Task,
success_rate_impl: SuccessRateImpl,
min_object_spawn_volume_scale: float,
max_object_spawn_volume: Tuple[float, float, float],
max_object_spawn_volume_scale_success_rate_threshold: float,
**kwargs,
):
self.__task = task
self.__success_rate_impl = success_rate_impl
self.__min_object_spawn_volume_scale = min_object_spawn_volume_scale
self.__max_object_spawn_volume = max_object_spawn_volume
self.__max_object_spawn_volume_scale_success_rate_threshold = (
max_object_spawn_volume_scale_success_rate_threshold
)
# Initialise the scale so that `get_info()` is valid even before the first reset
self.__object_spawn_volume_scale = min_object_spawn_volume_scale
def get_info(self) -> Dict:
info = {
f"{self.INFO_CURRICULUM_PREFIX}{INFO_MEAN_EPISODE_KEY}object_spawn_volume_scale": self.__object_spawn_volume_scale,
}
return info
def reset_task(self):
# Update object_spawn_volume size
self.__update_object_spawn_volume_size()
def __update_object_spawn_volume_size(self):
self.__object_spawn_volume_scale = min(
1.0,
max(
self.__min_object_spawn_volume_scale,
self.__success_rate_impl.success_rate
/ self.__max_object_spawn_volume_scale_success_rate_threshold,
),
)
object_spawn_volume_volume_new = (
self.__object_spawn_volume_scale * self.__max_object_spawn_volume[0],
self.__object_spawn_volume_scale * self.__max_object_spawn_volume[1],
self.__object_spawn_volume_scale * self.__max_object_spawn_volume[2],
)
self.__task.add_randomizer_parameter_overrides(
{
"object_random_spawn_volume": object_spawn_volume_volume_new,
}
)
class ObjectCountCurriculum:
"""
Curriculum that increases the number of objects as the success rate increases.
"""
INFO_CURRICULUM_PREFIX: str = "curriculum/"
def __init__(
self,
task: Task,
success_rate_impl: SuccessRateImpl,
object_count_min: int,
object_count_max: int,
max_object_count_success_rate_threshold: float,
**kwargs,
):
self.__task = task
self.__success_rate_impl = success_rate_impl
self.__object_count_min = object_count_min
self.__object_count_max = object_count_max
self.__max_object_count_success_rate_threshold = (
max_object_count_success_rate_threshold
)
self.__object_count_min_max_diff = object_count_max - object_count_min
if self.__object_count_min_max_diff < 0:
raise ValueError(
"'object_count_min' cannot be larger than 'object_count_max'"
)
# Initialise the count so that `get_info()` is valid even before the first reset
self.__object_count = object_count_min
def get_info(self) -> Dict:
info = {
f"{self.INFO_CURRICULUM_PREFIX}object_count": self.__object_count,
}
return info
def reset_task(self):
# Update object count
self.__update_object_count()
def __update_object_count(self):
self.__object_count = min(
self.__object_count_max,
math.floor(
self.__object_count_min
+ (
self.__success_rate_impl.success_rate
/ self.__max_object_count_success_rate_threshold
)
* self.__object_count_min_max_diff
),
)
self.__task.add_randomizer_parameter_overrides(
{
"object_count": self.__object_count,
}
)
class ArmStuckChecker:
"""
Checker for arm getting stuck.
"""
INFO_CURRICULUM_PREFIX: str = "curriculum/"
def __init__(
self,
task: Task,
arm_stuck_n_steps: int,
arm_stuck_min_joint_difference_norm: float,
**kwargs,
):
self.__task = task
self.__arm_stuck_min_joint_difference_norm = arm_stuck_min_joint_difference_norm
# List of previous joint positions (used to compute the difference norm against an older reading)
self.__previous_joint_positions: Deque[np.ndarray] = deque(
[], maxlen=arm_stuck_n_steps
)
# Counter of how many times the robot got stuck
self.__robot_stuck_total_counter: int = 0
# Initialize list of indices for the arm.
# It is assumed that these indices do not change during the operation
self.__arm_joint_indices = None
def get_info(self) -> Dict:
info = {
f"{self.INFO_CURRICULUM_PREFIX}robot_stuck_count": self.__robot_stuck_total_counter,
}
return info
def reset_task(self):
self.__previous_joint_positions.clear()
joint_positions = self.__get_arm_joint_positions()
if joint_positions is not None:
self.__previous_joint_positions.append(joint_positions)
def is_robot_stuck(self) -> bool:
# Get current position and append to the list of previous ones
current_joint_positions = self.__get_arm_joint_positions()
if current_joint_positions is not None:
self.__previous_joint_positions.append(current_joint_positions)
# Stop checking if there is not yet enough entries in the list
if (
len(self.__previous_joint_positions)
< self.__previous_joint_positions.maxlen
):
return False
# Make sure the length of joint position matches
if len(current_joint_positions) != len(self.__previous_joint_positions[0]):
return False
# Compute joint difference norm only with the `t - arm_stuck_n_steps` entry first (performance reason)
joint_difference_norm = np.linalg.norm(
current_joint_positions - self.__previous_joint_positions[0]
)
# If the difference is large enough, the arm does not appear to be stuck, so skip computing all other entries
if joint_difference_norm > self.__arm_stuck_min_joint_difference_norm:
return False
# If it is too small, consider all other entries as well
joint_difference_norms = np.linalg.norm(
current_joint_positions
- list(itertools.islice(self.__previous_joint_positions, 1, None)),
axis=1,
)
# Return true (stuck) if all joint difference entries are too small
is_stuck = all(
joint_difference_norms < self.__arm_stuck_min_joint_difference_norm
)
self.__robot_stuck_total_counter += int(is_stuck)
return is_stuck
def __get_arm_joint_positions(self) -> Optional[np.ndarray]:
joint_state = self.__task.moveit2.joint_state
if joint_state is None:
return None
if self.__arm_joint_indices is None:
self.__arm_joint_indices = [
i
for i, joint_name in enumerate(joint_state.name)
if joint_name in self.__task.robot_arm_joint_names
]
return np.take(joint_state.position, self.__arm_joint_indices)
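# Worked example (illustrative): with arm_stuck_n_steps=10 and
# arm_stuck_min_joint_difference_norm=0.01, the arm is considered stuck only
# if the L2 norm between the current joint-position vector and *every* one of
# the 10 buffered past readings stays below 0.01.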
class AttributeCurriculum:
"""
Curriculum that increases the value of an attribute (e.g. a requirement) as the success rate increases.
Currently supports only attributes whose value increases.
"""
INFO_CURRICULUM_PREFIX: str = "curriculum/"
def __init__(
self,
success_rate_impl: SuccessRateImpl,
attribute_owner: Type,
attribute_name: str,
initial_value: float,
target_value: float,
target_value_threshold: float,
**kwargs,
):
self.__success_rate_impl = success_rate_impl
self.__attribute_owner = attribute_owner
self.__attribute_name = attribute_name
self.__initial_value = initial_value
self.__target_value_threshold = target_value_threshold
# Initialise current value of the attribute
self.__current_value = initial_value
# Store difference for faster computations
self.__value_diff = target_value - initial_value
def get_info(self) -> Dict:
info = {
f"{self.INFO_CURRICULUM_PREFIX}{self.__attribute_name}": self.__current_value,
}
return info
def reset_task(self):
# Update object count
self.__update_attribute()
def __update_attribute(self):
# Clamp the interpolation factor to [0.0, 1.0] based on the current success rate
scale = min(
1.0,
max(
0.0,
self.__success_rate_impl.success_rate / self.__target_value_threshold,
),
)
self.__current_value = self.__initial_value + (scale * self.__value_diff)
if hasattr(self.__attribute_owner, self.__attribute_name):
setattr(self.__attribute_owner, self.__attribute_name, self.__current_value)
elif hasattr(self.__attribute_owner, f"_{self.__attribute_name}"):
setattr(
self.__attribute_owner,
f"_{self.__attribute_name}",
self.__current_value,
)
elif hasattr(self.__attribute_owner, f"__{self.__attribute_name}"):
setattr(
self.__attribute_owner,
f"__{self.__attribute_name}",
self.__current_value,
)
else:
raise AttributeError(
f"Attribute owner '{self.__attribute_owner}' does not have any attribute named '{self.__attribute_name}'."
)
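# Usage sketch (illustrative): gradually raise a distance requirement from
# 0.1 to 0.3 as the success rate approaches 0.5; all values are hypothetical.
#
#   curriculum = AttributeCurriculum(
#       success_rate_impl=success_rate,  # a SuccessRateImpl instance
#       attribute_owner=task,            # object exposing `reach_required_distance`
#       attribute_name="reach_required_distance",
#       initial_value=0.1,
#       target_value=0.3,
#       target_value_threshold=0.5,
#   )
#   curriculum.reset_task()  # call at each episode reset to apply the new value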

View file

@ -0,0 +1,341 @@
from typing import Dict, List, Tuple
from gym_gz.base.task import Task
from rbs_gym.envs.tasks.curriculums.common import *
from rbs_gym.envs.utils.math import distance_to_nearest_point
class GraspStage(CurriculumStage):
"""
Ordered enum that represents stages of a curriculum for Grasp (and GraspPlanetary) task.
"""
REACH = 1
TOUCH = 2
GRASP = 3
LIFT = 4
class GraspCurriculum(
StageRewardCurriculum,
SuccessRateImpl,
WorkspaceScaleCurriculum,
ObjectSpawnVolumeScaleCurriculum,
ObjectCountCurriculum,
ArmStuckChecker,
):
"""
Curriculum learning implementation for grasp task that provides termination (success/fail) and reward for each stage of the task.
"""
def __init__(
self,
task: Task,
stages_base_reward: float,
reach_required_distance: float,
lift_required_height: float,
persistent_reward_each_step: float,
persistent_reward_terrain_collision: float,
persistent_reward_all_objects_outside_workspace: float,
persistent_reward_arm_stuck: float,
enable_stage_reward_curriculum: bool,
enable_workspace_scale_curriculum: bool,
enable_object_spawn_volume_scale_curriculum: bool,
enable_object_count_curriculum: bool,
reach_required_distance_min: Optional[float] = None,
reach_required_distance_max: Optional[float] = None,
reach_required_distance_max_threshold: Optional[float] = None,
lift_required_height_min: Optional[float] = None,
lift_required_height_max: Optional[float] = None,
lift_required_height_max_threshold: Optional[float] = None,
**kwargs,
):
StageRewardCurriculum.__init__(self, curriculum_stage=GraspStage, **kwargs)
SuccessRateImpl.__init__(self, **kwargs)
WorkspaceScaleCurriculum.__init__(
self, task=task, success_rate_impl=self, **kwargs
)
ObjectSpawnVolumeScaleCurriculum.__init__(
self, task=task, success_rate_impl=self, **kwargs
)
ObjectCountCurriculum.__init__(
self, task=task, success_rate_impl=self, **kwargs
)
ArmStuckChecker.__init__(self, task=task, **kwargs)
# Grasp task/environment that will be used to extract information from the scene
self.__task = task
# Parameters
self.__stages_base_reward = stages_base_reward
self.reach_required_distance = reach_required_distance
self.lift_required_height = lift_required_height
self.__persistent_reward_each_step = persistent_reward_each_step
self.__persistent_reward_terrain_collision = persistent_reward_terrain_collision
self.__persistent_reward_all_objects_outside_workspace = (
persistent_reward_all_objects_outside_workspace
)
self.__persistent_reward_arm_stuck = persistent_reward_arm_stuck
self.__enable_stage_reward_curriculum = enable_stage_reward_curriculum
self.__enable_workspace_scale_curriculum = enable_workspace_scale_curriculum
self.__enable_object_spawn_volume_scale_curriculum = (
enable_object_spawn_volume_scale_curriculum
)
self.__enable_object_count_curriculum = enable_object_count_curriculum
# Make sure that the persistent rewards for each step are negative
if self.__persistent_reward_each_step > 0.0:
self.__persistent_reward_each_step *= -1.0
if self.__persistent_reward_terrain_collision > 0.0:
self.__persistent_reward_terrain_collision *= -1.0
if self.__persistent_reward_all_objects_outside_workspace > 0.0:
self.__persistent_reward_all_objects_outside_workspace *= -1.0
if self.__persistent_reward_arm_stuck > 0.0:
self.__persistent_reward_arm_stuck *= -1.0
# Setup curriculum for Reach distance requirement (if enabled)
reach_required_distance_min = (
reach_required_distance_min
if reach_required_distance_min is not None
else reach_required_distance
)
reach_required_distance_max = (
reach_required_distance_max
if reach_required_distance_max is not None
else reach_required_distance
)
reach_required_distance_max_threshold = (
reach_required_distance_max_threshold
if reach_required_distance_max_threshold is not None
else 0.5
)
self.__reach_required_distance_curriculum_enabled = (
reach_required_distance_min != reach_required_distance_max
)
if self.__reach_required_distance_curriculum_enabled:
self.__reach_required_distance_curriculum = AttributeCurriculum(
success_rate_impl=self,
attribute_owner=self,
attribute_name="reach_required_distance",
initial_value=reach_required_distance_min,
target_value=reach_required_distance_max,
target_value_threshold=reach_required_distance_max_threshold,
)
# Setup curriculum for Lift height requirement (if enabled)
lift_required_height_min = (
lift_required_height_min
if lift_required_height_min is not None
else lift_required_height
)
lift_required_height_max = (
lift_required_height_max
if lift_required_height_max is not None
else lift_required_height
)
lift_required_height_max_threshold = (
lift_required_height_max_threshold
if lift_required_height_max_threshold is not None
else 0.5
)
# Offset the Lift height requirements by the robot base offset
# Note: the success-rate threshold is a dimensionless fraction and is intentionally not offset
lift_required_height += task.robot_model_class.BASE_LINK_Z_OFFSET
lift_required_height_min += task.robot_model_class.BASE_LINK_Z_OFFSET
lift_required_height_max += task.robot_model_class.BASE_LINK_Z_OFFSET
self.__lift_required_height_curriculum_enabled = (
lift_required_height_min != lift_required_height_max
)
if self.__lift_required_height_curriculum_enabled:
self.__lift_required_height_curriculum = AttributeCurriculum(
success_rate_impl=self,
attribute_owner=self,
attribute_name="lift_required_height",
initial_value=lift_required_height_min,
target_value=lift_required_height_max,
target_value_threshold=lift_required_height_max_threshold,
)
def get_reward(self) -> Reward:
if self.__enable_stage_reward_curriculum:
# Try to get reward from each stage
return StageRewardCurriculum.get_reward(
self,
ee_position=self.__task.get_ee_position(),
object_positions=self.__task.get_object_positions(),
touched_objects=self.__task.get_touched_objects(),
grasped_objects=self.__task.get_grasped_objects(),
)
else:
# If curriculum for stages is disabled, compute reward only for the last stage
return StageRewardCurriculum.get_reward(
self,
only_last_stage=True,
object_positions=self.__task.get_object_positions(),
grasped_objects=self.__task.get_grasped_objects(),
)
def is_done(self) -> bool:
return StageRewardCurriculum.is_done(self)
def get_info(self) -> Dict:
info = StageRewardCurriculum.get_info(self)
info.update(SuccessRateImpl.get_info(self))
if self.__enable_workspace_scale_curriculum:
info.update(WorkspaceScaleCurriculum.get_info(self))
if self.__enable_object_spawn_volume_scale_curriculum:
info.update(ObjectSpawnVolumeScaleCurriculum.get_info(self))
if self.__enable_object_count_curriculum:
info.update(ObjectCountCurriculum.get_info(self))
if self.__persistent_reward_arm_stuck:
info.update(ArmStuckChecker.get_info(self))
if self.__reach_required_distance_curriculum_enabled:
info.update(self.__reach_required_distance_curriculum.get_info())
if self.__lift_required_height_curriculum_enabled:
info.update(self.__lift_required_height_curriculum.get_info())
return info
def reset_task(self):
StageRewardCurriculum.reset_task(self)
if self.__enable_workspace_scale_curriculum:
WorkspaceScaleCurriculum.reset_task(self)
if self.__enable_object_spawn_volume_scale_curriculum:
ObjectSpawnVolumeScaleCurriculum.reset_task(self)
if self.__enable_object_count_curriculum:
ObjectCountCurriculum.reset_task(self)
if self.__persistent_reward_arm_stuck:
ArmStuckChecker.reset_task(self)
if self.__reach_required_distance_curriculum_enabled:
self.__reach_required_distance_curriculum.reset_task()
if self.__lift_required_height_curriculum_enabled:
self.__lift_required_height_curriculum.reset_task()
def on_episode_success(self):
self.update_success_rate(is_success=True)
def on_episode_failure(self):
self.update_success_rate(is_success=False)
def on_episode_timeout(self):
self.update_success_rate(is_success=False)
def get_reward_REACH(
self,
ee_position: Tuple[float, float, float],
object_positions: Dict[str, Tuple[float, float, float]],
**kwargs,
) -> float:
if not object_positions:
return 0.0
nearest_object_distance = distance_to_nearest_point(
origin=ee_position, points=list(object_positions.values())
)
self.__task.get_logger().debug(
f"[Curriculum] Distance to nearest object: {nearest_object_distance}"
)
if nearest_object_distance < self.reach_required_distance:
self.__task.get_logger().info(
f"[Curriculum] An object is now closer than the required distance of {self.reach_required_distance}"
)
self.stages_completed_this_episode[GraspStage.REACH] = True
return self.__stages_base_reward
else:
return 0.0
def get_reward_TOUCH(self, touched_objects: List[str], **kwargs) -> float:
if touched_objects:
self.__task.get_logger().info(
f"[Curriculum] Touched objects: {touched_objects}"
)
self.stages_completed_this_episode[GraspStage.TOUCH] = True
return self.__stages_base_reward
else:
return 0.0
def get_reward_GRASP(self, grasped_objects: List[str], **kwargs) -> float:
if grasped_objects:
self.__task.get_logger().info(
f"[Curriculum] Grasped objects: {grasped_objects}"
)
self.stages_completed_this_episode[GraspStage.GRASP] = True
return self.__stages_base_reward
else:
return 0.0
def get_reward_LIFT(
self,
object_positions: Dict[str, Tuple[float, float, float]],
grasped_objects: List[str],
**kwargs,
) -> float:
# Both grasped objects and their positions are needed to evaluate lifting
if not (grasped_objects and object_positions):
return 0.0
for grasped_object in grasped_objects:
grasped_object_height = object_positions[grasped_object][2]
self.__task.get_logger().debug(
f"[Curriculum] Height of grasped object '{grasped_object}': {grasped_object_height}"
)
if grasped_object_height > self.lift_required_height:
self.__task.get_logger().info(
f"[Curriculum] Lifted object: {grasped_object}"
)
self.stages_completed_this_episode[GraspStage.LIFT] = True
return self.__stages_base_reward
return 0.0
def get_persistent_reward(
self, object_positions: Dict[str, Tuple[float, float, float]], **kwargs
) -> float:
# Subtract a small reward each step to provide incentive to act quickly
reward = self.__persistent_reward_each_step
# Negative reward for colliding with terrain
if self.__persistent_reward_terrain_collision:
if self.__task.check_terrain_collision():
self.__task.get_logger().info(
"[Curriculum] Robot collided with the terrain"
)
reward += self.__persistent_reward_terrain_collision
# Negative reward for having all objects outside of the workspace
if self.__persistent_reward_all_objects_outside_workspace:
if self.__task.check_all_objects_outside_workspace(
object_positions=object_positions
):
self.__task.get_logger().warn(
"[Curriculum] All objects are outside of the workspace"
)
reward += self.__persistent_reward_all_objects_outside_workspace
self.episode_failed = True
# Negative reward for arm getting stuck
if self.__persistent_reward_arm_stuck:
if ArmStuckChecker.is_robot_stuck(self):
self.__task.get_logger().error(
"[Curriculum] Robot appears to be stuck, resetting..."
)
reward += self.__persistent_reward_arm_stuck
self.episode_failed = True
return reward
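# Worked example (illustrative): with stages_base_reward=1.0 and
# stage_reward_multiplier=2.0, completing REACH, TOUCH, GRASP and LIFT within
# a single episode yields 1*1 + 1*2 + 1*4 + 1*8 = 15 (before the persistent
# per-step and failure penalties), since stage k is scaled by multiplier**(k-1).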

View file

@ -0,0 +1,670 @@
import abc
import multiprocessing
import sys
from itertools import count
from threading import Thread
from typing import Any, Dict, Optional, Tuple, Union
import numpy as np
import rclpy
from gym_gz.base.task import Task
from gym_gz.utils.typing import (
Action,
ActionSpace,
Observation,
ObservationSpace,
Reward,
)
from rclpy.callback_groups import ReentrantCallbackGroup
from rclpy.executors import MultiThreadedExecutor, SingleThreadedExecutor
from rclpy.node import Node
from scipy.spatial.transform import Rotation
from rbs_gym.envs.control import *
from rbs_gym.envs.models.robots import get_robot_model_class
from rbs_gym.envs.utils import Tf2Broadcaster, Tf2Listener
from rbs_gym.envs.utils.conversions import orientation_6d_to_quat
from rbs_gym.envs.utils.gazebo import *
from rbs_gym.envs.utils.math import quat_mul
class Manipulation(Task, Node, abc.ABC):
_ids = count(0)
def __init__(
self,
agent_rate: float,
robot_model: str,
workspace_frame_id: str,
workspace_centre: Tuple[float, float, float],
workspace_volume: Tuple[float, float, float],
ignore_new_actions_while_executing: bool,
use_servo: bool,
scaling_factor_translation: float,
scaling_factor_rotation: float,
restrict_position_goal_to_workspace: bool,
enable_gripper: bool,
num_threads: int,
**kwargs,
):
# Get next ID for this task instance
self.id = next(self._ids)
# Initialize the Task base class
Task.__init__(self, agent_rate=agent_rate)
# Initialize ROS 2 context (if not done before)
try:
rclpy.init()
except Exception as e:
if not rclpy.ok():
sys.exit(f"ROS 2 context could not be initialised: {e}")
# Initialize ROS 2 Node base class
Node.__init__(self, f"rbs_gym_{self.id}")
# Create callback group that allows execution of callbacks in parallel without restrictions
self._callback_group = ReentrantCallbackGroup()
# Create executor
if num_threads == 1:
executor = SingleThreadedExecutor()
elif num_threads > 1:
executor = MultiThreadedExecutor(
num_threads=num_threads,
)
else:
executor = MultiThreadedExecutor(num_threads=multiprocessing.cpu_count())
# Add this node to the executor
executor.add_node(self)
# Spin this node in background thread(s)
self._executor_thread = Thread(target=executor.spin, daemon=True, args=())
self._executor_thread.start()
# Get class of the robot model based on passed argument
self.robot_model_class = get_robot_model_class(robot_model)
# Store passed arguments for later use
self.workspace_centre = (
workspace_centre[0],
workspace_centre[1],
workspace_centre[2] + self.robot_model_class.BASE_LINK_Z_OFFSET,
)
self.workspace_volume = workspace_volume
self._restrict_position_goal_to_workspace = restrict_position_goal_to_workspace
self._use_servo = use_servo
self.__scaling_factor_translation = scaling_factor_translation
self.__scaling_factor_rotation = scaling_factor_rotation
self._enable_gripper = enable_gripper
# Get workspace bounds, useful in many computations
workspace_volume_half = (
workspace_volume[0] / 2,
workspace_volume[1] / 2,
workspace_volume[2] / 2,
)
self.workspace_min_bound = (
self.workspace_centre[0] - workspace_volume_half[0],
self.workspace_centre[1] - workspace_volume_half[1],
self.workspace_centre[2] - workspace_volume_half[2],
)
self.workspace_max_bound = (
self.workspace_centre[0] + workspace_volume_half[0],
self.workspace_centre[1] + workspace_volume_half[1],
self.workspace_centre[2] + workspace_volume_half[2],
)
# Determine robot name and prefix based on current ID of the task
self.robot_prefix: str = self.robot_model_class.DEFAULT_PREFIX
if 0 == self.id:
self.robot_name = self.robot_model_class.ROBOT_MODEL_NAME
else:
self.robot_name = f"{self.robot_model_class.ROBOT_MODEL_NAME}{self.id}"
if self.robot_prefix.endswith("_"):
self.robot_prefix = f"{self.robot_prefix[:-1]}{self.id}_"
elif self.robot_prefix == "":
self.robot_prefix = f"robot{self.id}_"
# Names of specific robot links, useful all around the code
self.robot_base_link_name = self.robot_model_class.get_robot_base_link_name(
self.robot_prefix
)
self.robot_arm_base_link_name = self.robot_model_class.get_arm_base_link_name(
self.robot_prefix
)
self.robot_ee_link_name = self.robot_model_class.get_ee_link_name(
self.robot_prefix
)
self.robot_arm_link_names = self.robot_model_class.get_arm_link_names(
self.robot_prefix
)
self.robot_gripper_link_names = self.robot_model_class.get_gripper_link_names(
self.robot_prefix
)
self.robot_arm_joint_names = self.robot_model_class.get_arm_joint_names(
self.robot_prefix
)
self.robot_gripper_joint_names = self.robot_model_class.get_gripper_joint_names(
self.robot_prefix
)
# Get exact name substitution of the frame for workspace
self.workspace_frame_id = self.substitute_special_frame(workspace_frame_id)
# Specify initial positions (default configuration is used here)
self.initial_arm_joint_positions = (
self.robot_model_class.DEFAULT_ARM_JOINT_POSITIONS
)
self.initial_gripper_joint_positions = (
self.robot_model_class.DEFAULT_GRIPPER_JOINT_POSITIONS
)
# Names of important models (in addition to robot model)
self.terrain_name = "terrain"
self.object_names = []
# Setup listener and broadcaster of transforms via tf2
self.tf2_listener = Tf2Listener(node=self)
self.tf2_broadcaster = Tf2Broadcaster(node=self)
self.cartesian_control = True  # TODO: make this an external parameter
# Setup controllers
self.controller = CartesianForceController(self)
if not self.cartesian_control:
self.joint_controller = JointEffortController(self)
if self._enable_gripper:
self.gripper = GripperController(self, 0.064)
# Initialize task and randomizer overrides (e.g. from curriculum)
# Both of these are consumed at the beginning of reset
self.__task_parameter_overrides: Dict[str, Any] = {}
self._randomizer_parameter_overrides: Dict[str, Any] = {}
def create_spaces(self) -> Tuple[ActionSpace, ObservationSpace]:
action_space = self.create_action_space()
observation_space = self.create_observation_space()
return action_space, observation_space
def create_action_space(self) -> ActionSpace:
raise NotImplementedError()
def create_observation_space(self) -> ObservationSpace:
raise NotImplementedError()
def set_action(self, action: Action):
raise NotImplementedError()
def get_observation(self) -> Observation:
raise NotImplementedError()
def get_reward(self) -> Reward:
raise NotImplementedError()
def is_done(self) -> bool:
raise NotImplementedError()
def reset_task(self):
self.__consume_parameter_overrides()
# Helper functions #
def get_relative_ee_position(
self, translation: Tuple[float, float, float]
) -> Tuple[float, float, float]:
# Scale relative action to metric units
translation = self.scale_relative_translation(translation)
# Get current position
current_position = self.get_ee_position()
# Compute target position
target_position = (
current_position[0] + translation[0],
current_position[1] + translation[1],
current_position[2] + translation[2],
)
# Restrict target position to a limited workspace, if desired
if self._restrict_position_goal_to_workspace:
target_position = self.restrict_position_goal_to_workspace(target_position)
return target_position
def get_relative_ee_orientation(
self,
rotation: Union[
float,
Tuple[float, float, float, float],
Tuple[float, float, float, float, float, float],
],
representation: str = "quat",
) -> Tuple[float, float, float, float]:
# Get current orientation
current_quat_xyzw = self.get_ee_orientation()
# For the 'z' representation, the result should always point down
# Therefore, create a new quaternion that contains only the yaw component
if "z" == representation:
current_yaw = Rotation.from_quat(current_quat_xyzw).as_euler("xyz")[2]
current_quat_xyzw = Rotation.from_euler(
"xyz", [np.pi, 0, current_yaw]
).as_quat()
# Convert relative orientation representation to quaternion
relative_quat_xyzw = None
if "quat" == representation:
relative_quat_xyzw = rotation
elif "6d" == representation:
vectors = tuple(
rotation[x : x + 3] for x, _ in enumerate(rotation) if x % 3 == 0
)
relative_quat_xyzw = orientation_6d_to_quat(vectors[0], vectors[1])
elif "z" == representation:
rotation = self.scale_relative_rotation(rotation)
relative_quat_xyzw = Rotation.from_euler("xyz", [0, 0, rotation]).as_quat()
# Compute target position (combine quaternions)
target_quat_xyzw = quat_mul(current_quat_xyzw, relative_quat_xyzw)
# Normalise quaternion (should not be needed, but just to be safe)
target_quat_xyzw /= np.linalg.norm(target_quat_xyzw)
return target_quat_xyzw
def scale_relative_translation(
self, translation: Tuple[float, float, float]
) -> Tuple[float, float, float]:
return (
self.__scaling_factor_translation * translation[0],
self.__scaling_factor_translation * translation[1],
self.__scaling_factor_translation * translation[2],
)
def scale_relative_rotation(
self,
rotation: Union[float, Tuple[float, float, float], np.floating, np.ndarray],
) -> float:
if not hasattr(rotation, "__len__"):
return self.__scaling_factor_rotation * rotation
else:
return (
self.__scaling_factor_rotation * rotation[0],
self.__scaling_factor_rotation * rotation[1],
self.__scaling_factor_rotation * rotation[2],
)
def restrict_position_goal_to_workspace(
self, position: Tuple[float, float, float]
) -> Tuple[float, float, float]:
return (
min(
self.workspace_max_bound[0],
max(
self.workspace_min_bound[0],
position[0],
),
),
min(
self.workspace_max_bound[1],
max(
self.workspace_min_bound[1],
position[1],
),
),
min(
self.workspace_max_bound[2],
max(
self.workspace_min_bound[2],
position[2],
),
),
)
def restrict_servo_translation_to_workspace(
self, translation: Tuple[float, float, float]
) -> Tuple[float, float, float]:
current_ee_position = self.get_ee_position()
translation = tuple(
0.0
if (
current_ee_position[i] > self.workspace_max_bound[i]
and translation[i] > 0.0
)
or (
current_ee_position[i] < self.workspace_min_bound[i]
and translation[i] < 0.0
)
else translation[i]
for i in range(3)
)
return translation
def get_ee_pose(
self,
) -> Optional[Tuple[Tuple[float, float, float], Tuple[float, float, float, float]]]:
"""
Return the current pose of the end effector with respect to arm base link.
"""
try:
robot_model = self.world.to_gazebo().get_model(self.robot_name).to_gazebo()
ee_position, ee_quat_xyzw = get_model_pose(
world=self.world,
model=robot_model,
link=self.robot_ee_link_name,
xyzw=True,
)
return transform_change_reference_frame_pose(
world=self.world,
position=ee_position,
quat=ee_quat_xyzw,
target_model=robot_model,
target_link=self.robot_arm_base_link_name,
xyzw=True,
)
except Exception as e:
self.get_logger().warn(
f"Cannot get end effector pose from Gazebo ({e}), using tf2..."
)
transform = self.tf2_listener.lookup_transform_sync(
source_frame=self.robot_ee_link_name,
target_frame=self.robot_arm_base_link_name,
retry=False,
)
if transform is not None:
return (
(
transform.translation.x,
transform.translation.y,
transform.translation.z,
),
(
transform.rotation.x,
transform.rotation.y,
transform.rotation.z,
transform.rotation.w,
),
)
else:
self.get_logger().error(
"Cannot get pose of the end effector (default values are returned)"
)
return (
(0.0, 0.0, 0.0),
(0.0, 0.0, 0.0, 1.0),
)
def get_ee_position(self) -> Tuple[float, float, float]:
"""
Return the current position of the end effector with respect to arm base link.
"""
try:
robot_model = self.world.to_gazebo().get_model(self.robot_name).to_gazebo()
ee_position = get_model_position(
world=self.world,
model=robot_model,
link=self.robot_ee_link_name,
)
return transform_change_reference_frame_position(
world=self.world,
position=ee_position,
target_model=robot_model,
target_link=self.robot_arm_base_link_name,
)
except Exception as e:
self.get_logger().debug(
f"Cannot get end effector position from Gazebo ({e}), using tf2..."
)
transform = self.tf2_listener.lookup_transform_sync(
source_frame=self.robot_ee_link_name,
target_frame=self.robot_arm_base_link_name,
retry=False,
)
if transform is not None:
return (
transform.translation.x,
transform.translation.y,
transform.translation.z,
)
else:
self.get_logger().error(
"Cannot get position of the end effector (default values are returned)"
)
return (0.0, 0.0, 0.0)
def get_ee_orientation(self) -> Tuple[float, float, float, float]:
"""
Return the current xyzw quaternion of the end effector with respect to arm base link.
"""
try:
robot_model = self.world.to_gazebo().get_model(self.robot_name).to_gazebo()
ee_quat_xyzw = get_model_orientation(
world=self.world,
model=robot_model,
link=self.robot_ee_link_name,
xyzw=True,
)
return transform_change_reference_frame_orientation(
world=self.world,
quat=ee_quat_xyzw,
target_model=robot_model,
target_link=self.robot_arm_base_link_name,
xyzw=True,
)
except Exception as e:
self.get_logger().warn(
f"Cannot get end effector orientation from Gazebo ({e}), using tf2..."
)
transform = self.tf2_listener.lookup_transform_sync(
source_frame=self.robot_ee_link_name,
target_frame=self.robot_arm_base_link_name,
retry=False,
)
if transform is not None:
return (
transform.rotation.x,
transform.rotation.y,
transform.rotation.z,
transform.rotation.w,
)
else:
self.get_logger().error(
"Cannot get orientation of the end effector (default values are returned)"
)
return (0.0, 0.0, 0.0, 1.0)
def get_object_position(
self, object_model: Union[ModelWrapper, str]
) -> Tuple[float, float, float]:
"""
Return the current position of an object with respect to arm base link.
Note: Only simulated objects are currently supported.
"""
try:
object_position = get_model_position(
world=self.world,
model=object_model,
)
return transform_change_reference_frame_position(
world=self.world,
position=object_position,
target_model=self.robot_name,
target_link=self.robot_arm_base_link_name,
)
except Exception as e:
self.get_logger().error(
f"Cannot get position of {object_model} object (default values are returned): {e}"
)
return (0.0, 0.0, 0.0)
def get_object_positions(self) -> Dict[str, Tuple[float, float, float]]:
"""
Return the current position of all objects with respect to arm base link.
Note: Only simulated objects are currently supported.
"""
object_positions = {}
try:
robot_model = self.world.to_gazebo().get_model(self.robot_name).to_gazebo()
robot_arm_base_link = robot_model.get_link(
link_name=self.robot_arm_base_link_name
)
for object_name in self.object_names:
object_position = get_model_position(
world=self.world,
model=object_name,
)
object_positions[
object_name
] = transform_change_reference_frame_position(
world=self.world,
position=object_position,
target_model=robot_model,
target_link=robot_arm_base_link,
)
except Exception as e:
self.get_logger().error(
f"Cannot get positions of all objects (empty Dict is returned): {e}"
)
return object_positions
def substitute_special_frame(self, frame_id: str) -> str:
if "arm_base_link" == frame_id:
return self.robot_arm_base_link_name
elif "base_link" == frame_id:
return self.robot_base_link_name
elif "end_effector" == frame_id:
return self.robot_ee_link_name
elif "world" == frame_id:
try:
# In Gazebo, where multiple worlds are allowed
return self.world.to_gazebo().name()
except Exception as e:
self.get_logger().warn(f"Cannot get world name from Gazebo ({e}), using the default name instead")
# Otherwise (e.g. real world)
return "rbs_gym_world"
else:
return frame_id
def wait_until_action_executed(self):
if self._enable_gripper:
self.gripper.wait_until_executed()
def move_to_initial_joint_configuration(self):
pass
# self.moveit2.move_to_configuration(self.initial_arm_joint_positions)
# if (
# self.robot_model_class.CLOSED_GRIPPER_JOINT_POSITIONS
# == self.initial_gripper_joint_positions
# ):
# self.gripper.reset_close()
# else:
# self.gripper.reset_open()
def check_terrain_collision(self) -> bool:
"""
Returns true if robot links are in collision with the ground.
"""
robot_name_len = len(self.robot_name)
terrain_model = self.world.get_model(self.terrain_name)
for contact in terrain_model.contacts():
body_b = contact.body_b
if body_b.startswith(self.robot_name) and len(body_b) > robot_name_len:
# Strip the scoped "<model>::" prefix to obtain the link name
link = body_b[robot_name_len + 2:]
if link != self.robot_base_link_name and (
link in self.robot_arm_link_names or link in self.robot_gripper_link_names
):
return True
return False
def check_all_objects_outside_workspace(
self,
object_positions: Dict[str, Tuple[float, float, float]],
) -> bool:
"""
Returns true if all objects are outside the workspace
"""
return all(
[
self.check_object_outside_workspace(object_position)
for object_position in object_positions.values()
]
)
def check_object_outside_workspace(
self,
object_position: Tuple[float, float, float],
) -> bool:
"""
Returns true if the object is outside the workspace
"""
return (
object_position[0] < self.workspace_min_bound[0]
or object_position[1] < self.workspace_min_bound[1]
or object_position[2] < self.workspace_min_bound[2]
or object_position[0] > self.workspace_max_bound[0]
or object_position[1] > self.workspace_max_bound[1]
or object_position[2] > self.workspace_max_bound[2]
)
def add_parameter_overrides(self, parameter_overrides: Dict[str, Any]):
self.add_task_parameter_overrides(parameter_overrides)
self.add_randomizer_parameter_overrides(parameter_overrides)
def add_task_parameter_overrides(self, parameter_overrides: Dict[str, Any]):
self.__task_parameter_overrides.update(parameter_overrides)
def add_randomizer_parameter_overrides(self, parameter_overrides: Dict[str, Any]):
self._randomizer_parameter_overrides.update(parameter_overrides)
def __consume_parameter_overrides(self):
for key, value in self.__task_parameter_overrides.items():
if hasattr(self, key):
setattr(self, key, value)
elif hasattr(self, f"_{key}"):
setattr(self, f"_{key}", value)
elif hasattr(self, f"__{key}"):
setattr(self, f"__{key}", value)
else:
self.get_logger().error(
f"Override '{key}' is not supperted by the task."
)
self.__task_parameter_overrides.clear()
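# Usage sketch (illustrative): curricula queue parameter overrides that are
# consumed at the beginning of the next reset, e.g.
#
#   task.add_task_parameter_overrides({"workspace_volume": (0.5, 0.5, 0.5)})
#   task.reset_task()  # applies the override to `self.workspace_volume`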

View file

@ -0,0 +1,4 @@
from .reach import Reach
from .reach_color_image import ReachColorImage
from .reach_depth_image import ReachDepthImage
# from .reach_octree import ReachOctree

View file

@ -0,0 +1,194 @@
import abc
from typing import Optional, Tuple
from geometry_msgs.msg import WrenchStamped
import gymnasium as gym
import numpy as np
from gym_gz.utils.typing import (
Action,
ActionSpace,
Observation,
ObservationSpace,
Reward,
)
from rbs_gym.envs.tasks.manipulation import Manipulation
from rbs_gym.envs.utils.math import distance_to_nearest_point
from std_msgs.msg import Float64MultiArray
from rbs_gym.envs.observation import TwistSubscriber, JointStates
class Reach(Manipulation, abc.ABC):
def __init__(
self,
sparse_reward: bool,
collision_reward: float,
act_quick_reward: float,
required_accuracy: float,
**kwargs,
):
# Initialize the Task base class
Manipulation.__init__(
self,
**kwargs,
)
# Additional parameters
self._sparse_reward: bool = sparse_reward
self._act_quick_reward = (
act_quick_reward if act_quick_reward >= 0.0 else -act_quick_reward
)
self._collision_reward = (
collision_reward if collision_reward >= 0.0 else -collision_reward
)
self._required_accuracy: float = required_accuracy
# Flag indicating if the task is done (performance - get_reward + is_done)
self._is_done: bool = False
self._is_truncated: bool = False
self._is_terminated: bool = False
# Distance to target in the previous step (or after reset)
self._previous_distance: Optional[float] = None
self.initial_gripper_joint_positions = (
self.robot_model_class.CLOSED_GRIPPER_JOINT_POSITIONS
)
self.twist = TwistSubscriber(
self,
topic="/cartesian_force_controller/current_twist",
callback_group=self._callback_group,
)
self.joint_states = JointStates(
self, topic="/joint_states", callback_group=self._callback_group
)
self._action = WrenchStamped()
self._action_array: Action = []
def create_action_space(self) -> ActionSpace:
# 0:3 - (x, y, z) force
# Note: The torque components (3:6) are currently disabled, hence the shape of (3,)
return gym.spaces.Box(low=-1.0, high=1.0, shape=(3,), dtype=np.float32)
def create_observation_space(self) -> ObservationSpace:
# 0:3 - (x, y, z) end effector position
# 3:6 - (x, y, z) target position
# 6:12 - current twist (linear and angular velocity)
# Note: These could theoretically be restricted to the workspace and object spawn area instead of inf
return gym.spaces.Box(low=-np.inf, high=np.inf, shape=(12,), dtype=np.float32)
def set_action(self, action: Action):
# self.get_logger().debug(f"action: {action}")
# act = Float64MultiArray()
# act.data = action
# self.joint_controller.publisher.publish(act)
# Store action for reward function
self._action_array = action
# self._action.header.frame_id = self.robot_ee_link_name
self._action.header.stamp = self.get_clock().now().to_msg()
self._action.header.frame_id = self.robot_ee_link_name
# Scale the normalised action to the force range of the controller ([-30, 30] N)
self._action.wrench.force.x = float(action[0]) * 30.0
self._action.wrench.force.y = float(action[1]) * 30.0
self._action.wrench.force.z = float(action[2]) * 30.0
# self._action.wrench.torque.x = float(action[3]) * 10.0
# self._action.wrench.torque.y = float(action[4]) * 10.0
# self._action.wrench.torque.z = float(action[5]) * 10.0
self.controller.publisher.publish(self._action)
def get_observation(self) -> Observation:
# Get current end-effector and target positions
ee_position = self.get_ee_position()
target_position = self.get_object_position(object_model=self.object_names[0])
# joint_states = tuple(self.joint_states.get_positions())
# self.get_logger().warn(f"joint_states: {joint_states[:7]}")
twist = self.twist.get_observation()
twt = (
twist.twist.linear.x,
twist.twist.linear.y,
twist.twist.linear.z,
twist.twist.angular.x,
twist.twist.angular.y,
twist.twist.angular.z,
)
# Create the observation
observation = Observation(
np.concatenate([ee_position, target_position, twt], dtype=np.float32)
)
self.get_logger().debug(f"\nobservation: {observation}")
# Return the observation
return observation
def get_reward(self) -> Reward:
reward = 0.0
# Compute the current distance to the target
current_distance = self.get_distance_to_target()
# Mark the episode done if target is reached
if current_distance < self._required_accuracy:
self._is_terminated = True
reward += 1.0 if self._sparse_reward else 0.0
self.get_logger().debug(f"reward_target: {reward}")
# Give reward based on how much closer robot got relative to the target for dense reward
if not self._sparse_reward:
distance_delta = self._previous_distance - current_distance
reward += distance_delta * 10.0
self._previous_distance = current_distance
self.get_logger().debug(f"reward_distance: {reward}")
if self.check_terrain_collision():
reward -= self._collision_reward
self._is_truncated = True
self.get_logger().debug(f"reward_collision: {reward}")
# Reward control
# reward -= np.abs(self._action_array).sum() * 0.01
# self.get_logger().debug(f"reward_c: {reward}")
# Subtract a small reward each step to provide incentive to act quickly (if enabled)
reward -= self._act_quick_reward
self.get_logger().debug(f"reward: {reward}")
return Reward(reward)
def is_terminated(self) -> bool:
self.get_logger().debug(f"terminated: {self._is_terminated}")
return self._is_terminated
def is_truncated(self) -> bool:
self.get_logger().debug(f"truncated: {self._is_truncated}")
return self._is_truncated
def reset_task(self):
Manipulation.reset_task(self)
self._is_done = False
self._is_truncated = False
self._is_terminated = False
# Compute and store the distance after reset if using dense reward
if not self._sparse_reward:
self._previous_distance = self.get_distance_to_target()
self.get_logger().debug(f"\ntask reset")
def get_distance_to_target(self) -> Tuple[float, float, float]:
ee_position = self.get_ee_position()
object_position = self.get_object_position(object_model=self.object_names[0])
self.tf2_broadcaster.broadcast_tf("world", "object", object_position, (0.0,0.0,0.0,1.0), xyzw=True)
return distance_to_nearest_point(origin=ee_position, points=[object_position])
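# Worked example (illustrative): with sparse_reward=False, required_accuracy=0.05
# and act_quick_reward=0.01, moving from 0.30 m to 0.25 m away from the target
# in one step gives distance_delta=0.05 and reward 0.05 * 10.0 - 0.01 = 0.49;
# once the end effector comes within 0.05 m, the episode terminates.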

View file

@ -0,0 +1,83 @@
import abc
import gymnasium as gym
import numpy as np
from gym_gz.utils.typing import Observation, ObservationSpace
from rbs_gym.envs.models.sensors import Camera
from rbs_gym.envs.observation import CameraSubscriber
from rbs_gym.envs.tasks.reach import Reach
class ReachColorImage(Reach, abc.ABC):
def __init__(
self,
camera_width: int,
camera_height: int,
camera_type: str = "camera",
monochromatic: bool = False,
**kwargs,
):
# Initialize the Task base class
Reach.__init__(
self,
**kwargs,
)
# Store parameters for later use
self._camera_width = camera_width
self._camera_height = camera_height
self._monochromatic = monochromatic
# Perception (RGB camera)
self.camera_sub = CameraSubscriber(
node=self,
topic=Camera.get_color_topic(camera_type),
is_point_cloud=False,
callback_group=self._callback_group,
)
def create_observation_space(self) -> ObservationSpace:
# 0:3*height*width - rgb image
# 0:1*height*width - monochromatic (intensity) image
return gym.spaces.Box(
low=0,
high=255,
shape=(
self._camera_height,
self._camera_width,
1 if self._monochromatic else 3,
),
dtype=np.uint8,
)
def get_observation(self) -> Observation:
# Get the latest image
image = self.camera_sub.get_observation()
assert (
image.width == self._camera_width and image.height == self._camera_height
), f"Error: Resolution of the input image does not match the configured observation space. ({image.width}x{image.height} instead of {self._camera_width}x{self._camera_height})"
# Reshape and create the observation
color_image = np.array(image.data, dtype=np.uint8).reshape(
self._camera_height, self._camera_width, 3
)
# # Debug save images
# from PIL import Image
# img_color = Image.fromarray(color_image)
# img_color.save("img_color.png")
if self._monochromatic:
observation = Observation(color_image[:, :, 0])
else:
observation = Observation(color_image)
self.get_logger().debug(f"\nobservation: {observation}")
# Return the observation
return observation
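# Shape sketch (illustrative): for a hypothetical 64x48 camera the flat uint8
# buffer of length 48*64*3 is reshaped to (height, width, channel) = (48, 64, 3);
# with monochromatic=True only the red channel (index 0) is kept, giving a
# (48, 64) observation.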

View file

@ -0,0 +1,67 @@
import abc
import gymnasium as gym
import numpy as np
from gym_gz.utils.typing import Observation, ObservationSpace
from rbs_gym.envs.models.sensors import Camera
from rbs_gym.envs.observation import CameraSubscriber
from rbs_gym.envs.tasks.reach import Reach
class ReachDepthImage(Reach, abc.ABC):
def __init__(
self,
camera_width: int,
camera_height: int,
camera_type: str = "depth_camera",
**kwargs,
):
# Initialize the Task base class
Reach.__init__(
self,
**kwargs,
)
# Store parameters for later use
self._camera_width = camera_width
self._camera_height = camera_height
# Perception (depth camera)
self.camera_sub = CameraSubscriber(
node=self,
topic=Camera.get_depth_topic(camera_type),
is_point_cloud=False,
callback_group=self._callback_group,
)
def create_observation_space(self) -> ObservationSpace:
# 0:height*width - depth image
return gym.spaces.Box(
low=0,
high=np.inf,
shape=(self._camera_height, self._camera_width, 1),
dtype=np.float32,
)
def get_observation(self) -> Observation:
# Get the latest image
image = self.camera_sub.get_observation()
# Construct from buffer and reshape
depth_image = np.frombuffer(image.data, dtype=np.float32).reshape(
self._camera_height, self._camera_width, 1
)
# Replace all instances of infinity with 0
depth_image[depth_image == np.inf] = 0.0
# Create the observation
observation = Observation(depth_image)
self.get_logger().debug(f"\nobservation: {observation}")
# Return the observation
return observation
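# Worked example (illustrative): pixels with no depth return are reported as
# inf by the simulator and are zeroed to keep the observation finite, e.g.
#
#   buf = np.array([0.5, np.inf, 1.2, np.inf], dtype=np.float32).tobytes()
#   depth = np.frombuffer(buf, dtype=np.float32).reshape(2, 2, 1).copy()
#   depth[depth == np.inf] = 0.0  # -> [[[0.5], [0.0]], [[1.2], [0.0]]]
#
# (the .copy() is needed here because np.frombuffer over bytes is read-only)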

View file

@ -0,0 +1,3 @@
from . import conversions, gazebo, logging, math
from .tf2_broadcaster import Tf2Broadcaster, Tf2BroadcasterStandalone
from .tf2_listener import Tf2Listener, Tf2ListenerStandalone

View file

@ -0,0 +1,183 @@
from typing import Tuple, Union
import numpy
import open3d
# import pyoctree
from scipy.spatial.transform import Rotation
from sensor_msgs.msg import PointCloud2
from open3d.geometry import PointCloud
from geometry_msgs.msg import Transform
def pointcloud2_to_open3d(
ros_point_cloud2: PointCloud2,
include_color: bool = False,
include_intensity: bool = False,
# Note: Order does not matter for DL, that's why channel swapping is disabled by default
fix_rgb_channel_order: bool = False,
) -> PointCloud:
# Create output Open3D PointCloud
open3d_pc = PointCloud()
size = ros_point_cloud2.width * ros_point_cloud2.height
xyz_dtype = ">f4" if ros_point_cloud2.is_bigendian else "<f4"
xyz = numpy.ndarray(
shape=(size, 3),
dtype=xyz_dtype,
buffer=ros_point_cloud2.data,
offset=0,
strides=(ros_point_cloud2.point_step, 4),
)
# Keep only the points whose coordinates are all finite
valid_points = numpy.isfinite(xyz).all(axis=1)
open3d_pc.points = open3d.utility.Vector3dVector(
xyz[valid_points].astype(numpy.float64)
)
if include_color or include_intensity:
if len(ros_point_cloud2.fields) > 3:
bgr = numpy.ndarray(
shape=(size, 3),
dtype=numpy.uint8,
buffer=ros_point_cloud2.data,
offset=ros_point_cloud2.fields[3].offset,
strides=(ros_point_cloud2.point_step, 1),
)
if fix_rgb_channel_order:
# Swap channels to gain rgb (faster than `bgr[:, [2, 1, 0]]`)
bgr[:, 0], bgr[:, 2] = bgr[:, 2], bgr[:, 0].copy()
open3d_pc.colors = open3d.utility.Vector3dVector(
(bgr[valid_points] / 255).astype(numpy.float64)
)
else:
open3d_pc.colors = open3d.utility.Vector3dVector(
numpy.zeros((numpy.count_nonzero(valid_points), 3), dtype=numpy.float64)
)
# TODO: Update octree creator once L8 image format is supported in Ignition Gazebo
# elif include_intensity:
# # Faster approach, but only the first channel gets the intensity value (rest is 0)
# intensities = numpy.zeros((len(valid_points), 3), dtype=numpy.float64)
# intensities[:, [0]] = (
# numpy.ndarray(
# shape=(size, 1),
# dtype=numpy.uint8,
# buffer=ros_point_cloud2.data,
# offset=ros_point_cloud2.fields[3].offset,
# strides=(ros_point_cloud2.point_step, 1),
# )[valid_points]
# / 255
# ).astype(numpy.float64)
# open3d_pc.colors = open3d.utility.Vector3dVector(intensities)
# # # Slower approach, but all channels get the intensity value
# # intensities = numpy.ndarray(
# # shape=(size, 1),
# # dtype=numpy.uint8,
# # buffer=ros_point_cloud2.data,
# # offset=ros_point_cloud2.fields[3].offset,
# # strides=(ros_point_cloud2.point_step, 1),
# # )
# # open3d_pc.colors = open3d.utility.Vector3dVector(
# # numpy.tile(intensities[valid_points] / 255, (1, 3)).astype(numpy.float64)
# # )
# Return the converted Open3D PointCloud
return open3d_pc
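# Usage sketch (hedged): given a `sensor_msgs.msg.PointCloud2` message `msg`
# received from an RGB-D camera topic, conversion and a quick visual check
# could look like:
#   cloud = pointcloud2_to_open3d(msg, include_color=True)
#   open3d.visualization.draw_geometries([cloud])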
def transform_to_matrix(transform: Transform) -> numpy.ndarray:
transform_matrix = numpy.zeros((4, 4))
transform_matrix[3, 3] = 1.0
transform_matrix[0:3, 0:3] = open3d.geometry.get_rotation_matrix_from_quaternion(
[
transform.rotation.w,
transform.rotation.x,
transform.rotation.y,
transform.rotation.z,
]
)
transform_matrix[0, 3] = transform.translation.x
transform_matrix[1, 3] = transform.translation.y
transform_matrix[2, 3] = transform.translation.z
return transform_matrix
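# For reference, the resulting 4x4 homogeneous matrix has the block layout
#   [[R, t],
#    [0, 1]]
# where R is the 3x3 rotation encoded by `transform.rotation` (wxyz order, as
# expected by Open3D) and t is the translation of `transform.translation`.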
# def open3d_point_cloud_to_octree_points(
# open3d_point_cloud: PointCloud,
# include_color: bool = False,
# include_intensity: bool = False,
# ) -> pyoctree.Points:
#
# octree_points = pyoctree.Points()
#
# if include_color:
# features = numpy.reshape(numpy.asarray(open3d_point_cloud.colors), -1)
# elif include_intensity:
# features = numpy.asarray(open3d_point_cloud.colors)[:, 0]
# else:
# features = []
#
# octree_points.set_points(
# # XYZ points
# numpy.reshape(numpy.asarray(open3d_point_cloud.points), -1),
# # Normals
# numpy.reshape(numpy.asarray(open3d_point_cloud.normals), -1),
# # Other features, e.g. color
# features,
# # Labels - not used
# [],
# )
#
# return octree_points
def orientation_6d_to_quat(
v1: Tuple[float, float, float], v2: Tuple[float, float, float]
) -> Tuple[float, float, float, float]:
    # Normalize the first vector
    col1 = v1 / numpy.linalg.norm(v1)
    # Orthogonalise the second vector against the first (Gram-Schmidt) and
    # normalize it, since a learned 6D representation need not be orthogonal
    col2 = v2 - numpy.dot(col1, v2) * col1
    col2 = col2 / numpy.linalg.norm(col2)
    # Find their orthogonal vector via cross product
    col3 = numpy.cross(col1, col2)
    # Stack into rotation matrix as columns, convert to quaternion and return
    quat_xyzw = Rotation.from_matrix(numpy.array([col1, col2, col3]).T).as_quat()
return quat_xyzw
def orientation_quat_to_6d(
quat_xyzw: Tuple[float, float, float, float]
) -> Tuple[Tuple[float, float, float], Tuple[float, float, float]]:
# Convert quaternion into rotation matrix
rot_mat = Rotation.from_quat(quat_xyzw).as_matrix()
# Return first two columns (already normalised)
return (tuple(rot_mat[:, 0]), tuple(rot_mat[:, 1]))
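# Round-trip note (hedged): for any unit quaternion q in xyzw order,
#   orientation_6d_to_quat(*orientation_quat_to_6d(q))
# recovers q up to sign, since q and -q represent the same rotation.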
def quat_to_wxyz(
    xyzw: Union[numpy.ndarray, Tuple[float, float, float, float]]
) -> Union[numpy.ndarray, Tuple[float, float, float, float]]:
    if isinstance(xyzw, tuple):
        return (xyzw[3], xyzw[0], xyzw[1], xyzw[2])
    return xyzw[[3, 0, 1, 2]]
def quat_to_xyzw(
    wxyz: Union[numpy.ndarray, Tuple[float, float, float, float]]
) -> Union[numpy.ndarray, Tuple[float, float, float, float]]:
    if isinstance(wxyz, tuple):
        return (wxyz[1], wxyz[2], wxyz[3], wxyz[0])
    return wxyz[[1, 2, 3, 0]]
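# Example: quat_to_wxyz((0.0, 0.0, 0.0, 1.0)) returns (1.0, 0.0, 0.0, 0.0), and
# quat_to_xyzw inverts it, so the two functions are exact inverses of each other.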

View file

@ -0,0 +1,260 @@
from typing import Tuple, Union
from gym_gz.scenario.model_wrapper import ModelWrapper
from numpy import exp
from scenario.bindings.gazebo import Link, World
from scipy.spatial.transform import Rotation
from rbs_gym.envs.utils.conversions import quat_to_wxyz, quat_to_xyzw
from rbs_gym.envs.utils.math import quat_mul
def get_model_pose(
world: World,
model: Union[ModelWrapper, str],
link: Union[Link, str, None] = None,
xyzw: bool = False,
) -> Tuple[Tuple[float, float, float], Tuple[float, float, float, float]]:
"""
    Return the pose of the model's link. The orientation quaternion is in wxyz
    order by default, or xyzw when the `xyzw` argument is set.
"""
if isinstance(model, str):
# Get reference to the model from its name if string is passed
model = world.to_gazebo().get_model(model).to_gazebo()
if link is None:
# Use the first link if not specified
link = model.get_link(link_name=model.link_names()[0])
elif isinstance(link, str):
# Get reference to the link from its name if string
link = model.get_link(link_name=link)
# Get position and orientation
position = link.position()
quat = link.orientation()
# Convert to xyzw order if desired
if xyzw:
quat = quat_to_xyzw(quat)
# Return pose of the model's link
return (
position,
quat,
)
def get_model_position(
world: World,
model: Union[ModelWrapper, str],
link: Union[Link, str, None] = None,
) -> Tuple[float, float, float]:
"""
Return position of model's link.
"""
if isinstance(model, str):
# Get reference to the model from its name if string is passed
model = world.to_gazebo().get_model(model).to_gazebo()
if link is None:
# Use the first link if not specified
link = model.get_link(link_name=model.link_names()[0])
elif isinstance(link, str):
# Get reference to the link from its name if string
link = model.get_link(link_name=link)
# Return position of the model's link
return link.position()
def get_model_orientation(
world: World,
model: Union[ModelWrapper, str],
link: Union[Link, str, None] = None,
xyzw: bool = False,
) -> Tuple[float, float, float, float]:
"""
    Return the orientation of the model's link as a quaternion in wxyz order by
    default, or xyzw when the `xyzw` argument is set.
"""
if isinstance(model, str):
# Get reference to the model from its name if string is passed
model = world.to_gazebo().get_model(model).to_gazebo()
if link is None:
# Use the first link if not specified
link = model.get_link(link_name=model.link_names()[0])
elif isinstance(link, str):
# Get reference to the link from its name if string
link = model.get_link(link_name=link)
# Get orientation
quat = link.orientation()
# Convert to xyzw order if desired
if xyzw:
quat = quat_to_xyzw(quat)
# Return orientation of the model's link
return quat
def transform_move_to_model_pose(
world: World,
position: Tuple[float, float, float],
quat: Tuple[float, float, float, float],
target_model: Union[ModelWrapper, str],
target_link: Union[Link, str, None] = None,
xyzw: bool = False,
) -> Tuple[Tuple[float, float, float], Tuple[float, float, float, float]]:
"""
    Transform such that the original `position` and `quat` are interpreted
    relative to `target_model::target_link`. The resulting pose is still
    expressed in the world coordinate system.
"""
target_frame_position, target_frame_quat = get_model_pose(
world,
model=target_model,
link=target_link,
xyzw=True,
)
transformed_position = Rotation.from_quat(target_frame_quat).apply(position)
transformed_position = (
transformed_position[0] + target_frame_position[0],
transformed_position[1] + target_frame_position[1],
transformed_position[2] + target_frame_position[2],
)
if not xyzw:
target_frame_quat = quat_to_wxyz(target_frame_quat)
transformed_quat = quat_mul(quat, target_frame_quat, xyzw=xyzw)
return (transformed_position, transformed_quat)
def transform_move_to_model_position(
world: World,
position: Tuple[float, float, float],
target_model: Union[ModelWrapper, str],
target_link: Union[Link, str, None] = None,
) -> Tuple[float, float, float]:
target_frame_position, target_frame_quat_xyzw = get_model_pose(
world,
model=target_model,
link=target_link,
xyzw=True,
)
transformed_position = Rotation.from_quat(target_frame_quat_xyzw).apply(position)
transformed_position = (
target_frame_position[0] + transformed_position[0],
target_frame_position[1] + transformed_position[1],
target_frame_position[2] + transformed_position[2],
)
return transformed_position
def transform_move_to_model_orientation(
world: World,
quat: Tuple[float, float, float, float],
target_model: Union[ModelWrapper, str],
target_link: Union[Link, str, None] = None,
xyzw: bool = False,
) -> Tuple[float, float, float, float]:
target_frame_quat = get_model_orientation(
world,
model=target_model,
link=target_link,
xyzw=xyzw,
)
transformed_quat = quat_mul(quat, target_frame_quat, xyzw=xyzw)
return transformed_quat
def transform_change_reference_frame_pose(
world: World,
position: Tuple[float, float, float],
quat: Tuple[float, float, float, float],
target_model: Union[ModelWrapper, str],
target_link: Union[Link, str, None] = None,
xyzw: bool = False,
) -> Tuple[Tuple[float, float, float], Tuple[float, float, float, float]]:
"""
    Change the reference frame of the original `position` and `quat` from the
    world coordinate system to the `target_model::target_link` coordinate system.
"""
target_frame_position, target_frame_quat = get_model_pose(
world,
model=target_model,
link=target_link,
xyzw=True,
)
transformed_position = (
position[0] - target_frame_position[0],
position[1] - target_frame_position[1],
position[2] - target_frame_position[2],
)
transformed_position = Rotation.from_quat(target_frame_quat).apply(
transformed_position, inverse=True
)
if not xyzw:
target_frame_quat = quat_to_wxyz(target_frame_quat)
transformed_quat = quat_mul(target_frame_quat, quat, xyzw=xyzw)
return (tuple(transformed_position), transformed_quat)
def transform_change_reference_frame_position(
world: World,
position: Tuple[float, float, float],
target_model: Union[ModelWrapper, str],
target_link: Union[Link, str, None] = None,
) -> Tuple[float, float, float]:
target_frame_position, target_frame_quat_xyzw = get_model_pose(
world,
model=target_model,
link=target_link,
xyzw=True,
)
transformed_position = (
position[0] - target_frame_position[0],
position[1] - target_frame_position[1],
position[2] - target_frame_position[2],
)
transformed_position = Rotation.from_quat(target_frame_quat_xyzw).apply(
transformed_position, inverse=True
)
return tuple(transformed_position)
def transform_change_reference_frame_orientation(
world: World,
quat: Tuple[float, float, float, float],
target_model: Union[ModelWrapper, str],
target_link: Union[Link, str, None] = None,
xyzw: bool = False,
) -> Tuple[float, float, float, float]:
target_frame_quat = get_model_orientation(
world,
model=target_model,
link=target_link,
xyzw=xyzw,
)
transformed_quat = quat_mul(target_frame_quat, quat, xyzw=xyzw)
return transformed_quat
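# Usage sketch (hedged, assumes a running scenario `world` that contains a model
# named "robot"): expressing a world-frame grasp pose in the robot's base frame
# could look like:
#   position, quat = transform_change_reference_frame_pose(
#       world, position, quat, target_model="robot", xyzw=True
#   )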

View file

@ -0,0 +1,25 @@
from typing import Union
from gymnasium import logger as gym_logger
from gym_gz.utils import logger as gym_ign_logger
def set_log_level(log_level: Union[int, str]):
"""
Set log level for (Gym) Ignition.
"""
if not isinstance(log_level, int):
log_level = str(log_level).upper()
if "WARNING" == log_level:
log_level = "WARN"
elif not log_level in ["DEBUG", "INFO", "WARN", "ERROR", "DISABLED"]:
log_level = "DISABLED"
log_level = getattr(gym_logger, log_level)
gym_ign_logger.set_level(
level=log_level,
scenario_level=log_level,
)
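# Example (hedged): set_log_level("warning") is normalised to gym's WARN level
# above and applied to both the Gym Ignition logger and the scenario logger.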

View file

@ -0,0 +1,46 @@
from typing import List, Tuple
import numpy as np
def quat_mul(
quat_0: Tuple[float, float, float, float],
quat_1: Tuple[float, float, float, float],
xyzw: bool = True,
) -> Tuple[float, float, float, float]:
"""
Multiply two quaternions
"""
if xyzw:
x0, y0, z0, w0 = quat_0
x1, y1, z1, w1 = quat_1
return (
x1 * w0 + y1 * z0 - z1 * y0 + w1 * x0,
-x1 * z0 + y1 * w0 + z1 * x0 + w1 * y0,
x1 * y0 - y1 * x0 + z1 * w0 + w1 * z0,
-x1 * x0 - y1 * y0 - z1 * z0 + w1 * w0,
)
else:
w0, x0, y0, z0 = quat_0
w1, x1, y1, z1 = quat_1
return (
-x1 * x0 - y1 * y0 - z1 * z0 + w1 * w0,
x1 * w0 + y1 * z0 - z1 * y0 + w1 * x0,
-x1 * z0 + y1 * w0 + z1 * x0 + w1 * y0,
x1 * y0 - y1 * x0 + z1 * w0 + w1 * z0,
)
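# Worked example: composing two 90-degree yaw rotations in xyzw order,
#   quat_mul((0, 0, 0.7071068, 0.7071068), (0, 0, 0.7071068, 0.7071068)),
# yields approximately (0, 0, 1, 0), i.e. a single 180-degree yaw.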
def distance_to_nearest_point(
origin: Tuple[float, float, float], points: List[Tuple[float, float, float]]
) -> float:
return np.linalg.norm(np.array(points) - np.array(origin), axis=1).min()
def get_nearest_point(
origin: Tuple[float, float, float], points: List[Tuple[float, float, float]]
) -> Tuple[float, float, float]:
target_distances = np.linalg.norm(np.array(points) - np.array(origin), axis=1)
return points[target_distances.argmin()]

View file

@ -0,0 +1,74 @@
import sys
from typing import Tuple
import rclpy
from geometry_msgs.msg import TransformStamped
from rclpy.node import Node
from rclpy.parameter import Parameter
from tf2_ros import StaticTransformBroadcaster
class Tf2Broadcaster:
def __init__(
self,
node: Node,
):
self._node = node
self.__tf2_broadcaster = StaticTransformBroadcaster(node=self._node)
self._transform_stamped = TransformStamped()
def broadcast_tf(
self,
parent_frame_id: str,
child_frame_id: str,
translation: Tuple[float, float, float],
rotation: Tuple[float, float, float, float],
xyzw: bool = True,
):
"""
Broadcast transformation of the camera
"""
self._transform_stamped.header.frame_id = parent_frame_id
self._transform_stamped.child_frame_id = child_frame_id
self._transform_stamped.header.stamp = self._node.get_clock().now().to_msg()
self._transform_stamped.transform.translation.x = float(translation[0])
self._transform_stamped.transform.translation.y = float(translation[1])
self._transform_stamped.transform.translation.z = float(translation[2])
if xyzw:
self._transform_stamped.transform.rotation.x = float(rotation[0])
self._transform_stamped.transform.rotation.y = float(rotation[1])
self._transform_stamped.transform.rotation.z = float(rotation[2])
self._transform_stamped.transform.rotation.w = float(rotation[3])
else:
self._transform_stamped.transform.rotation.w = float(rotation[0])
self._transform_stamped.transform.rotation.x = float(rotation[1])
self._transform_stamped.transform.rotation.y = float(rotation[2])
self._transform_stamped.transform.rotation.z = float(rotation[3])
self.__tf2_broadcaster.sendTransform(self._transform_stamped)
class Tf2BroadcasterStandalone(Node, Tf2Broadcaster):
def __init__(
self,
node_name: str = "rbs_gym_tf_broadcaster",
use_sim_time: bool = True,
):
try:
rclpy.init()
except Exception as e:
if not rclpy.ok():
sys.exit(f"ROS 2 context could not be initialised: {e}")
Node.__init__(self, node_name)
self.set_parameters(
[Parameter("use_sim_time", type_=Parameter.Type.BOOL, value=use_sim_time)]
)
Tf2Broadcaster.__init__(self, node=self)

View file

@ -0,0 +1,74 @@
import sys
from typing import Optional
import rclpy
from geometry_msgs.msg import Transform
from rclpy.node import Node
from rclpy.parameter import Parameter
from tf2_ros import Buffer, TransformListener
class Tf2Listener:
def __init__(
self,
node: Node,
):
self._node = node
# Create tf2 buffer and listener for transform lookup
self.__tf2_buffer = Buffer()
TransformListener(buffer=self.__tf2_buffer, node=node)
def lookup_transform_sync(
self, target_frame: str, source_frame: str, retry: bool = True
) -> Optional[Transform]:
try:
return self.__tf2_buffer.lookup_transform(
target_frame=target_frame,
source_frame=source_frame,
time=rclpy.time.Time(),
).transform
        except Exception:
if retry:
while rclpy.ok():
if self.__tf2_buffer.can_transform(
target_frame=target_frame,
source_frame=source_frame,
time=rclpy.time.Time(),
timeout=rclpy.time.Duration(seconds=1, nanoseconds=0),
):
return self.__tf2_buffer.lookup_transform(
target_frame=target_frame,
source_frame=source_frame,
time=rclpy.time.Time(),
).transform
self._node.get_logger().warn(
f'Lookup of transform from "{source_frame}"'
f' to "{target_frame}" failed, retrying...'
)
else:
return None
class Tf2ListenerStandalone(Node, Tf2Listener):
def __init__(
self,
node_name: str = "rbs_gym_tf_listener",
use_sim_time: bool = True,
):
try:
rclpy.init()
except Exception as e:
if not rclpy.ok():
sys.exit(f"ROS 2 context could not be initialised: {e}")
Node.__init__(self, node_name)
self.set_parameters(
[Parameter("use_sim_time", type_=Parameter.Type.BOOL, value=use_sim_time)]
)
Tf2Listener.__init__(self, node=self)

View file

@ -0,0 +1,24 @@
<?xml version="1.0"?>
<sdf version="1.9">
<world name="rbs_gym_world">
<!-- <physics name='1ms' type='ignored'> -->
<!-- <dart> -->
<!-- <collision_detector>bullet</collision_detector> -->
<!-- <solver> -->
<!-- <solver_type>pgs</solver_type> -->
<!-- </solver> -->
<!-- </dart> -->
<!-- </physics> -->
<!-- <plugin name='ignition::gazebo::systems::Contact' filename='ignition-gazebo-contact-system'/> -->
<!-- -->
<!-- Scene -->
<!-- -->
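    <!-- Gravity is zeroed for this training world; presumably so that the
         velocity-controlled robot does not have to compensate for it -->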
<gravity>0 0 0</gravity>
<scene>
<ambient>1.0 1.0 1.0</ambient>
<grid>false</grid>
</scene>
</world>
</sdf>

View file

@ -0,0 +1,9 @@
from .utils import (
ALGOS,
create_test_env,
get_latest_run_id,
get_saved_hyperparams,
get_trained_models,
get_wrapper_class,
linear_schedule,
)

View file

@ -0,0 +1,306 @@
import os
import tempfile
import time
from copy import deepcopy
from functools import wraps
from threading import Thread
from typing import Optional
import optuna
from sb3_contrib import TQC
from stable_baselines3 import SAC
from stable_baselines3.common.callbacks import (
BaseCallback,
CheckpointCallback,
EvalCallback,
)
from stable_baselines3.common.vec_env import DummyVecEnv, VecEnv
class TrialEvalCallback(EvalCallback):
"""
Callback used for evaluating and reporting a trial.
"""
def __init__(
self,
eval_env: VecEnv,
trial: optuna.Trial,
n_eval_episodes: int = 5,
eval_freq: int = 10000,
deterministic: bool = True,
verbose: int = 0,
best_model_save_path: Optional[str] = None,
log_path: Optional[str] = None,
):
super(TrialEvalCallback, self).__init__(
eval_env=eval_env,
n_eval_episodes=n_eval_episodes,
eval_freq=eval_freq,
deterministic=deterministic,
verbose=verbose,
best_model_save_path=best_model_save_path,
log_path=log_path,
)
self.trial = trial
self.eval_idx = 0
self.is_pruned = False
def _on_step(self) -> bool:
if self.eval_freq > 0 and self.n_calls % self.eval_freq == 0:
print("Evaluating trial")
super(TrialEvalCallback, self)._on_step()
self.eval_idx += 1
# report best or report current ?
            # report num_timesteps or elapsed time?
self.trial.report(self.last_mean_reward, self.eval_idx)
            # Prune the trial if needed
if self.trial.should_prune():
self.is_pruned = True
return False
return True
class ParallelTrainCallback(BaseCallback):
"""
Callback to explore (collect experience) and train (do gradient steps)
at the same time using two separate threads.
Normally used with off-policy algorithms and `train_freq=(1, "episode")`.
- blocking mode: wait for the model to finish updating the policy before collecting new experience
at the end of a rollout
- force sync mode: stop training to update to the latest policy for collecting
new experience
:param gradient_steps: Number of gradient steps to do before
sending the new policy
:param verbose: Verbosity level
:param sleep_time: Limit the fps in the thread collecting experience.
"""
def __init__(
self, gradient_steps: int = 100, verbose: int = 0, sleep_time: float = 0.0
):
super(ParallelTrainCallback, self).__init__(verbose)
self.batch_size = 0
self._model_ready = True
self._model = None
self.gradient_steps = gradient_steps
self.process = None
self.model_class = None
self.sleep_time = sleep_time
def _init_callback(self) -> None:
temp_file = tempfile.TemporaryFile()
# Windows TemporaryFile is not a io Buffer
# we save the model in the logs/ folder
if os.name == "nt":
temp_file = os.path.join("logs", "model_tmp.zip")
self.model.save(temp_file)
# TODO (external): add support for other algorithms
for model_class in [SAC, TQC]:
if isinstance(self.model, model_class):
self.model_class = model_class
break
assert (
self.model_class is not None
), f"{self.model} is not supported for parallel training"
self._model = self.model_class.load(temp_file)
self.batch_size = self._model.batch_size
# Disable train method
def patch_train(function):
@wraps(function)
def wrapper(*args, **kwargs):
return
return wrapper
# Add logger for parallel training
self._model.set_logger(self.model.logger)
self.model.train = patch_train(self.model.train)
# Hack: Re-add correct values at save time
def patch_save(function):
@wraps(function)
def wrapper(*args, **kwargs):
return self._model.save(*args, **kwargs)
return wrapper
self.model.save = patch_save(self.model.save)
def train(self) -> None:
self._model_ready = False
self.process = Thread(target=self._train_thread, daemon=True)
self.process.start()
def _train_thread(self) -> None:
self._model.train(
gradient_steps=self.gradient_steps, batch_size=self.batch_size
)
self._model_ready = True
def _on_step(self) -> bool:
if self.sleep_time > 0:
time.sleep(self.sleep_time)
return True
def _on_rollout_end(self) -> None:
if self._model_ready:
self._model.replay_buffer = deepcopy(self.model.replay_buffer)
self.model.set_parameters(deepcopy(self._model.get_parameters()))
self.model.actor = self.model.policy.actor
if self.num_timesteps >= self._model.learning_starts:
self.train()
# Do not wait for the training loop to finish
# self.process.join()
def _on_training_end(self) -> None:
# Wait for the thread to terminate
if self.process is not None:
if self.verbose > 0:
print("Waiting for training thread to terminate")
self.process.join()
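# Usage sketch (hedged): pass the callback to `model.learn()` together with any
# others, e.g.
#   model.learn(100_000, callback=[ParallelTrainCallback(gradient_steps=200)])
# with the algorithm configured to use train_freq=(1, "episode"), as noted in
# the class docstring above.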
class SaveVecNormalizeCallback(BaseCallback):
"""
Callback for saving a VecNormalize wrapper every ``save_freq`` steps
:param save_freq: (int)
:param save_path: (str) Path to the folder where ``VecNormalize`` will be saved, as ``vecnormalize.pkl``
:param name_prefix: (str) Common prefix to the saved ``VecNormalize``, if None (default)
only one file will be kept.
"""
def __init__(
self,
save_freq: int,
save_path: str,
name_prefix: Optional[str] = None,
verbose: int = 0,
):
super(SaveVecNormalizeCallback, self).__init__(verbose)
self.save_freq = save_freq
self.save_path = save_path
self.name_prefix = name_prefix
def _init_callback(self) -> None:
# Create folder if needed
if self.save_path is not None:
os.makedirs(self.save_path, exist_ok=True)
def _on_step(self) -> bool:
if self.n_calls % self.save_freq == 0:
if self.name_prefix is not None:
path = os.path.join(
self.save_path, f"{self.name_prefix}_{self.num_timesteps}_steps.pkl"
)
else:
path = os.path.join(self.save_path, "vecnormalize.pkl")
if self.model.get_vec_normalize_env() is not None:
self.model.get_vec_normalize_env().save(path)
if self.verbose > 1:
print(f"Saving VecNormalize to {path}")
return True
class CheckpointCallbackWithReplayBuffer(CheckpointCallback):
"""
Callback for saving a model every ``save_freq`` steps
:param save_freq:
:param save_path: Path to the folder where the model will be saved.
:param name_prefix: Common prefix to the saved models
:param save_replay_buffer: If enabled, save replay buffer together with model (if supported by algorithm).
:param verbose:
"""
def __init__(
self,
save_freq: int,
save_path: str,
name_prefix: str = "rl_model",
save_replay_buffer: bool = False,
verbose: int = 0,
):
super(CheckpointCallbackWithReplayBuffer, self).__init__(
save_freq, save_path, name_prefix, verbose
)
self.save_replay_buffer = save_replay_buffer
# self.save_replay_buffer = hasattr(self.model, "save_replay_buffer") and save_replay_buffer
def _on_step(self) -> bool:
if self.n_calls % self.save_freq == 0:
path = os.path.join(
self.save_path, f"{self.name_prefix}_{self.num_timesteps}_steps"
)
self.model.save(path)
if self.verbose > 0:
print(f"Saving model checkpoint to {path}")
if self.save_replay_buffer:
path_replay_buffer = os.path.join(self.save_path, "replay_buffer.pkl")
self.model.save_replay_buffer(path_replay_buffer)
if self.verbose > 0:
print(f"Saving model checkpoint to {path_replay_buffer}")
return True
class CurriculumLoggerCallback(BaseCallback):
"""
Custom callback for logging curriculum values.
"""
def __init__(self, verbose=0):
super(CurriculumLoggerCallback, self).__init__(verbose)
def _on_step(self) -> bool:
        for info in self.locals["infos"]:
            for info_key, info_value in info.items():
if not (
info_key.startswith("curriculum")
and info_key.count("__mean_step__")
):
continue
self.logger.record_mean(
key=info_key.replace("__mean_step__", ""), value=info_value
)
return True
def _on_rollout_end(self) -> None:
        for info in self.locals["infos"]:
            for info_key, info_value in info.items():
if not info_key.startswith("curriculum"):
continue
if info_key.count("__mean_step__"):
continue
if info_key.count("__mean_episode__"):
self.logger.record_mean(
key=info_key.replace("__mean_episode__", ""), value=info_value
)
else:
if isinstance(info_value, str):
exclude = "tensorboard"
else:
exclude = None
self.logger.record(key=info_key, value=info_value, exclude=exclude)
class MetricsCallback(BaseCallback):
def __init__(self, verbose: int = 0):
super(MetricsCallback, self).__init__(verbose)
def _on_step(self) -> bool:
        # BaseCallback requires a bool return value; True keeps training running
        return True

View file

@ -0,0 +1,931 @@
import argparse
import os
import pickle as pkl
import time
import warnings
from collections import OrderedDict
from pprint import pprint
from typing import Any, Callable, Dict, List, Optional, Tuple
import gymnasium as gym
import numpy as np
import optuna
import yaml
from optuna.integration.skopt import SkoptSampler
from optuna.pruners import BasePruner, MedianPruner, NopPruner, SuccessiveHalvingPruner
from optuna.samplers import BaseSampler, RandomSampler, TPESampler
from optuna.visualization import plot_optimization_history, plot_param_importances
from stable_baselines3 import HerReplayBuffer
from stable_baselines3.common.base_class import BaseAlgorithm
from stable_baselines3.common.callbacks import BaseCallback, EvalCallback
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.noise import (
NormalActionNoise,
OrnsteinUhlenbeckActionNoise,
)
from stable_baselines3.common.preprocessing import (
is_image_space,
is_image_space_channels_first,
)
from stable_baselines3.common.utils import constant_fn
from stable_baselines3.common.vec_env import (
DummyVecEnv,
SubprocVecEnv,
VecEnv,
VecFrameStack,
VecNormalize,
VecTransposeImage,
is_vecenv_wrapped,
)
from torch import nn as nn
from rbs_gym.utils.callbacks import (
CheckpointCallbackWithReplayBuffer,
SaveVecNormalizeCallback,
TrialEvalCallback,
)
from rbs_gym.utils.hyperparams_opt import HYPERPARAMS_SAMPLER
from rbs_gym.utils.utils import (
ALGOS,
get_callback_list,
get_latest_run_id,
get_wrapper_class,
linear_schedule,
)
class ExperimentManager(object):
"""
Experiment manager: read the hyperparameters,
preprocess them, create the environment and the RL model.
    Please take a look at `train.py` for the details of each argument.
"""
def __init__(
self,
args: argparse.Namespace,
algo: str,
env_id: str,
log_folder: str,
tensorboard_log: str = "",
n_timesteps: int = 0,
eval_freq: int = 10000,
n_eval_episodes: int = 5,
save_freq: int = -1,
hyperparams: Optional[Dict[str, Any]] = None,
env_kwargs: Optional[Dict[str, Any]] = None,
trained_agent: str = "",
optimize_hyperparameters: bool = False,
storage: Optional[str] = None,
study_name: Optional[str] = None,
n_trials: int = 1,
n_jobs: int = 1,
sampler: str = "tpe",
pruner: str = "median",
optimization_log_path: Optional[str] = None,
n_startup_trials: int = 0,
n_evaluations: int = 1,
truncate_last_trajectory: bool = False,
uuid_str: str = "",
seed: int = 0,
log_interval: int = 0,
save_replay_buffer: bool = False,
preload_replay_buffer: str = "",
verbose: int = 1,
vec_env_type: str = "dummy",
n_eval_envs: int = 1,
no_optim_plots: bool = False,
):
super(ExperimentManager, self).__init__()
self.algo = algo
self.env_id = env_id
# Custom params
self.custom_hyperparams = hyperparams
self.env_kwargs = {} if env_kwargs is None else env_kwargs
self.n_timesteps = n_timesteps
self.normalize = False
self.normalize_kwargs = {}
self.env_wrapper = None
self.frame_stack = None
self.seed = seed
self.optimization_log_path = optimization_log_path
self.vec_env_class = {"dummy": DummyVecEnv, "subproc": SubprocVecEnv}[
vec_env_type
]
self.vec_env_kwargs = {}
# self.vec_env_kwargs = {} if vec_env_type == "dummy" else {"start_method": "fork"}
# Callbacks
self.specified_callbacks = []
self.callbacks = []
self.save_freq = save_freq
self.eval_freq = eval_freq
self.n_eval_episodes = n_eval_episodes
self.n_eval_envs = n_eval_envs
self.n_envs = 1 # it will be updated when reading hyperparams
self.n_actions = None # For DDPG/TD3 action noise objects
self._hyperparams = {}
self.trained_agent = trained_agent
self.continue_training = trained_agent.endswith(".zip") and os.path.isfile(
trained_agent
)
self.truncate_last_trajectory = truncate_last_trajectory
self.preload_replay_buffer = preload_replay_buffer
self._is_atari = self.is_atari(env_id)
self._is_gazebo_env = self.is_gazebo_env(env_id)
# Hyperparameter optimization config
self.optimize_hyperparameters = optimize_hyperparameters
self.storage = storage
self.study_name = study_name
self.no_optim_plots = no_optim_plots
# maximum number of trials for finding the best hyperparams
self.n_trials = n_trials
# number of parallel jobs when doing hyperparameter search
self.n_jobs = n_jobs
self.sampler = sampler
self.pruner = pruner
self.n_startup_trials = n_startup_trials
self.n_evaluations = n_evaluations
self.deterministic_eval = not self.is_atari(self.env_id)
# Logging
self.log_folder = log_folder
self.tensorboard_log = (
None if tensorboard_log == "" else os.path.join(tensorboard_log, env_id)
)
self.verbose = verbose
self.args = args
self.log_interval = log_interval
self.save_replay_buffer = save_replay_buffer
self.log_path = f"{log_folder}/{self.algo}/"
self.save_path = os.path.join(
self.log_path,
f"{self.env_id}_{get_latest_run_id(self.log_path, self.env_id) + 1}{uuid_str}",
)
self.params_path = f"{self.save_path}/{self.env_id}"
def setup_experiment(self) -> Optional[Tuple[BaseAlgorithm, Dict[str, Any]]]:
"""
        Read hyperparameters and preprocess them (create schedules, wrappers,
        callbacks and action noise objects), then create the environment and
        possibly the model.
        :return: the initialized RL model and the saved hyperparameters, or None
            when only hyperparameter optimization is requested
"""
hyperparams, saved_hyperparams = self.read_hyperparameters()
hyperparams, self.env_wrapper, self.callbacks = self._preprocess_hyperparams(
hyperparams
)
# Create env to have access to action space for action noise
self._env = self.create_envs(self.n_envs, no_log=False)
self.create_log_folder()
self.create_callbacks()
self._hyperparams = self._preprocess_action_noise(hyperparams, self._env)
if self.continue_training:
model = self._load_pretrained_agent(self._hyperparams, self._env)
elif self.optimize_hyperparameters:
return None
else:
# Train an agent from scratch
model = ALGOS[self.algo](
env=self._env,
tensorboard_log=self.tensorboard_log,
seed=self.seed,
verbose=self.verbose,
**self._hyperparams,
)
# Pre-load replay buffer if enabled
if self.preload_replay_buffer:
if self.preload_replay_buffer.endswith(".pkl"):
replay_buffer_path = self.preload_replay_buffer
else:
replay_buffer_path = os.path.join(
self.preload_replay_buffer, "replay_buffer.pkl"
)
if os.path.exists(replay_buffer_path):
print("Pre-loading replay buffer")
if self.algo == "her":
model.load_replay_buffer(
replay_buffer_path, self.truncate_last_trajectory
)
else:
model.load_replay_buffer(replay_buffer_path)
else:
raise Exception(f"Replay buffer {replay_buffer_path} " "does not exist")
self._save_config(saved_hyperparams)
return model, saved_hyperparams
def learn(self, model: BaseAlgorithm) -> None:
"""
:param model: an initialized RL model
"""
kwargs = {}
if self.log_interval > -1:
kwargs = {"log_interval": self.log_interval}
if len(self.callbacks) > 0:
kwargs["callback"] = self.callbacks
if self.continue_training:
kwargs["reset_num_timesteps"] = False
model.env.reset()
try:
model.learn(self.n_timesteps, **kwargs)
except Exception as e:
print(f"Caught an exception during training of the model: {e}")
self.save_trained_model(model)
finally:
# Release resources
try:
model.env.close()
except EOFError:
pass
def save_trained_model(self, model: BaseAlgorithm) -> None:
"""
Save trained model optionally with its replay buffer
and ``VecNormalize`` statistics
:param model:
"""
print(f"Saving to {self.save_path}")
model.save(f"{self.save_path}/{self.env_id}")
if hasattr(model, "save_replay_buffer") and self.save_replay_buffer:
print("Saving replay buffer")
model.save_replay_buffer(os.path.join(self.save_path, "replay_buffer.pkl"))
if self.normalize:
# Important: save the running average, for testing the agent we need that normalization
model.get_vec_normalize_env().save(
os.path.join(self.params_path, "vecnormalize.pkl")
)
def _save_config(self, saved_hyperparams: Dict[str, Any]) -> None:
"""
        Save the unprocessed hyperparameters; these can be used later
to reproduce an experiment.
:param saved_hyperparams:
"""
# Save hyperparams
with open(os.path.join(self.params_path, "config.yml"), "w") as f:
yaml.dump(saved_hyperparams, f)
# save command line arguments
with open(os.path.join(self.params_path, "args.yml"), "w") as f:
ordered_args = OrderedDict(
[(key, vars(self.args)[key]) for key in sorted(vars(self.args).keys())]
)
yaml.dump(ordered_args, f)
print(f"Log path: {self.save_path}")
def read_hyperparameters(self) -> Tuple[Dict[str, Any], Dict[str, Any]]:
# Load hyperparameters from yaml file
hyperparams_dir = os.path.abspath(
os.path.join(
os.path.realpath(__file__), *3 * [os.path.pardir], "hyperparams"
)
)
with open(f"{hyperparams_dir}/{self.algo}.yml", "r") as f:
hyperparams_dict = yaml.safe_load(f)
if self.env_id in list(hyperparams_dict.keys()):
hyperparams = hyperparams_dict[self.env_id]
elif self._is_atari:
hyperparams = hyperparams_dict["atari"]
else:
raise ValueError(
f"Hyperparameters not found for {self.algo}-{self.env_id}"
)
if self.custom_hyperparams is not None:
# Overwrite hyperparams if needed
hyperparams.update(self.custom_hyperparams)
# Sort hyperparams that will be saved
saved_hyperparams = OrderedDict(
[(key, hyperparams[key]) for key in sorted(hyperparams.keys())]
)
if self.verbose > 0:
print(
"Default hyperparameters for environment (ones being tuned will be overridden):"
)
pprint(saved_hyperparams)
return hyperparams, saved_hyperparams
@staticmethod
def _preprocess_schedules(hyperparams: Dict[str, Any]) -> Dict[str, Any]:
# Create schedules
for key in ["learning_rate", "clip_range", "clip_range_vf"]:
if key not in hyperparams:
continue
if isinstance(hyperparams[key], str):
schedule, initial_value = hyperparams[key].split("_")
initial_value = float(initial_value)
hyperparams[key] = linear_schedule(initial_value)
elif isinstance(hyperparams[key], (float, int)):
# Negative value: ignore (ex: for clipping)
if hyperparams[key] < 0:
continue
hyperparams[key] = constant_fn(float(hyperparams[key]))
else:
raise ValueError(f"Invalid value for {key}: {hyperparams[key]}")
return hyperparams
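    # Example (hedged): a YAML value of `learning_rate: lin_0.001` is split into
    # ("lin", 0.001) above and replaced by linear_schedule(0.001), which decays
    # the learning rate linearly towards zero over the course of training.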
def _preprocess_normalization(self, hyperparams: Dict[str, Any]) -> Dict[str, Any]:
if "normalize" in hyperparams.keys():
self.normalize = hyperparams["normalize"]
            # Special case: instead of normalizing both observation and reward,
            # we can normalize only one of the two. In that case
            # `hyperparams["normalize"]` is a string that can be evaluated as
            # Python, e.g. "dict(norm_obs=False, norm_reward=True)"
if isinstance(self.normalize, str):
self.normalize_kwargs = eval(self.normalize)
self.normalize = True
# Use the same discount factor as for the algorithm
if "gamma" in hyperparams:
self.normalize_kwargs["gamma"] = hyperparams["gamma"]
del hyperparams["normalize"]
return hyperparams
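    # Example YAML values accepted for `normalize` (hedged):
    #   normalize: true
    #   normalize: "dict(norm_obs=True, norm_reward=False)"
    # The string form is evaluated above and stored in `normalize_kwargs`.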
def _preprocess_hyperparams(
self, hyperparams: Dict[str, Any]
) -> Tuple[Dict[str, Any], Optional[Callable], List[BaseCallback]]:
self.n_envs = hyperparams.get("n_envs", 1)
if self.verbose > 0:
print(f"Using {self.n_envs} environments")
# Convert schedule strings to objects
hyperparams = self._preprocess_schedules(hyperparams)
# Pre-process train_freq
if "train_freq" in hyperparams and isinstance(hyperparams["train_freq"], list):
hyperparams["train_freq"] = tuple(hyperparams["train_freq"])
# Should we overwrite the number of timesteps?
if self.n_timesteps > 0:
if self.verbose:
print(f"Overwriting n_timesteps with n={self.n_timesteps}")
else:
self.n_timesteps = int(hyperparams["n_timesteps"])
# Pre-process normalize config
hyperparams = self._preprocess_normalization(hyperparams)
# Pre-process policy/buffer keyword arguments
# Convert to python object if needed
# TODO: Use the new replay_buffer_class argument of offpolicy algorithms instead of monkey patch
for kwargs_key in {
"policy_kwargs",
"replay_buffer_class",
"replay_buffer_kwargs",
}:
if kwargs_key in hyperparams.keys() and isinstance(
hyperparams[kwargs_key], str
):
hyperparams[kwargs_key] = eval(hyperparams[kwargs_key])
        # Delete keys so the dict can be passed to the model constructor
if "n_envs" in hyperparams.keys():
del hyperparams["n_envs"]
del hyperparams["n_timesteps"]
if "frame_stack" in hyperparams.keys():
self.frame_stack = hyperparams["frame_stack"]
del hyperparams["frame_stack"]
# obtain a class object from a wrapper name string in hyperparams
# and delete the entry
env_wrapper = get_wrapper_class(hyperparams)
if "env_wrapper" in hyperparams.keys():
del hyperparams["env_wrapper"]
callbacks = get_callback_list(hyperparams)
if "callback" in hyperparams.keys():
self.specified_callbacks = hyperparams["callback"]
del hyperparams["callback"]
return hyperparams, env_wrapper, callbacks
def _preprocess_action_noise(
self, hyperparams: Dict[str, Any], env: VecEnv
) -> Dict[str, Any]:
# Parse noise string
# Note: only off-policy algorithms are supported
if hyperparams.get("noise_type") is not None:
noise_type = hyperparams["noise_type"].strip()
noise_std = hyperparams["noise_std"]
# Save for later (hyperparameter optimization)
self.n_actions = env.action_space.shape[0]
if "normal" in noise_type:
hyperparams["action_noise"] = NormalActionNoise(
mean=np.zeros(self.n_actions),
sigma=noise_std * np.ones(self.n_actions),
)
elif "ornstein-uhlenbeck" in noise_type:
hyperparams["action_noise"] = OrnsteinUhlenbeckActionNoise(
mean=np.zeros(self.n_actions),
sigma=noise_std * np.ones(self.n_actions),
)
else:
raise RuntimeError(f'Unknown noise type "{noise_type}"')
print(f"Applying {noise_type} noise with std {noise_std}")
del hyperparams["noise_type"]
del hyperparams["noise_std"]
return hyperparams
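    # Example hyperparameter snippet consumed above (hedged, YAML):
    #   noise_type: "normal"
    #   noise_std: 0.1
    # which attaches NormalActionNoise with sigma 0.1 to every action dimension.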
def create_log_folder(self):
os.makedirs(self.params_path, exist_ok=True)
def create_callbacks(self):
if self.save_freq > 0:
# Account for the number of parallel environments
self.save_freq = max(self.save_freq // self.n_envs, 1)
self.callbacks.append(
CheckpointCallbackWithReplayBuffer(
save_freq=self.save_freq,
save_path=self.save_path,
name_prefix="rl_model",
save_replay_buffer=self.save_replay_buffer,
verbose=self.verbose,
)
)
# Create test env if needed, do not normalize reward
if self.eval_freq > 0 and not self.optimize_hyperparameters:
# Account for the number of parallel environments
self.eval_freq = max(self.eval_freq // self.n_envs, 1)
if self.verbose > 0:
print("Creating test environment")
save_vec_normalize = SaveVecNormalizeCallback(
save_freq=1, save_path=self.params_path
)
eval_callback = EvalCallback(
eval_env=self._env,
# TODO: Use separate environment(s) for evaluation
# self.create_envs(self.n_eval_envs, eval_env=True),
callback_on_new_best=save_vec_normalize,
best_model_save_path=self.save_path,
n_eval_episodes=self.n_eval_episodes,
log_path=self.save_path,
eval_freq=self.eval_freq,
deterministic=self.deterministic_eval,
)
self.callbacks.append(eval_callback)
@staticmethod
def is_atari(env_id: str) -> bool:
entry_point = gym.envs.registry[env_id].entry_point
return "AtariEnv" in str(entry_point)
@staticmethod
def is_bullet(env_id: str) -> bool:
entry_point = gym.envs.registry[env_id].entry_point
return "pybullet_envs" in str(entry_point)
@staticmethod
def is_robotics_env(env_id: str) -> bool:
entry_point = gym.envs.registry[env_id].entry_point
return "gym.envs.robotics" in str(entry_point) or "panda_gym.envs" in str(
entry_point
)
@staticmethod
def is_gazebo_env(env_id: str) -> bool:
return "Gazebo" in gym.envs.registry[env_id].entry_point
def _maybe_normalize(self, env: VecEnv, eval_env: bool) -> VecEnv:
"""
Wrap the env into a VecNormalize wrapper if needed
and load saved statistics when present.
:param env:
:param eval_env:
:return:
"""
# Pretrained model, load normalization
path_ = os.path.join(os.path.dirname(self.trained_agent), self.env_id)
path_ = os.path.join(path_, "vecnormalize.pkl")
if os.path.exists(path_):
print("Loading saved VecNormalize stats")
env = VecNormalize.load(path_, env)
# Deactivate training and reward normalization
if eval_env:
env.training = False
env.norm_reward = False
elif self.normalize:
# Copy to avoid changing default values by reference
local_normalize_kwargs = self.normalize_kwargs.copy()
# Do not normalize reward for env used for evaluation
if eval_env:
if len(local_normalize_kwargs) > 0:
local_normalize_kwargs["norm_reward"] = False
else:
local_normalize_kwargs = {"norm_reward": False}
if self.verbose > 0:
if len(local_normalize_kwargs) > 0:
print(f"Normalization activated: {local_normalize_kwargs}")
else:
print("Normalizing input and reward")
            # Note: The following line was added, but it is unclear whether it is still required
env.num_envs = self.n_envs
env = VecNormalize(env, **local_normalize_kwargs)
return env
def create_envs(
self, n_envs: int, eval_env: bool = False, no_log: bool = False
) -> VecEnv:
"""
Create the environment and wrap it if necessary.
:param n_envs:
:param eval_env: Whether is it an environment used for evaluation or not
:param no_log: Do not log training when doing hyperparameter optim
(issue with writing the same file)
:return: the vectorized environment, with appropriate wrappers
"""
# Do not log eval env (issue with writing the same file)
log_dir = None if eval_env or no_log else self.save_path
monitor_kwargs = {}
# Special case for GoalEnvs: log success rate too
if (
"Neck" in self.env_id
or self.is_robotics_env(self.env_id)
or "parking-v0" in self.env_id
):
monitor_kwargs = dict(info_keywords=("is_success",))
# On most env, SubprocVecEnv does not help and is quite memory hungry
# therefore we use DummyVecEnv by default
env = make_vec_env(
env_id=self.env_id,
n_envs=n_envs,
seed=self.seed,
env_kwargs=self.env_kwargs,
monitor_dir=log_dir,
wrapper_class=self.env_wrapper,
vec_env_cls=self.vec_env_class,
vec_env_kwargs=self.vec_env_kwargs,
monitor_kwargs=monitor_kwargs,
)
# Wrap the env into a VecNormalize wrapper if needed
# and load saved statistics when present
env = self._maybe_normalize(env, eval_env)
# Optional Frame-stacking
if self.frame_stack is not None:
n_stack = self.frame_stack
env = VecFrameStack(env, n_stack)
if self.verbose > 0:
print(f"Stacking {n_stack} frames")
if not is_vecenv_wrapped(env, VecTransposeImage):
wrap_with_vectranspose = False
if isinstance(env.observation_space, gym.spaces.Dict):
                # If even one of the keys is an image space in need of transpose, apply the transpose
# If the image spaces are not consistent (for instance one is channel first,
# the other channel last), VecTransposeImage will throw an error
for space in env.observation_space.spaces.values():
wrap_with_vectranspose = wrap_with_vectranspose or (
is_image_space(space)
and not is_image_space_channels_first(space)
)
else:
wrap_with_vectranspose = is_image_space(
env.observation_space
) and not is_image_space_channels_first(env.observation_space)
if wrap_with_vectranspose:
if self.verbose >= 1:
print("Wrapping the env in a VecTransposeImage.")
env = VecTransposeImage(env)
return env
def _load_pretrained_agent(
self, hyperparams: Dict[str, Any], env: VecEnv
) -> BaseAlgorithm:
# Continue training
print(
f"Loading pretrained agent '{self.trained_agent}' to continue its training"
)
# Policy should not be changed
del hyperparams["policy"]
if "policy_kwargs" in hyperparams.keys():
del hyperparams["policy_kwargs"]
model = ALGOS[self.algo].load(
self.trained_agent,
env=env,
seed=self.seed,
tensorboard_log=self.tensorboard_log,
verbose=self.verbose,
**hyperparams,
)
replay_buffer_path = os.path.join(
os.path.dirname(self.trained_agent), "replay_buffer.pkl"
)
if not self.preload_replay_buffer and os.path.exists(replay_buffer_path):
print("Loading replay buffer")
# `truncate_last_traj` will be taken into account only if we use HER replay buffer
model.load_replay_buffer(
replay_buffer_path, truncate_last_traj=self.truncate_last_trajectory
)
return model
def _create_sampler(self, sampler_method: str) -> BaseSampler:
        # n_warmup_steps: disable the pruner until the trial reaches the given number of steps
if sampler_method == "random":
sampler = RandomSampler(seed=self.seed)
elif sampler_method == "tpe":
# TODO (external): try with multivariate=True
sampler = TPESampler(n_startup_trials=self.n_startup_trials, seed=self.seed)
elif sampler_method == "skopt":
# cf https://scikit-optimize.github.io/#skopt.Optimizer
# GP: gaussian process
# Gradient boosted regression: GBRT
sampler = SkoptSampler(
skopt_kwargs={"base_estimator": "GP", "acq_func": "gp_hedge"}
)
else:
raise ValueError(f"Unknown sampler: {sampler_method}")
return sampler
def _create_pruner(self, pruner_method: str) -> BasePruner:
if pruner_method == "halving":
pruner = SuccessiveHalvingPruner(
min_resource=1, reduction_factor=4, min_early_stopping_rate=0
)
elif pruner_method == "median":
pruner = MedianPruner(
n_startup_trials=self.n_startup_trials,
n_warmup_steps=self.n_evaluations // 3,
)
elif pruner_method == "none":
# Do not prune
pruner = NopPruner()
else:
raise ValueError(f"Unknown pruner: {pruner_method}")
return pruner
    # Important: The objective was changed in this project (rbs_gym) to evaluate on the
    # same environment that is used for training (two environments cannot exist
    # simultaneously with the current setup)
def objective(self, trial: optuna.Trial) -> float:
kwargs = self._hyperparams.copy()
trial.model_class = None
# Hack to use DDPG/TD3 noise sampler
trial.n_actions = self._env.action_space.shape[0]
# Hack when using HerReplayBuffer
if kwargs.get("replay_buffer_class") == HerReplayBuffer:
trial.her_kwargs = kwargs.get("replay_buffer_kwargs", {})
# Sample candidate hyperparameters
kwargs.update(HYPERPARAMS_SAMPLER[self.algo](trial))
print(f"\nRunning a new trial with hyperparameters: {kwargs}")
# Write hyperparameters into a file
trial_params_path = os.path.join(self.params_path, "optimization")
os.makedirs(trial_params_path, exist_ok=True)
with open(
os.path.join(
trial_params_path, f"hyperparameters_trial_{trial.number}.yml"
),
"w",
) as f:
yaml.dump(kwargs, f)
model = ALGOS[self.algo](
env=self._env,
# Note: Here I enabled tensorboard logs
tensorboard_log=self.tensorboard_log,
            # Note: Here we differ and seed the trial, so that all trials share the same starting conditions
seed=self.seed,
verbose=self.verbose,
**kwargs,
)
# Pre-load replay buffer if enabled
if self.preload_replay_buffer:
if self.preload_replay_buffer.endswith(".pkl"):
replay_buffer_path = self.preload_replay_buffer
else:
replay_buffer_path = os.path.join(
self.preload_replay_buffer, "replay_buffer.pkl"
)
if os.path.exists(replay_buffer_path):
print("Pre-loading replay buffer")
if self.algo == "her":
model.load_replay_buffer(
replay_buffer_path, self.truncate_last_trajectory
)
else:
model.load_replay_buffer(replay_buffer_path)
else:
raise Exception(f"Replay buffer {replay_buffer_path} " "does not exist")
model.trial = trial
eval_freq = int(self.n_timesteps / self.n_evaluations)
# Account for parallel envs
eval_freq_ = max(eval_freq // model.get_env().num_envs, 1)
# Use non-deterministic eval for Atari
callbacks = get_callback_list({"callback": self.specified_callbacks})
path = None
if self.optimization_log_path is not None:
path = os.path.join(
self.optimization_log_path, f"trial_{str(trial.number)}"
)
eval_callback = TrialEvalCallback(
# TODO: Use a separate environment for evaluation during trial
model.env,
model.trial,
best_model_save_path=path,
log_path=path,
n_eval_episodes=self.n_eval_episodes,
eval_freq=eval_freq_,
deterministic=self.deterministic_eval,
verbose=self.verbose,
)
callbacks.append(eval_callback)
try:
model.learn(self.n_timesteps, callback=callbacks)
# Reset env
self._env.reset()
except AssertionError as e:
# Reset env
self._env.reset()
print("Trial stopped:", e)
# Prune hyperparams that generate NaNs
raise optuna.exceptions.TrialPruned()
except Exception as err:
exception_type = type(err).__name__
print("Trial stopped due to raised exception:", exception_type, err)
# Prune also all other exceptions
raise optuna.exceptions.TrialPruned()
is_pruned = eval_callback.is_pruned
reward = eval_callback.last_mean_reward
print(
f"\nFinished a trial with reward={reward}, is_pruned={is_pruned} "
f"for hyperparameters: {kwargs}"
)
del model
if is_pruned:
raise optuna.exceptions.TrialPruned()
return reward
def hyperparameters_optimization(self) -> None:
if self.verbose > 0:
print("Optimizing hyperparameters")
if self.storage is not None and self.study_name is None:
warnings.warn(
f"You passed a remote storage: {self.storage} but no `--study-name`."
"The study name will be generated by Optuna, make sure to re-use the same study name "
"when you want to do distributed hyperparameter optimization."
)
if self.tensorboard_log is not None:
warnings.warn(
"Tensorboard log is deactivated when running hyperparameter optimization"
)
self.tensorboard_log = None
# TODO (external): eval each hyperparams several times to account for noisy evaluation
sampler = self._create_sampler(self.sampler)
pruner = self._create_pruner(self.pruner)
if self.verbose > 0:
print(f"Sampler: {self.sampler} - Pruner: {self.pruner}")
study = optuna.create_study(
sampler=sampler,
pruner=pruner,
storage=self.storage,
study_name=self.study_name,
load_if_exists=True,
direction="maximize",
)
try:
study.optimize(
self.objective,
n_trials=self.n_trials,
n_jobs=self.n_jobs,
gc_after_trial=True,
show_progress_bar=True,
)
except KeyboardInterrupt:
pass
print("Number of finished trials: ", len(study.trials))
print("Best trial:")
trial = study.best_trial
print("Value: ", trial.value)
print("Params: ")
for key, value in trial.params.items():
print(f" {key}: {value}")
report_name = (
f"report_{self.env_id}_{self.n_trials}-trials-{self.n_timesteps}"
f"-{self.sampler}-{self.pruner}_{int(time.time())}"
)
log_path = os.path.join(self.log_folder, self.algo, report_name)
if self.verbose:
print(f"Writing report to {log_path}")
# Write report
os.makedirs(os.path.dirname(log_path), exist_ok=True)
study.trials_dataframe().to_csv(f"{log_path}.csv")
# Save python object to inspect/re-use it later
with open(f"{log_path}.pkl", "wb+") as f:
pkl.dump(study, f)
# Skip plots
if self.no_optim_plots:
return
# Plot optimization result
try:
fig1 = plot_optimization_history(study)
fig2 = plot_param_importances(study)
fig1.show()
fig2.show()
except (ValueError, ImportError, RuntimeError):
pass
def collect_demonstration(self, model):
# Any random action will do (this won't actually be used since `preload_replay_buffer` env kwarg is enabled)
action = np.array([model.env.action_space.sample()])
# Reset env at the beginning
obs = model.env.reset()
# Collect transitions
for i in range(model.replay_buffer.buffer_size):
            # Note: If `None` is passed to the Grasp env, it uses a custom action heuristic to reach the target
next_obs, rewards, dones, infos = model.env.unwrapped.step(action)
# Extract the actual actions from info
actual_actions = [info["actual_actions"] for info in infos]
# Add to replay buffer
model.replay_buffer.add(obs, next_obs, actual_actions, rewards, dones)
# Update current observation
obs = next_obs
print("Saving replay buffer")
model.save_replay_buffer(os.path.join(self.save_path, "replay_buffer.pkl"))
model.env.close()
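# Usage sketch (hedged, mirroring how scripts/train.py is expected to drive the
# manager; the env id below is hypothetical):
#   manager = ExperimentManager(args, algo="sac", env_id="Reach-Gazebo-v0", log_folder="logs")
#   result = manager.setup_experiment()
#   if result is not None:
#       model, _ = result
#       manager.learn(model)
#       manager.save_trained_model(model)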

View file

@ -0,0 +1,170 @@
from typing import Any, Dict
import numpy as np
import optuna
from stable_baselines3.common.noise import NormalActionNoise
from torch import nn as nn
def sample_sac_params(
trial: optuna.Trial,
) -> Dict[str, Any]:
"""
Sampler for SAC hyperparameters
"""
buffer_size = 150000
# learning_starts = trial.suggest_categorical(
# "learning_starts", [5000, 10000, 20000])
learning_starts = 5000
batch_size = trial.suggest_categorical("batch_size", [32, 64, 128, 256, 512, 1024, 2048])
learning_rate = trial.suggest_float(
"learning_rate", low=0.000001, high=0.001, log=True
)
gamma = trial.suggest_float("gamma", low=0.98, high=1.0, log=True)
tau = trial.suggest_float("tau", low=0.001, high=0.025, log=True)
ent_coef = "auto_0.5_0.1"
target_entropy = "auto"
noise_std = trial.suggest_float("noise_std", low=0.01, high=0.2, log=True)
action_noise = NormalActionNoise(
mean=np.zeros(trial.n_actions), sigma=np.ones(trial.n_actions) * noise_std
)
train_freq = 1
gradient_steps = trial.suggest_categorical("gradient_steps", [1, 2])
policy_kwargs = dict()
net_arch = trial.suggest_categorical("net_arch", [256, 384, 512])
policy_kwargs["net_arch"] = [net_arch] * 3
return {
"buffer_size": buffer_size,
"learning_starts": learning_starts,
"batch_size": batch_size,
"learning_rate": learning_rate,
"gamma": gamma,
"tau": tau,
"ent_coef": ent_coef,
"target_entropy": target_entropy,
"action_noise": action_noise,
"train_freq": train_freq,
"gradient_steps": gradient_steps,
"policy_kwargs": policy_kwargs,
}
def sample_td3_params(
trial: optuna.Trial,
) -> Dict[str, Any]:
"""
Sampler for TD3 hyperparameters
"""
buffer_size = 150000
# learning_starts = trial.suggest_categorical(
# "learning_starts", [5000, 10000, 20000])
learning_starts = 5000
batch_size = trial.suggest_categorical("batch_size", [32, 64, 128])
learning_rate = trial.suggest_float(
"learning_rate", low=0.000001, high=0.001, log=True
)
gamma = trial.suggest_float("gamma", low=0.98, high=1.0, log=True)
tau = trial.suggest_float("tau", low=0.001, high=0.025, log=True)
target_policy_noise = trial.suggest_float(
"target_policy_noise", low=0.00000001, high=0.5, log=True
)
target_noise_clip = 0.5
noise_std = trial.suggest_float("noise_std", low=0.025, high=0.5, log=True)
action_noise = NormalActionNoise(
mean=np.zeros(trial.n_actions), sigma=np.ones(trial.n_actions) * noise_std
)
train_freq = 1
gradient_steps = trial.suggest_categorical("gradient_steps", [1, 2])
policy_kwargs = dict()
net_arch = trial.suggest_categorical("net_arch", [256, 384, 512])
policy_kwargs["net_arch"] = [net_arch] * 3
return {
"buffer_size": buffer_size,
"learning_starts": learning_starts,
"batch_size": batch_size,
"learning_rate": learning_rate,
"gamma": gamma,
"tau": tau,
"target_policy_noise": target_policy_noise,
"target_noise_clip": target_noise_clip,
"action_noise": action_noise,
"train_freq": train_freq,
"gradient_steps": gradient_steps,
"policy_kwargs": policy_kwargs,
}
def sample_tqc_params(
trial: optuna.Trial,
) -> Dict[str, Any]:
"""
Sampler for TQC hyperparameters
"""
buffer_size = 25000
learning_starts = 0
batch_size = 32
learning_rate = trial.suggest_float(
"learning_rate", low=0.000025, high=0.00075, log=True
)
gamma = 1.0 - trial.suggest_float("gamma", low=0.0001, high=0.025, log=True)
tau = trial.suggest_float("tau", low=0.0005, high=0.025, log=True)
ent_coef = "auto_0.1_0.05"
target_entropy = "auto"
noise_std = trial.suggest_float("noise_std", low=0.01, high=0.1, log=True)
action_noise = NormalActionNoise(
mean=np.zeros(trial.n_actions), sigma=np.ones(trial.n_actions) * noise_std
)
train_freq = 1
gradient_steps = trial.suggest_categorical("gradient_steps", [1, 2])
policy_kwargs = dict()
net_arch = trial.suggest_categorical("net_arch", [128, 256, 384, 512])
policy_kwargs["net_arch"] = [net_arch] * 2
policy_kwargs["n_quantiles"] = trial.suggest_int("n_quantiles", low=20, high=40)
top_quantiles_to_drop_per_net = round(0.08 * policy_kwargs["n_quantiles"])
policy_kwargs["n_critics"] = trial.suggest_categorical("n_critics", [2, 3])
return {
"buffer_size": buffer_size,
"learning_starts": learning_starts,
"batch_size": batch_size,
"learning_rate": learning_rate,
"gamma": gamma,
"tau": tau,
"ent_coef": ent_coef,
"target_entropy": target_entropy,
"top_quantiles_to_drop_per_net": top_quantiles_to_drop_per_net,
"action_noise": action_noise,
"train_freq": train_freq,
"gradient_steps": gradient_steps,
"policy_kwargs": policy_kwargs,
}
HYPERPARAMS_SAMPLER = {
"sac": sample_sac_params,
"td3": sample_td3_params,
"tqc": sample_tqc_params,
}
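# Usage sketch (hedged): ExperimentManager.objective looks the sampler up by
# algorithm name and merges the sampled values into the trial's model kwargs:
#   kwargs.update(HYPERPARAMS_SAMPLER["sac"](trial))
# The trial object is expected to carry an `n_actions` attribute, set in
# `objective`, for the action-noise samplers above.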

View file

@ -0,0 +1,411 @@
import argparse
import glob
import importlib
import os
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
import gymnasium as gym
# For custom activation fn
import stable_baselines3 as sb3 # noqa: F401
import torch as th # noqa: F401
import yaml
from sb3_contrib import QRDQN, TQC
from stable_baselines3 import A2C, DDPG, DQN, PPO, SAC, TD3
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.sb2_compat.rmsprop_tf_like import ( # noqa: F401
RMSpropTFLike,
)
from stable_baselines3.common.vec_env import (
DummyVecEnv,
SubprocVecEnv,
VecEnv,
VecFrameStack,
VecNormalize,
)
from torch import nn as nn # noqa: F401 pylint: disable=unused-import
ALGOS = {
"a2c": A2C,
"ddpg": DDPG,
"dqn": DQN,
"ppo": PPO,
"sac": SAC,
"td3": TD3,
# SB3 Contrib
"qrdqn": QRDQN,
"tqc": TQC,
}
def flatten_dict_observations(env: gym.Env) -> gym.Env:
assert isinstance(env.observation_space, gym.spaces.Dict)
try:
return gym.wrappers.FlattenObservation(env)
except AttributeError:
keys = env.observation_space.spaces.keys()
return gym.wrappers.FlattenDictWrapper(env, dict_keys=list(keys))
def get_wrapper_class(
hyperparams: Dict[str, Any]
) -> Optional[Callable[[gym.Env], gym.Env]]:
"""
Get one or more Gym environment wrapper classes specified by the hyperparameter
"env_wrapper".
e.g.
env_wrapper: gym_minigrid.wrappers.FlatObsWrapper
for multiple, specify a list:
env_wrapper:
- utils.wrappers.PlotActionWrapper
- utils.wrappers.TimeFeatureWrapper
:param hyperparams:
:return: maybe a callable to wrap the environment
with one or multiple gym.Wrapper
"""
def get_module_name(wrapper_name):
return ".".join(wrapper_name.split(".")[:-1])
def get_class_name(wrapper_name):
return wrapper_name.split(".")[-1]
if "env_wrapper" in hyperparams.keys():
wrapper_name = hyperparams.get("env_wrapper")
if wrapper_name is None:
return None
if not isinstance(wrapper_name, list):
wrapper_names = [wrapper_name]
else:
wrapper_names = wrapper_name
wrapper_classes = []
wrapper_kwargs = []
# Handle multiple wrappers
for wrapper_name in wrapper_names:
# Handle keyword arguments
if isinstance(wrapper_name, dict):
assert len(wrapper_name) == 1, (
"You have an error in the formatting "
f"of your YAML file near {wrapper_name}. "
"You should check the indentation."
)
wrapper_dict = wrapper_name
wrapper_name = list(wrapper_dict.keys())[0]
kwargs = wrapper_dict[wrapper_name]
else:
kwargs = {}
wrapper_module = importlib.import_module(get_module_name(wrapper_name))
wrapper_class = getattr(wrapper_module, get_class_name(wrapper_name))
wrapper_classes.append(wrapper_class)
wrapper_kwargs.append(kwargs)
def wrap_env(env: gym.Env) -> gym.Env:
"""
:param env:
:return:
"""
for wrapper_class, kwargs in zip(wrapper_classes, wrapper_kwargs):
env = wrapper_class(env, **kwargs)
return env
return wrap_env
else:
return None
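# Example of the dict form handled above (the wrapper path is hypothetical):
#
#   env_wrapper:
#     - rbs_gym.envs.wrappers.TimeLimitWrapper:
#         max_steps: 500
#
# get_wrapper_class() then returns a callable applying each wrapper in order:
#
#   wrapper_fn = get_wrapper_class(hyperparams)
#   if wrapper_fn is not None:
#       env = wrapper_fn(env)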
def get_callback_list(hyperparams: Dict[str, Any]) -> List[BaseCallback]:
"""
Get one or more callback classes specified by the hyperparameter
"callback".
e.g.
callback: stable_baselines3.common.callbacks.CheckpointCallback
for multiple, specify a list:
callback:
- utils.callbacks.PlotActionWrapper
- stable_baselines3.common.callbacks.CheckpointCallback
:param hyperparams:
:return:
"""
def get_module_name(callback_name):
return ".".join(callback_name.split(".")[:-1])
def get_class_name(callback_name):
return callback_name.split(".")[-1]
callbacks = []
if "callback" in hyperparams.keys():
callback_name = hyperparams.get("callback")
if callback_name is None:
return callbacks
if not isinstance(callback_name, list):
callback_names = [callback_name]
else:
callback_names = callback_name
# Handle multiple wrappers
for callback_name in callback_names:
# Handle keyword arguments
if isinstance(callback_name, dict):
assert len(callback_name) == 1, (
"You have an error in the formatting "
f"of your YAML file near {callback_name}. "
"You should check the indentation."
)
callback_dict = callback_name
callback_name = list(callback_dict.keys())[0]
kwargs = callback_dict[callback_name]
else:
kwargs = {}
callback_module = importlib.import_module(get_module_name(callback_name))
callback_class = getattr(callback_module, get_class_name(callback_name))
callbacks.append(callback_class(**kwargs))
return callbacks
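# Example hyperparameter entry consumed by get_callback_list(), using the real
# SB3 CheckpointCallback (the save settings are illustrative):
#
#   callback:
#     - stable_baselines3.common.callbacks.CheckpointCallback:
#         save_freq: 10000
#         save_path: ./checkpoints/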
def create_test_env(
env_id: str,
n_envs: int = 1,
stats_path: Optional[str] = None,
seed: int = 0,
log_dir: Optional[str] = None,
should_render: bool = True,
hyperparams: Optional[Dict[str, Any]] = None,
env_kwargs: Optional[Dict[str, Any]] = None,
) -> VecEnv:
"""
Create environment for testing a trained agent
:param env_id:
:param n_envs: number of processes
:param stats_path: path to the folder containing the saved running averages
:param seed: Seed for random number generator
:param log_dir: Where to log rewards
:param should_render: For Pybullet env, display the GUI
:param hyperparams: Additional hyperparams (ex: n_stack)
:param env_kwargs: Optional keyword argument to pass to the env constructor
:return:
"""
# Avoid circular import
from rbs_gym.utils.exp_manager import ExperimentManager
# Create the environment and wrap it if necessary
hyperparams = {} if hyperparams is None else hyperparams
env_wrapper = get_wrapper_class(hyperparams)
if "env_wrapper" in hyperparams.keys():
del hyperparams["env_wrapper"]
vec_env_kwargs = {}
vec_env_cls = DummyVecEnv
if n_envs > 1 or (ExperimentManager.is_bullet(env_id) and should_render):
# HACK: force SubprocVecEnv for Bullet env
# as PyBullet envs do not follow the gym.render() interface
vec_env_cls = SubprocVecEnv
# start_method = 'spawn' for thread safe
env = make_vec_env(
env_id,
n_envs=n_envs,
monitor_dir=log_dir,
seed=seed,
wrapper_class=env_wrapper,
env_kwargs=env_kwargs,
vec_env_cls=vec_env_cls,
vec_env_kwargs=vec_env_kwargs,
)
# Load saved stats for normalizing input and rewards
# And optionally stack frames
if stats_path is not None:
if hyperparams["normalize"]:
print("Loading running average")
print(f"with params: {hyperparams['normalize_kwargs']}")
path_ = os.path.join(stats_path, "vecnormalize.pkl")
if os.path.exists(path_):
env = VecNormalize.load(path_, env)
# Deactivate training and reward normalization
env.training = False
env.norm_reward = False
else:
raise ValueError(f"VecNormalize stats {path_} not found")
n_stack = hyperparams.get("frame_stack", 0)
if n_stack > 0:
print(f"Stacking {n_stack} frames")
env = VecFrameStack(env, n_stack)
return env
def linear_schedule(initial_value: Union[float, str]) -> Callable[[float], float]:
"""
Linear learning rate schedule.
:param initial_value: (float or str)
:return: (function)
"""
if isinstance(initial_value, str):
initial_value = float(initial_value)
def func(progress_remaining: float) -> float:
"""
Progress will decrease from 1 (beginning) to 0
:param progress_remaining: (float)
:return: (float)
"""
return progress_remaining * initial_value
return func
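# Quick sketch of the resulting schedule:
#
#   schedule = linear_schedule(0.001)
#   schedule(1.0)  # -> 0.001 at the start of training
#   schedule(0.4)  # -> 0.0004 with 40% of the timesteps remaining
#   schedule(0.0)  # -> 0.0 at the very end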
def get_trained_models(log_folder: str) -> Dict[str, Tuple[str, str]]:
"""
:param log_folder: Root log folder
:return: Dict[str, Tuple[str, str]] representing the trained agents
"""
trained_models = {}
for algo in os.listdir(log_folder):
if not os.path.isdir(os.path.join(log_folder, algo)):
continue
for env_id in os.listdir(os.path.join(log_folder, algo)):
# Retrieve env name
env_id = env_id.split("_")[0]
trained_models[f"{algo}-{env_id}"] = (algo, env_id)
return trained_models
def get_latest_run_id(log_path: str, env_id: str) -> int:
"""
Returns the latest run number for the given log name and log path,
by finding the greatest number in the directories.
:param log_path: path to log folder
:param env_id:
:return: latest run number
"""
max_run_id = 0
for path in glob.glob(os.path.join(log_path, env_id + "_[0-9]*")):
file_name = os.path.basename(path)
ext = file_name.split("_")[-1]
if (
env_id == "_".join(file_name.split("_")[:-1])
and ext.isdigit()
and int(ext) > max_run_id
):
max_run_id = int(ext)
return max_run_id
def get_saved_hyperparams(
stats_path: str,
norm_reward: bool = False,
test_mode: bool = False,
) -> Tuple[Dict[str, Any], str]:
"""
:param stats_path:
:param norm_reward:
:param test_mode:
:return:
"""
hyperparams = {}
if not os.path.isdir(stats_path):
stats_path = None
else:
config_file = os.path.join(stats_path, "config.yml")
if os.path.isfile(config_file):
# Load saved hyperparameters
with open(os.path.join(stats_path, "config.yml"), "r") as f:
hyperparams = yaml.load(
f, Loader=yaml.UnsafeLoader
) # pytype: disable=module-attr
hyperparams["normalize"] = hyperparams.get("normalize", False)
else:
obs_rms_path = os.path.join(stats_path, "obs_rms.pkl")
hyperparams["normalize"] = os.path.isfile(obs_rms_path)
# Load normalization params
if hyperparams["normalize"]:
if isinstance(hyperparams["normalize"], str):
normalize_kwargs = eval(hyperparams["normalize"])
if test_mode:
normalize_kwargs["norm_reward"] = norm_reward
else:
normalize_kwargs = {
"norm_obs": hyperparams["normalize"],
"norm_reward": norm_reward,
}
hyperparams["normalize_kwargs"] = normalize_kwargs
return hyperparams, stats_path
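# Typical use when loading a trained agent (the path layout mirrors the
# scripts below; the experiment id is illustrative):
#
#   stats_path = "logs/sac/Reach-Gazebo-v0_1/Reach-Gazebo-v0"
#   hyperparams, stats_path = get_saved_hyperparams(stats_path)
#   # hyperparams["normalize_kwargs"] then parametrizes VecNormalize.load()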
class StoreDict(argparse.Action):
"""
Custom argparse action for storing dict.
In: args1:0.0 args2:"dict(a=1)"
Out: {'args1': 0.0, 'args2': dict(a=1)}
"""
def __init__(self, option_strings, dest, nargs=None, **kwargs):
self._nargs = nargs
super(StoreDict, self).__init__(option_strings, dest, nargs=nargs, **kwargs)
def __call__(self, parser, namespace, values, option_string=None):
arg_dict = {}
if hasattr(namespace, self.dest):
current_arg = getattr(namespace, self.dest)
if isinstance(current_arg, Dict):
arg_dict = getattr(namespace, self.dest)
for arguments in values:
if not arguments:
continue
key = arguments.split(":")[0]
value = ":".join(arguments.split(":")[1:])
# Evaluate the string as python code
arg_dict[key] = eval(value)
setattr(namespace, self.dest, arg_dict)
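# Example (values are illustrative): with
#
#   parser.add_argument("--env-kwargs", nargs="+", action=StoreDict)
#
# the command line `--env-kwargs n_objects:3 randomize:True` produces
# args.env_kwargs == {"n_objects": 3, "randomize": True}. Values go through
# eval(), so this should only ever be fed trusted input.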
def str2bool(value: Union[str, bool]) -> bool:
"""
Convert logical string to boolean. Can be used as argparse type.
"""
if isinstance(value, bool):
return value
if value.lower() in ("yes", "true", "t", "y", "1"):
return True
elif value.lower() in ("no", "false", "f", "n", "0"):
return False
else:
raise argparse.ArgumentTypeError("Boolean value expected.")
def empty_str2none(value: Optional[str]) -> Optional[str]:
"""
If string is empty, convert to None. Can be used as argparse type.
"""
if not value:
return None
return value

View file

@ -0,0 +1,395 @@
import gymnasium as gym
import numpy as np
from matplotlib import pyplot as plt
from scipy.signal import iirfilter, sosfilt, zpk2sos
class DoneOnSuccessWrapper(gym.Wrapper):
"""
Resets the episode on success and offsets the reward.
Useful for GoalEnv.
"""
def __init__(self, env: gym.Env, reward_offset: float = 0.0, n_successes: int = 1):
super(DoneOnSuccessWrapper, self).__init__(env)
self.reward_offset = reward_offset
self.n_successes = n_successes
self.current_successes = 0
def reset(self):
self.current_successes = 0
return self.env.reset()
def step(self, action):
obs, reward, done, info = self.env.step(action)
if info.get("is_success", False):
self.current_successes += 1
else:
self.current_successes = 0
# number of successes in a row
done = done or self.current_successes >= self.n_successes
reward += self.reward_offset
return obs, reward, done, info
def compute_reward(self, achieved_goal, desired_goal, info):
reward = self.env.compute_reward(achieved_goal, desired_goal, info)
return reward + self.reward_offset
class ActionNoiseWrapper(gym.Wrapper):
"""
Add gaussian noise to the action (without telling the agent),
to test the robustness of the control.
:param env: (gym.Env)
:param noise_std: (float) Standard deviation of the noise
"""
def __init__(self, env, noise_std=0.1):
super(ActionNoiseWrapper, self).__init__(env)
self.noise_std = noise_std
def step(self, action):
noise = np.random.normal(
np.zeros_like(action), np.ones_like(action) * self.noise_std
)
noisy_action = action + noise
return self.env.step(noisy_action)
# from https://docs.obspy.org
def lowpass(data, freq, df, corners=4, zerophase=False):
"""
Butterworth-Lowpass Filter.
Filter data removing data over certain frequency ``freq`` using ``corners``
corners.
The filter uses :func:`scipy.signal.iirfilter` (for design)
and :func:`scipy.signal.sosfilt` (for applying the filter).
:type data: numpy.ndarray
:param data: Data to filter.
:param freq: Filter corner frequency.
:param df: Sampling rate in Hz.
:param corners: Filter corners / order.
:param zerophase: If True, apply filter once forwards and once backwards.
This results in twice the number of corners but zero phase shift in
the resulting filtered trace.
:return: Filtered data.
"""
fe = 0.5 * df
f = freq / fe
# handle bad corner frequencies by clamping to Nyquist
if f > 1:
f = 1.0
msg = (
"Selected corner frequency is above Nyquist. "
+ "Setting Nyquist as high corner."
)
print(msg)
z, p, k = iirfilter(corners, f, btype="lowpass", ftype="butter", output="zpk")
sos = zpk2sos(z, p, k)
if zerophase:
firstpass = sosfilt(sos, data)
return sosfilt(sos, firstpass[::-1])[::-1]
else:
return sosfilt(sos, data)
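# Usage sketch (the signal values are illustrative): smooth a 25 Hz action
# trace with a 5 Hz corner frequency, as LowPassFilterWrapper below does per
# action axis:
#
#   t = np.linspace(0.0, 4.0, 100)
#   noisy = np.sin(t) + 0.1 * np.random.randn(100)
#   smooth = lowpass(noisy, freq=5.0, df=25.0, corners=4)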
class LowPassFilterWrapper(gym.Wrapper):
"""
Butterworth-Lowpass
:param env: (gym.Env)
:param freq: Filter corner frequency.
:param df: Sampling rate in Hz.
"""
def __init__(self, env, freq=5.0, df=25.0):
super(LowPassFilterWrapper, self).__init__(env)
self.freq = freq
self.df = df
self.signal = []
def reset(self):
self.signal = []
return self.env.reset()
def step(self, action):
self.signal.append(action)
filtered = np.zeros_like(action)
for i in range(self.action_space.shape[0]):
smoothed_action = lowpass(
np.array(self.signal)[:, i], freq=self.freq, df=self.df
)
filtered[i] = smoothed_action[-1]
return self.env.step(filtered)
class ActionSmoothingWrapper(gym.Wrapper):
"""
Smooth the action using exponential moving average.
:param env: (gym.Env)
:param smoothing_coef: (float) Smoothing coefficient (0 no smoothing, 1 very smooth)
"""
def __init__(self, env, smoothing_coef: float = 0.0):
super(ActionSmoothingWrapper, self).__init__(env)
self.smoothing_coef = smoothing_coef
self.smoothed_action = None
# from https://github.com/rail-berkeley/softlearning/issues/3
# for smoothing latent space
# self.alpha = self.smoothing_coef
# self.beta = np.sqrt(1 - self.alpha ** 2) / (1 - self.alpha)
def reset(self):
self.smoothed_action = None
return self.env.reset()
def step(self, action):
if self.smoothed_action is None:
self.smoothed_action = np.zeros_like(action)
self.smoothed_action = (
self.smoothing_coef * self.smoothed_action
+ (1 - self.smoothing_coef) * action
)
return self.env.step(self.smoothed_action)
class DelayedRewardWrapper(gym.Wrapper):
"""
Delay the reward by `delay` steps; this makes the task harder but more realistic.
The reward is accumulated during those steps.
:param env: (gym.Env)
:param delay: (int) Number of steps the reward should be delayed.
"""
def __init__(self, env, delay=10):
super(DelayedRewardWrapper, self).__init__(env)
self.delay = delay
self.current_step = 0
self.accumulated_reward = 0.0
def reset(self):
self.current_step = 0
self.accumulated_reward = 0.0
return self.env.reset()
def step(self, action):
obs, reward, done, info = self.env.step(action)
self.accumulated_reward += reward
self.current_step += 1
if self.current_step % self.delay == 0 or done:
reward = self.accumulated_reward
self.accumulated_reward = 0.0
else:
reward = 0.0
return obs, reward, done, info
class HistoryWrapper(gym.Wrapper):
"""
Stack past observations and actions to give a history to the agent.
:param env: (gym.Env)
:param horizon: (int) Number of steps to keep in the history.
"""
def __init__(self, env: gym.Env, horizon: int = 5):
assert isinstance(env.observation_space, gym.spaces.Box)
wrapped_obs_space = env.observation_space
wrapped_action_space = env.action_space
# TODO (external): double check, it seems wrong when we have different low and highs
low_obs = np.repeat(wrapped_obs_space.low, horizon, axis=-1)
high_obs = np.repeat(wrapped_obs_space.high, horizon, axis=-1)
low_action = np.repeat(wrapped_action_space.low, horizon, axis=-1)
high_action = np.repeat(wrapped_action_space.high, horizon, axis=-1)
low = np.concatenate((low_obs, low_action))
high = np.concatenate((high_obs, high_action))
# Overwrite the observation space
env.observation_space = gym.spaces.Box(
low=low, high=high, dtype=wrapped_obs_space.dtype
)
super(HistoryWrapper, self).__init__(env)
self.horizon = horizon
self.low_action, self.high_action = low_action, high_action
self.low_obs, self.high_obs = low_obs, high_obs
self.low, self.high = low, high
self.obs_history = np.zeros(low_obs.shape, low_obs.dtype)
self.action_history = np.zeros(low_action.shape, low_action.dtype)
def _create_obs_from_history(self):
return np.concatenate((self.obs_history, self.action_history))
def reset(self):
# Flush the history
self.obs_history[...] = 0
self.action_history[...] = 0
obs = self.env.reset()
self.obs_history[..., -obs.shape[-1] :] = obs
return self._create_obs_from_history()
def step(self, action):
obs, reward, done, info = self.env.step(action)
last_ax_size = obs.shape[-1]
self.obs_history = np.roll(self.obs_history, shift=-last_ax_size, axis=-1)
self.obs_history[..., -obs.shape[-1] :] = obs
self.action_history = np.roll(
self.action_history, shift=-action.shape[-1], axis=-1
)
self.action_history[..., -action.shape[-1] :] = action
return self._create_obs_from_history(), reward, done, info
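# Shape sketch (dimensions are illustrative): with a 3-dimensional Box
# observation, a 2-dimensional action and horizon=5, the wrapped observation
# is a flat vector of 3*5 + 2*5 = 25 entries, holding the stacked past
# observations followed by the stacked past actions.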
class HistoryWrapperObsDict(gym.Wrapper):
"""
History Wrapper for dict observation.
:param env: (gym.Env)
:param horizon: (int) Number of steps to keep in the history.
"""
def __init__(self, env, horizon=5):
assert isinstance(env.observation_space.spaces["observation"], gym.spaces.Box)
wrapped_obs_space = env.observation_space.spaces["observation"]
wrapped_action_space = env.action_space
# TODO (external): double check, it seems wrong when we have different low and highs
low_obs = np.repeat(wrapped_obs_space.low, horizon, axis=-1)
high_obs = np.repeat(wrapped_obs_space.high, horizon, axis=-1)
low_action = np.repeat(wrapped_action_space.low, horizon, axis=-1)
high_action = np.repeat(wrapped_action_space.high, horizon, axis=-1)
low = np.concatenate((low_obs, low_action))
high = np.concatenate((high_obs, high_action))
# Overwrite the observation space
env.observation_space.spaces["observation"] = gym.spaces.Box(
low=low, high=high, dtype=wrapped_obs_space.dtype
)
super(HistoryWrapperObsDict, self).__init__(env)
self.horizon = horizon
self.low_action, self.high_action = low_action, high_action
self.low_obs, self.high_obs = low_obs, high_obs
self.low, self.high = low, high
self.obs_history = np.zeros(low_obs.shape, low_obs.dtype)
self.action_history = np.zeros(low_action.shape, low_action.dtype)
def _create_obs_from_history(self):
return np.concatenate((self.obs_history, self.action_history))
def reset(self):
# Flush the history
self.obs_history[...] = 0
self.action_history[...] = 0
obs_dict = self.env.reset()
obs = obs_dict["observation"]
self.obs_history[..., -obs.shape[-1] :] = obs
obs_dict["observation"] = self._create_obs_from_history()
return obs_dict
def step(self, action):
obs_dict, reward, done, info = self.env.step(action)
obs = obs_dict["observation"]
last_ax_size = obs.shape[-1]
self.obs_history = np.roll(self.obs_history, shift=-last_ax_size, axis=-1)
self.obs_history[..., -obs.shape[-1] :] = obs
self.action_history = np.roll(
self.action_history, shift=-action.shape[-1], axis=-1
)
self.action_history[..., -action.shape[-1] :] = action
obs_dict["observation"] = self._create_obs_from_history()
return obs_dict, reward, done, info
class PlotActionWrapper(gym.Wrapper):
"""
Wrapper for plotting the actions taken by the agent.
Only works with 1D actions for now.
Optionally, it can be used to plot the observations too.
:param env: (gym.Env)
:param plot_freq: (int) Plot every `plot_freq` episodes
"""
def __init__(self, env, plot_freq=5):
super(PlotActionWrapper, self).__init__(env)
self.plot_freq = plot_freq
self.current_episode = 0
# Observation buffer (Optional)
# self.observations = []
# Action buffer
self.actions = []
def reset(self):
self.current_episode += 1
if self.current_episode % self.plot_freq == 0:
self.plot()
# Reset
self.actions = []
obs = self.env.reset()
self.actions.append([])
# self.observations.append(obs)
return obs
def step(self, action):
obs, reward, done, info = self.env.step(action)
self.actions[-1].append(action)
# self.observations.append(obs)
return obs, reward, done, info
def plot(self):
actions = self.actions
x = np.arange(sum([len(episode) for episode in actions]))
plt.figure("Actions")
plt.title("Actions during exploration", fontsize=14)
plt.xlabel("Timesteps", fontsize=14)
plt.ylabel("Action", fontsize=14)
start = 0
for i in range(len(self.actions)):
end = start + len(self.actions[i])
plt.plot(x[start:end], self.actions[i])
# Clipped actions: real behavior, note that it is between [-2, 2] for the Pendulum
# plt.scatter(x[start:end], np.clip(self.actions[i], -1, 1), s=1)
# plt.scatter(x[start:end], self.actions[i], s=1)
start = end
plt.show()
class FeatureExtractorFreezeParammetersWrapper(gym.Wrapper):
"""
Freezes parameters of the feature extractor.
"""
def __init__(self, env: gym.Env):
super(FeatureExtractorFreezeParammetersWrapper, self).__init__(env)
for param in self.feature_extractor.parameters():
param.requires_grad = False

View file

@ -0,0 +1,294 @@
#!/usr/bin/env -S python3 -O
import argparse
import os
from typing import Dict
import numpy as np
import torch as th
import yaml
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3.common.vec_env import DummyVecEnv, VecEnv, VecEnvWrapper
from rbs_gym import envs as gz_envs
from rbs_gym.utils import create_test_env, get_latest_run_id, get_saved_hyperparams
from rbs_gym.utils.utils import ALGOS, StoreDict, str2bool
def main(args: Dict):
if args.exp_id == 0:
args.exp_id = get_latest_run_id(
os.path.join(args.log_folder, args.algo), args.env
)
print(f"Loading latest experiment, id={args.exp_id}")
# Sanity checks
if args.exp_id > 0:
log_path = os.path.join(args.log_folder, args.algo, f"{args.env}_{args.exp_id}")
else:
log_path = os.path.join(args.log_folder, args.algo)
assert os.path.isdir(log_path), f"The {log_path} folder was not found"
found = False
for ext in ["zip"]:
model_path = os.path.join(log_path, f"{args.env}.{ext}")
found = os.path.isfile(model_path)
if found:
break
if args.load_best:
model_path = os.path.join(log_path, "best_model.zip")
found = os.path.isfile(model_path)
if args.load_checkpoint is not None:
model_path = os.path.join(
log_path, f"rl_model_{args.load_checkpoint}_steps.zip"
)
found = os.path.isfile(model_path)
if not found:
raise ValueError(
f"No model found for {args.algo} on {args.env}, path: {model_path}"
)
off_policy_algos = ["qrdqn", "dqn", "ddpg", "sac", "her", "td3", "tqc"]
if args.algo in off_policy_algos:
args.n_envs = 1
set_random_seed(args.seed)
if args.num_threads > 0:
if args.verbose > 1:
print(f"Setting torch.num_threads to {args.num_threads}")
th.set_num_threads(args.num_threads)
stats_path = os.path.join(log_path, args.env)
hyperparams, stats_path = get_saved_hyperparams(
stats_path, norm_reward=args.norm_reward, test_mode=True
)
# load env_kwargs if existing
env_kwargs = {}
args_path = os.path.join(log_path, args.env, "args.yml")
if os.path.isfile(args_path):
with open(args_path, "r") as f:
# pytype: disable=module-attr
loaded_args = yaml.load(f, Loader=yaml.UnsafeLoader)
if loaded_args["env_kwargs"] is not None:
env_kwargs = loaded_args["env_kwargs"]
# overwrite with command line arguments
if args.env_kwargs is not None:
env_kwargs.update(args.env_kwargs)
log_dir = args.reward_log if args.reward_log != "" else None
env = create_test_env(
args.env,
n_envs=args.n_envs,
stats_path=stats_path,
seed=args.seed,
log_dir=log_dir,
should_render=not args.no_render,
hyperparams=hyperparams,
env_kwargs=env_kwargs,
)
kwargs = dict(seed=args.seed)
if args.algo in off_policy_algos:
# Dummy buffer size as we don't need memory to evaluate the trained agent
kwargs.update(dict(buffer_size=1))
custom_objects = {'observation_space': env.observation_space, 'action_space': env.action_space}
model = ALGOS[args.algo].load(model_path, env=env, custom_objects=custom_objects, **kwargs)
obs = env.reset()
# Deterministic by default
stochastic = args.stochastic
deterministic = not stochastic
print(
f"Evaluating for {args.n_episodes} episodes with a",
"deterministic" if deterministic else "stochastic",
"policy.",
)
state = None
episode_reward = 0.0
episode_rewards, episode_lengths, success_episode_lengths = [], [], []
ep_len = 0
episode = 0
# For HER, monitor success rate
successes = []
while episode < args.n_episodes:
action, state = model.predict(obs, state=state, deterministic=deterministic)
obs, reward, done, infos = env.step(action)
if not args.no_render:
env.render("human")
episode_reward += reward[0]
ep_len += 1
if done and args.verbose > 0:
episode += 1
print(f"--- Episode {episode}/{args.n_episodes}")
# NOTE: for env using VecNormalize, the mean reward
# is a normalized reward when `--norm_reward` flag is passed
print(f"Episode Reward: {episode_reward:.2f}")
episode_rewards.append(episode_reward)
print("Episode Length", ep_len)
episode_lengths.append(ep_len)
if infos[0].get("is_success") is not None:
print("Success?:", infos[0].get("is_success", False))
successes.append(infos[0].get("is_success", False))
if infos[0].get("is_success"):
success_episode_lengths.append(ep_len)
print(f"Current success rate: {100 * np.mean(successes):.2f}%")
episode_reward = 0.0
ep_len = 0
state = None
if args.verbose > 0 and len(successes) > 0:
print(f"Success rate: {100 * np.mean(successes):.2f}%")
if args.verbose > 0 and len(episode_rewards) > 0:
print(
f"Mean reward: {np.mean(episode_rewards):.2f} "
f"+/- {np.std(episode_rewards):.2f}"
)
if args.verbose > 0 and len(episode_lengths) > 0:
print(
f"Mean episode length: {np.mean(episode_lengths):.2f} "
f"+/- {np.std(episode_lengths):.2f}"
)
if args.verbose > 0 and len(success_episode_lengths) > 0:
print(
f"Mean episode length of successful episodes: {np.mean(success_episode_lengths):.2f} "
f"+/- {np.std(success_episode_lengths):.2f}"
)
# Workaround for https://github.com/openai/gym/issues/893
if not args.no_render:
if args.n_envs == 1 and "Bullet" not in args.env and isinstance(env, VecEnv):
# DummyVecEnv
# Unwrap env
while isinstance(env, VecEnvWrapper):
env = env.venv
if isinstance(env, DummyVecEnv):
env.envs[0].env.close()
else:
env.close()
else:
# SubprocVecEnv
env.close()
if __name__ == "__main__":
parser = argparse.ArgumentParser()
# Environment and its parameters
parser.add_argument(
"--env", type=str, default="Reach-Gazebo-v0", help="Environment ID"
)
parser.add_argument(
"--env-kwargs",
type=str,
nargs="+",
action=StoreDict,
help="Optional keyword argument to pass to the env constructor",
)
parser.add_argument("--n-envs", type=int, default=1, help="Number of environments")
# Algorithm
parser.add_argument(
"--algo",
type=str,
choices=list(ALGOS.keys()),
required=False,
default="sac",
help="RL algorithm to use during the training",
)
parser.add_argument(
"--num-threads",
type=int,
default=-1,
help="Number of threads for PyTorch (-1 to use default)",
)
# Test duration
parser.add_argument(
"-n",
"--n-episodes",
type=int,
default=200,
help="Number of evaluation episodes",
)
# Random seed
parser.add_argument("--seed", type=int, default=0, help="Random generator seed")
# Model to test
parser.add_argument(
"-f", "--log-folder", type=str, default="logs", help="Path to the log directory"
)
parser.add_argument(
"--exp-id",
type=int,
default=0,
help="Experiment ID (default: 0: latest, -1: no exp folder)",
)
parser.add_argument(
"--load-best",
type=str2bool,
default=False,
help="Load best model instead of last model if available",
)
parser.add_argument(
"--load-checkpoint",
type=int,
help="Load checkpoint instead of last model if available, you must pass the number of timesteps corresponding to it",
)
# Deterministic/stochastic actions
parser.add_argument(
"--stochastic",
type=str2bool,
default=False,
help="Use stochastic actions instead of deterministic",
)
# Logging
parser.add_argument(
"--reward-log", type=str, default="reward_logs", help="Where to log reward"
)
parser.add_argument(
"--norm-reward",
type=str2bool,
default=False,
help="Normalize reward if applicable (trained with VecNormalize)",
)
# Disable render
parser.add_argument(
"--no-render",
type=str2bool,
default=False,
help="Do not render the environment (useful for tests)",
)
# Verbosity
parser.add_argument(
"--verbose", type=int, default=1, help="Verbose mode (0: no output, 1: INFO)"
)
args, unknown = parser.parse_known_args()
main(args)
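# Example invocation (the folder layout is illustrative):
#   python3 evaluate.py --algo sac --env Reach-Gazebo-v0 -f logs --load-best true -n 50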

View file

@ -0,0 +1,233 @@
#!/usr/bin/env -S python3 -O
import argparse
import difflib
import os
import uuid
from typing import Dict
import gymnasium as gym
import numpy as np
import torch as th
from stable_baselines3.common.utils import set_random_seed
from rbs_gym import envs as gz_envs
from rbs_gym.utils.exp_manager import ExperimentManager
from rbs_gym.utils.utils import ALGOS, StoreDict, empty_str2none, str2bool
def main(args: Dict):
# Check if the selected environment is valid
# If it could not be found, suggest the closest match
registered_envs = set(gym.envs.registry.keys())
if args.env not in registered_envs:
try:
closest_match = difflib.get_close_matches(args.env, registered_envs, n=1)[0]
except IndexError:
closest_match = "'no close match found...'"
raise ValueError(
f"{args.env} not found in gym registry, you maybe meant {closest_match}?"
)
# If no specific seed is selected, choose a random one
if args.seed < 0:
args.seed = np.random.randint(2**32 - 1, dtype=np.int64).item()
# Set the random seed across platforms
set_random_seed(args.seed)
# Limit the number of PyTorch threads if requested (1 is often faster on CPU)
if args.num_threads > 0:
if args.verbose > 1:
print(f"Setting torch.num_threads to {args.num_threads}")
th.set_num_threads(args.num_threads)
# Verify that pre-trained agent exists before continuing to train it
if args.trained_agent != "":
assert args.trained_agent.endswith(".zip") and os.path.isfile(
args.trained_agent
), "The trained_agent must be a valid path to a .zip file"
# If enabled, ensure that the run has a unique ID
uuid_str = f"_{uuid.uuid4()}" if args.uuid else ""
print("=" * 10, args.env, "=" * 10)
print(f"Seed: {args.seed}")
env_kwargs = {
"render_mode": "human"
}
exp_manager = ExperimentManager(
args,
args.algo,
args.env,
args.log_folder,
args.tensorboard_log,
args.n_timesteps,
args.eval_freq,
args.eval_episodes,
args.save_freq,
args.hyperparams,
args.env_kwargs,
args.trained_agent,
truncate_last_trajectory=args.truncate_last_trajectory,
uuid_str=uuid_str,
seed=args.seed,
log_interval=args.log_interval,
save_replay_buffer=args.save_replay_buffer,
preload_replay_buffer=args.preload_replay_buffer,
verbose=args.verbose,
vec_env_type=args.vec_env,
)
# Prepare experiment
model = exp_manager.setup_experiment()
exp_manager.learn(model)
exp_manager.save_trained_model(model)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
# Environment and its parameters
parser.add_argument(
"--env", type=str, default="Reach-Gazebo-v0", help="Environment ID"
)
parser.add_argument(
"--env-kwargs",
type=str,
nargs="+",
action=StoreDict,
help="Optional keyword argument to pass to the env constructor",
)
parser.add_argument(
"--vec-env",
type=str,
choices=["dummy", "subproc"],
default="dummy",
help="Type of VecEnv to use",
)
# Algorithm and training
parser.add_argument(
"--algo",
type=str,
choices=list(ALGOS.keys()),
required=False,
default="sac",
help="RL algorithm to use during the training",
)
parser.add_argument(
"-params",
"--hyperparams",
type=str,
nargs="+",
action=StoreDict,
help="Optional RL hyperparameter overwrite (e.g. learning_rate:0.01 train_freq:10)",
)
parser.add_argument(
"-n",
"--n-timesteps",
type=int,
default=-1,
help="Overwrite the number of timesteps",
)
parser.add_argument(
"--num-threads",
type=int,
default=-1,
help="Number of threads for PyTorch (-1 to use default)",
)
# Continue training an already trained agent
parser.add_argument(
"-i",
"--trained-agent",
type=str,
default="",
help="Path to a pretrained agent to continue training",
)
# Random seed
parser.add_argument("--seed", type=int, default=-1, help="Random generator seed")
# Saving of model
parser.add_argument(
"--save-freq",
type=int,
default=10000,
help="Save the model every n steps (if negative, no checkpoint)",
)
parser.add_argument(
"--save-replay-buffer",
type=str2bool,
default=False,
help="Save the replay buffer too (when applicable)",
)
# Pre-load a replay buffer and start training on it
parser.add_argument(
"--preload-replay-buffer",
type=empty_str2none,
default="",
help="Path to a replay buffer that should be preloaded before starting the training process",
)
# Logging
parser.add_argument(
"-f", "--log-folder", type=str, default="logs", help="Path to the log directory"
)
parser.add_argument(
"-tb",
"--tensorboard-log",
type=empty_str2none,
default="tensorboard_logs",
help="Tensorboard log dir",
)
parser.add_argument(
"--log-interval",
type=int,
default=-1,
help="Override log interval (default: -1, no change)",
)
parser.add_argument(
"-uuid",
"--uuid",
type=str2bool,
default=False,
help="Ensure that the run has a unique ID",
)
# Evaluation
parser.add_argument(
"--eval-freq",
type=int,
default=-1,
help="Evaluate the agent every n steps (if negative, no evaluation)",
)
parser.add_argument(
"--eval-episodes",
type=int,
default=5,
help="Number of episodes to use for evaluation",
)
# Verbosity
parser.add_argument(
"--verbose", type=int, default=1, help="Verbose mode (0: no output, 1: INFO)"
)
# HER specifics
parser.add_argument(
"--truncate-last-trajectory",
type=str2bool,
default=True,
help="When using HER with online sampling the last trajectory in the replay buffer will be truncated after reloading the replay buffer.",
)
args, unknown = parser.parse_known_args()
main(args=args)

View file

@ -0,0 +1,200 @@
#!/usr/bin/env python3
import time
import gym_gz_models
import gym_gz
from scenario import gazebo as scenario_gazebo
from scenario import core as scenario_core
import rclpy
from rclpy.node import Node
from scipy.spatial.transform import Rotation as R
import numpy as np
from geometry_msgs.msg import PoseStamped
from rclpy.executors import MultiThreadedExecutor
from rbs_skill_servers import CartesianControllerPublisher, TakePose
from rclpy.action import ActionClient
from control_msgs.action import GripperCommand
class Spawner(Node):
def __init__(self):
super().__init__("spawner")
self.gazebo = scenario_gazebo.GazeboSimulator(step_size=0.001,
rtf=1.0,
steps_per_run=1)
self.cartesian_pose = self.create_publisher(
PoseStamped,
"/" + "arm0" + "/cartesian_motion_controller/target_frame", 10)
self.current_pose_sub = self.create_subscription(PoseStamped,
"/arm0/cartesian_motion_controller/current_pose", self.callback, 10)
self._action_client = ActionClient(self,
GripperCommand,
"/" + "arm0" + "/gripper_controller/gripper_cmd")
timer_period = 0.001 # seconds
self.timer = self.create_timer(timer_period, self.timer_callback)
self.ano_timer = self.create_timer(timer_period, self.another_timer)
scenario_gazebo.set_verbosity(scenario_gazebo.Verbosity_info)
self.gazebo.insert_world_from_sdf(
"/home/bill-finger/rbs_ws/install/rbs_simulation/share/rbs_simulation/worlds/asm2.sdf")
self.gazebo.initialize()
self.world = self.gazebo.get_world()
self.current_pose: PoseStamped = PoseStamped()
self.init_sim()
self.cube = self.world.get_model("cube")
self.stage = 0
self.gripper_open = False
def callback(self, msg: PoseStamped):
self.current_pose = msg
def timer_callback(self):
self.gazebo.run()
def send_goal(self, goal: float):
goal_msg = GripperCommand.Goal()
goal_msg.command.position = goal
goal_msg.command.max_effort = 1.0
self._action_client.wait_for_server()
self.gripper_open = not self.gripper_open
self._send_goal_future = self._action_client.send_goal_async(goal_msg)
self._send_goal_future.add_done_callback(self.goal_response_callback)
def goal_response_callback(self, future):
goal_handle = future.result()
if not goal_handle.accepted:
self.get_logger().info('Goal rejected :(')
return
self.get_logger().info('Goal accepted :)')
self._get_result_future = goal_handle.get_result_async()
self._get_result_future.add_done_callback(self.get_result_callback)
def get_result_callback(self, future):
result = future.result().result
self.get_logger().info('Result: {0}'.format(result.position))
def another_timer(self):
position_over_cube = np.array(self.cube.base_position()) + np.array([0, 0, 0.2])
position_cube = np.array(self.cube.base_position()) + np.array([0, 0, 0.03])
quat_xyzw = R.from_euler(seq="y", angles=180, degrees=True).as_quat()
if self.stage == 0:
if self.distance_to_target(position_over_cube, quat_xyzw) > 0.01:
self.cartesian_pose.publish(self.get_pose(position_over_cube, quat_xyzw))
if self.distance_to_target(position_over_cube, quat_xyzw) < 0.01:
self.stage += 1
if self.stage == 1:
if self.distance_to_target(position_cube, quat_xyzw) > 0.01:
if not self.gripper_open:
self.send_goal(0.064)
# rclpy.spin_until_future_complete(self, future)
self.cartesian_pose.publish(self.get_pose(position_cube, quat_xyzw))
if self.distance_to_target(position_cube, quat_xyzw) < 0.01:
self.stage += 1
def distance_to_target(self, position, orientation):
target_pose = self.get_pose(position, orientation)
current_position = np.array([
self.current_pose.pose.position.x,
self.current_pose.pose.position.y,
self.current_pose.pose.position.z
])
target_position = np.array([
target_pose.pose.position.x,
target_pose.pose.position.y,
target_pose.pose.position.z
])
distance = np.linalg.norm(current_position - target_position)
return distance
def init_sim(self):
# Create the simulator
self.gazebo.gui()
self.gazebo.run(paused=True)
self.world.to_gazebo().set_gravity((0, 0, -9.8))
self.world.insert_model("/home/bill-finger/rbs_ws/current.urdf")
self.gazebo.run(paused=True)
for model_name in self.world.model_names():
model = self.world.get_model(model_name)
print(f"Model: {model_name}")
print(f" Base link: {model.base_frame()}")
print("LINKS")
for name in model.link_names():
position = model.get_link(name).position()
orientation_wxyz = np.asarray(model.get_link(name).orientation())
orientation = R.from_quat(orientation_wxyz[[1, 2, 3, 0]]).as_euler("xyz")
print(f" {name}:", (*position, *tuple(orientation)))
print("JOINTS")
for name in model.joint_names():
print(f"{name}")
uri = lambda org, name: f"https://fuel.gazebosim.org/{org}/models/{name}"
# Download the cube SDF file
cube_sdf = scenario_gazebo.get_model_file_from_fuel(
uri=uri(org="openrobotics", name="wood cube 5cm"), use_cache=False
)
# Sample a random position
random_position = np.random.uniform(low=[-0.3, -0.2, 0.0], high=[-0.2, 0.2, 0.0])
# Get a unique name
model_name = gym_gz.utils.scenario.get_unique_model_name(
world=self.world, model_name="cube"
)
# Insert the model
assert self.world.insert_model(
cube_sdf, scenario_core.Pose(random_position, [1.0, 0, 0, 0]), model_name
)
model = self.world.get_model("rbs_arm")
self.cube = self.world.get_model("cube")
ok_reset_pos = model.to_gazebo().reset_joint_positions(
[0.0, -0.240, -3.142, 1.090, 0, 1.617, 0.0, 0.0, 0.0],
[name for name in model.joint_names() if "_joint" in name]
)
if not ok_reset_pos:
raise RuntimeError("Failed to reset the robot state")
def get_pose(self, position, orientation) -> PoseStamped:
msg = PoseStamped()
msg.header.stamp = self.get_clock().now().to_msg()
msg.header.frame_id = "base_link"
msg.pose.position.x = position[0]
msg.pose.position.y = position[1]
msg.pose.position.z = position[2]
msg.pose.orientation.x = orientation[0]
msg.pose.orientation.y = orientation[1]
msg.pose.orientation.z = orientation[2]
msg.pose.orientation.w = orientation[3]
return msg
def main(args=None):
rclpy.init(args=args)
executor = MultiThreadedExecutor()
my_node = Spawner()
executor.add_node(my_node)
executor.spin()
my_node.gazebo.close()
my_node.destroy_node()
rclpy.shutdown()
if __name__ == "__main__":
main()

View file

@ -0,0 +1,101 @@
#!/usr/bin/env python3
import argparse
from typing import Dict
import gymnasium as gym
from stable_baselines3.common.env_checker import check_env
from rbs_gym import envs as gz_envs
from rbs_gym.utils.utils import StoreDict, str2bool
def main(args: Dict):
# Create the environment
env = gym.make(args.env, **(args.env_kwargs or {}))
# Initialize the random seed (gymnasium environments are seeded via reset)
env.reset(seed=args.seed)
# Check the environment
if args.check_env:
check_env(env, warn=True, skip_render_check=True)
# Step environment for bunch of episodes
for episode in range(args.n_episodes):
# Initialize returned values
done = False
total_reward = 0
# Reset the environment
observation, info = env.reset()
# Step through the current episode until it is done
while not done:
# Sample random action
action = env.action_space.sample()
# Step the environment with the random action
observation, reward, terminated, truncated, info = env.step(action)
done = terminated or truncated
# Accumulate the reward
total_reward += reward
print(f"Episode #{episode}\n\treward: {total_reward}")
# Cleanup once done
env.close()
if __name__ == "__main__":
parser = argparse.ArgumentParser()
# Environment and its parameters
parser.add_argument(
"--env", type=str, default="Reach-Gazebo-v0", help="Environment ID"
)
parser.add_argument(
"--env-kwargs",
type=str,
nargs="+",
action=StoreDict,
help="Optional keyword argument to pass to the env constructor",
)
# Number of episodes to run
parser.add_argument(
"-n",
"--n-episodes",
type=int,
default=10000,
help="Overwrite the number of episodes",
)
# Random seed
parser.add_argument("--seed", type=int, default=69, help="Random generator seed")
# Flag to check environment
parser.add_argument(
"--check-env",
type=str2bool,
default=True,
help="Flag to check the environment before running the random agent",
)
# Flag to enable rendering
parser.add_argument(
"--render",
type=str2bool,
default=False,
help="Flag to enable rendering",
)
args, unknown = parser.parse_known_args()
main(args=args)
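# Example invocation (the arguments are illustrative):
#   python3 test_agent.py --env Reach-Gazebo-v0 --check-env true -n 10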

View file

@ -0,0 +1,284 @@
#!/usr/bin/env -S python3 -O
import argparse
import difflib
import os
import uuid
from typing import Dict
import gymnasium as gym
import numpy as np
import torch as th
from stable_baselines3.common.utils import set_random_seed
from rbs_gym import envs as gz_envs
from rbs_gym.utils.exp_manager import ExperimentManager
from rbs_gym.utils.utils import ALGOS, StoreDict, empty_str2none, str2bool
def main(args: Dict):
# Check if the selected environment is valid
# If it could not be found, suggest the closest match
registered_envs = set(gym.envs.registry.keys())
if args.env not in registered_envs:
try:
closest_match = difflib.get_close_matches(args.env, registered_envs, n=1)[0]
except IndexError:
closest_match = "'no close match found...'"
raise ValueError(
f"{args.env} not found in gym registry, you maybe meant {closest_match}?"
)
# If no specific seed is selected, choose a random one
if args.seed < 0:
args.seed = np.random.randint(2**32 - 1, dtype=np.int64).item()
# Set the random seed across platforms
set_random_seed(args.seed)
# Limit the number of PyTorch threads if requested (1 is often faster on CPU)
if args.num_threads > 0:
if args.verbose > 1:
print(f"Setting torch.num_threads to {args.num_threads}")
th.set_num_threads(args.num_threads)
# Verify that pre-trained agent exists before continuing to train it
if args.trained_agent != "":
assert args.trained_agent.endswith(".zip") and os.path.isfile(
args.trained_agent
), "The trained_agent must be a valid path to a .zip file"
# If enabled, ensure that the run has a unique ID
uuid_str = f"_{uuid.uuid4()}" if args.uuid else ""
print("=" * 10, args.env, "=" * 10)
print(f"Seed: {args.seed}")
if args.track:
try:
import wandb
import datetime
except ImportError as e:
raise ImportError(
"if you want to use Weights & Biases to track experiment, please install W&B via `pip install wandb`"
) from e
run_name = f"{args.env}__{args.algo}__{args.seed}__{datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}"
tags = []
run = wandb.init(
name=run_name,
project="rbs-gym",
entity=None,
tags=tags,
config=vars(args),
sync_tensorboard=True, # auto-upload sb3's tensorboard metrics
monitor_gym=False, # auto-upload the videos of agents playing the game
save_code=True, # optional
)
args.tensorboard_log = f"runs/{run_name}"
exp_manager = ExperimentManager(
args,
args.algo,
args.env,
args.log_folder,
args.tensorboard_log,
args.n_timesteps,
args.eval_freq,
args.eval_episodes,
args.save_freq,
args.hyperparams,
args.env_kwargs,
args.trained_agent,
args.optimize_hyperparameters,
truncate_last_trajectory=args.truncate_last_trajectory,
uuid_str=uuid_str,
seed=args.seed,
log_interval=args.log_interval,
save_replay_buffer=args.save_replay_buffer,
preload_replay_buffer=args.preload_replay_buffer,
verbose=args.verbose,
vec_env_type=args.vec_env,
no_optim_plots=args.no_optim_plots,
)
# Prepare experiment
results = exp_manager.setup_experiment()
if results is not None:
model, saved_hyperparams = results
if args.track:
# we need to save the loaded hyperparameters
args.saved_hyperparams = saved_hyperparams
assert run is not None # make mypy happy
run.config.setdefaults(vars(args))
# Normal training
if model is not None:
exp_manager.learn(model)
exp_manager.save_trained_model(model)
else:
exp_manager.hyperparameters_optimization()
if __name__ == "__main__":
parser = argparse.ArgumentParser()
# Environment and its parameters
parser.add_argument(
"--env", type=str, default="Reach-Gazebo-v0", help="Environment ID"
)
parser.add_argument(
"--env-kwargs",
type=str,
nargs="+",
action=StoreDict,
help="Optional keyword argument to pass to the env constructor",
)
parser.add_argument(
"--vec-env",
type=str,
choices=["dummy", "subproc"],
default="dummy",
help="Type of VecEnv to use",
)
# Algorithm and training
parser.add_argument(
"--algo",
type=str,
choices=list(ALGOS.keys()),
required=False,
default="sac",
help="RL algorithm to use during the training",
)
parser.add_argument(
"-params",
"--hyperparams",
type=str,
nargs="+",
action=StoreDict,
help="Optional RL hyperparameter overwrite (e.g. learning_rate:0.01 train_freq:10)",
)
parser.add_argument(
"-n",
"--n-timesteps",
type=int,
default=-1,
help="Overwrite the number of timesteps",
)
parser.add_argument(
"--num-threads",
type=int,
default=-1,
help="Number of threads for PyTorch (-1 to use default)",
)
# Continue training an already trained agent
parser.add_argument(
"-i",
"--trained-agent",
type=str,
default="",
help="Path to a pretrained agent to continue training",
)
# Random seed
parser.add_argument("--seed", type=int, default=-1, help="Random generator seed")
# Saving of model
parser.add_argument(
"--save-freq",
type=int,
default=10000,
help="Save the model every n steps (if negative, no checkpoint)",
)
parser.add_argument(
"--save-replay-buffer",
type=str2bool,
default=False,
help="Save the replay buffer too (when applicable)",
)
# Pre-load a replay buffer and start training on it
parser.add_argument(
"--preload-replay-buffer",
type=empty_str2none,
default="",
help="Path to a replay buffer that should be preloaded before starting the training process",
)
parser.add_argument(
"--track",
type=str2bool,
default=False,
help="Track experiment using wandb"
)
# optimization parameters
parser.add_argument(
"--optimize-hyperparameters",
type=str2bool,
default=False,
help="Run optimization or not?"
)
parser.add_argument(
"--no-optim-plots", action="store_true", default=False, help="Disable hyperparameter optimization plots"
)
# Logging
parser.add_argument(
"-f", "--log-folder", type=str, default="logs", help="Path to the log directory"
)
parser.add_argument(
"-tb",
"--tensorboard-log",
type=empty_str2none,
default="tensorboard_logs",
help="Tensorboard log dir",
)
parser.add_argument(
"--log-interval",
type=int,
default=-1,
help="Override log interval (default: -1, no change)",
)
parser.add_argument(
"-uuid",
"--uuid",
type=str2bool,
default=False,
help="Ensure that the run has a unique ID",
)
# Evaluation
parser.add_argument(
"--eval-freq",
type=int,
default=-1,
help="Evaluate the agent every n steps (if negative, no evaluation)",
)
parser.add_argument(
"--eval-episodes",
type=int,
default=5,
help="Number of episodes to use for evaluation",
)
# Verbosity
parser.add_argument(
"--verbose", type=int, default=1, help="Verbose mode (0: no output, 1: INFO)"
)
# HER specifics
parser.add_argument(
"--truncate-last-trajectory",
type=str2bool,
default=True,
help="When using HER with online sampling the last trajectory in the replay buffer will be truncated after reloading the replay buffer.",
)
args, unknown = parser.parse_known_args()
main(args=args)
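# Example invocation (the arguments are illustrative):
#   python3 train.py --algo tqc --env Reach-Gazebo-v0 -n 500000 --track true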

View file

@ -0,0 +1,138 @@
#!/usr/bin/env python3
import rclpy
from rclpy.node import Node
import numpy as np
import quaternion
from geometry_msgs.msg import Twist
from geometry_msgs.msg import PoseStamped
import tf2_ros
import sys
import time
import threading
import os
class Converter(Node):
"""Convert Twist messages to PoseStamped
Use this node to integrate twist messages into a moving target pose in
Cartesian space. An initial TF lookup ensures that the target pose always
starts at the robot's end-effector.
"""
def __init__(self):
super().__init__("converter")
self.twist_topic = self.declare_parameter("twist_topic", "/cartesian_motion_controller/target_twist").value
self.pose_topic = self.declare_parameter("pose_topic", "/cartesian_motion_controller/target_frame").value
self.frame_id = self.declare_parameter("frame_id", "base_link").value
self.end_effector = self.declare_parameter("end_effector", "gripper_grasp_point").value
self.tf_buffer = tf2_ros.Buffer()
self.tf_listener = tf2_ros.TransformListener(self.tf_buffer, self)
self.rot = np.quaternion(0, 0, 0, 1)
self.pos = [0, 0, 0]
self.pub = self.create_publisher(PoseStamped, self.pose_topic, 3)
self.sub = self.create_subscription(Twist, self.twist_topic, self.twist_cb, 1)
self.last = time.time()
self.startup_done = False
period = 1.0 / self.declare_parameter("publishing_rate", 100).value
self.timer = self.create_timer(period, self.publish)
self.thread = threading.Thread(target=self.startup, daemon=True)
self.thread.start()
def startup(self):
"""Make sure to start at the robot's current pose"""
# Wait until we entered spinning in the main thread.
time.sleep(1)
try:
start = self.tf_buffer.lookup_transform(
target_frame=self.frame_id,
source_frame=self.end_effector,
time=rclpy.time.Time(),
)
except (
tf2_ros.InvalidArgumentException,
tf2_ros.LookupException,
tf2_ros.ConnectivityException,
tf2_ros.ExtrapolationException,
) as e:
print(f"Startup failed: {e}")
os._exit(1)
self.pos[0] = start.transform.translation.x
self.pos[1] = start.transform.translation.y
self.pos[2] = start.transform.translation.z
self.rot.x = start.transform.rotation.x
self.rot.y = start.transform.rotation.y
self.rot.z = start.transform.rotation.z
self.rot.w = start.transform.rotation.w
self.startup_done = True
def twist_cb(self, data):
"""Numerically integrate twist message into a pose
Use global self.frame_id as reference for the navigation commands.
"""
now = time.time()
dt = now - self.last
self.last = now
# Position update
self.pos[0] += data.linear.x * dt
self.pos[1] += data.linear.y * dt
self.pos[2] += data.linear.z * dt
# Orientation update
wx = data.angular.x
wy = data.angular.y
wz = data.angular.z
_, q = quaternion.integrate_angular_velocity(
lambda _: (wx, wy, wz), 0, dt, self.rot
)
self.rot = q[-1] # the last one is after dt passed
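# Sketch of the update above (the rates are illustrative): a constant twist of
# 0.1 m/s along x arriving at 100 Hz advances the target pose by about 1 mm
# per message, while the angular part is integrated exactly over dt by
# quaternion.integrate_angular_velocity.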
def publish(self):
if not self.startup_done:
return
try:
msg = PoseStamped()
msg.header.stamp = self.get_clock().now().to_msg()
msg.header.frame_id = self.frame_id
msg.pose.position.x = self.pos[0]
msg.pose.position.y = self.pos[1]
msg.pose.position.z = self.pos[2]
msg.pose.orientation.x = self.rot.x
msg.pose.orientation.y = self.rot.y
msg.pose.orientation.z = self.rot.z
msg.pose.orientation.w = self.rot.w
self.pub.publish(msg)
except Exception:
# Swallow 'publish() to closed topic' error.
# This rarely happens on killing this node.
pass
def main(args=None):
rclpy.init(args=args)
node = Converter()
rclpy.spin(node)
if __name__ == "__main__":
try:
main()
except KeyboardInterrupt:
rclpy.shutdown()
sys.exit(0)
except Exception as e:
print(e)
sys.exit(1)

View file

@ -4,7 +4,7 @@ scene_config:
pose:
position:
x: -0.45
y: -2.0
y: 0.0
z: 1.6
orientation:
x: 3.14159
@ -15,7 +15,7 @@ scene_config:
pose:
position:
x: 0.45
y: -2.0
y: 0.0
z: 1.6
orientation:
x: 3.14159

View file

@ -285,7 +285,7 @@ def generate_launch_description():
)
declared_arguments.append(
DeclareLaunchArgument("robots_config_file",
default_value="robot_scene",
default_value="roboclone.yaml",
description="Filename for config file with robots in scene")
)

View file

@ -14,12 +14,14 @@ from launch_ros.substitutions import FindPackageShare
import xacro
import os
from ament_index_python.packages import get_package_share_directory
from rbs_launch_utils.launch_common import load_yaml
def launch_setup(context, *args, **kwargs):
# Initialize Arguments
robot_type = LaunchConfiguration("robot_type")
# General arguments
launch_rviz = LaunchConfiguration("launch_rviz")
with_gripper_condition = LaunchConfiguration("with_gripper")
controllers_file = LaunchConfiguration("controllers_file")
cartesian_controllers = LaunchConfiguration("cartesian_controllers")
@ -44,14 +46,20 @@ def launch_setup(context, *args, **kwargs):
pitch = LaunchConfiguration("pitch")
yaw = LaunchConfiguration("yaw")
namespace = LaunchConfiguration("namespace")
multi_robot = LaunchConfiguration("multi_robot")
robot_name = robot_name.perform(context)
namespace = namespace.perform(context)
robot_type = robot_type.perform(context)
description_package = description_package.perform(context)
description_file = description_file.perform(context)
controllers_file = controllers_file.perform(context)
multi_robot = multi_robot.perform(context)
# remappings = [("/tf", "tf"), ("/tf_static", "tf_static")]
remappings = []
if multi_robot == "true":
remappings.extend([("/tf", "tf"), ("/tf_static", "tf_static")])
controllers_file = os.path.join(get_package_share_directory(description_package), "config", controllers_file)
xacro_file = os.path.join(get_package_share_directory(description_package), "urdf", description_file)
robot_description_doc = xacro.process_file(
@ -67,7 +75,6 @@ def launch_setup(context, *args, **kwargs):
"roll": roll.perform(context),
"pitch": pitch.perform(context),
"yaw": yaw.perform(context)
#TODO: add rotation and add probably via dict
}
)
@ -83,7 +90,7 @@ def launch_setup(context, *args, **kwargs):
[FindPackageShare(moveit_config_package), "config/moveit", "rbs_arm.srdf.xacro"]
),
" ",
"name:=",robot_name," ",
"name:=",robot_type," ",
"with_gripper:=",with_gripper_condition, " ",
"gripper_name:=", gripper_name, " ",
]
@ -93,12 +100,19 @@ def launch_setup(context, *args, **kwargs):
robot_description_kinematics = PathJoinSubstitution(
[FindPackageShare(moveit_config_package), "config", "kinematics.yaml"]
)
# kinematics_yaml = load_yaml("rbs_arm", "config/kinematics.yaml")
robot_description_kinematics = {"robot_description_kinematics": robot_description_kinematics}
rviz_config_file = PathJoinSubstitution(
[FindPackageShare("rbs_bringup"), "config", "rbs.rviz"]
)
robot_state_publisher = Node(
package="robot_state_publisher",
executable="robot_state_publisher",
namespace=namespace,
output="both",
# remappings=remappings,
remappings=remappings,
parameters=[{"use_sim_time": use_sim_time}, robot_description],
)
@ -121,6 +135,19 @@ def launch_setup(context, *args, **kwargs):
]
)
rviz = Node(
package="rviz2",
executable="rviz2",
name="rviz2",
output="log",
arguments=["-d", rviz_config_file],
parameters=[
robot_description,
robot_description_semantic,
robot_description_kinematics
],
condition=IfCondition(launch_rviz))
control = IncludeLaunchDescription(
PythonLaunchDescriptionSource([
PathJoinSubstitution([
@ -209,9 +236,9 @@ def launch_setup(context, *args, **kwargs):
control,
moveit,
skills,
task_planner,
perception,
# rviz
# task_planner,
# perception,
rviz
]
return nodes_to_start
@ -230,7 +257,7 @@ def generate_launch_description():
declared_arguments.append(
DeclareLaunchArgument(
"controllers_file",
default_value="rbs_arm_controllers_gazebosim.yaml",
default_value="rbs_arm0_controllers.yaml",
description="YAML file with the controllers configuration.",
)
)
@ -310,7 +337,7 @@ def generate_launch_description():
)
declared_arguments.append(
DeclareLaunchArgument("launch_rviz",
default_value="true",
default_value="false",
description="Launch RViz?")
)
declared_arguments.append(
@ -401,6 +428,29 @@ def generate_launch_description():
default_value="0.0",
description="Position of robot in world by Z")
)
declared_arguments.append(
DeclareLaunchArgument("roll",
default_value="0.0",
description="Orientation of robot in world around X (roll)")
)
declared_arguments.append(
DeclareLaunchArgument("pitch",
default_value="0.0",
description="Orientation of robot in world around Y (pitch)")
)
declared_arguments.append(
DeclareLaunchArgument("yaw",
default_value="0.0",
description="Orientation of robot in world around Z (yaw)")
)
declared_arguments.append(
DeclareLaunchArgument(
"multi_robot",
default_value="false",
description="Flag if you use multi robot setup"
)
)
return LaunchDescription(declared_arguments + [OpaqueFunction(function=launch_setup)])

View file

@ -76,6 +76,9 @@ def launch_setup(context, *args, **kwargs):
)
# controller_paramfile = configured_params.perform(context)
# controller_paramfile = PathJoinSubstitution([
# FindPackageShare(robot_type), "config", "rbs_arm0_controllers.yaml"
# ])
# namespace = "/" + robot_name.perform(context)
namespace = ""
@ -102,7 +105,7 @@ def launch_setup(context, *args, **kwargs):
"gripper_name": gripper_name,
"controllers_file": controllers_file,
"robot_type": robot_type,
"controllers_file": initial_joint_controllers_file_path,
# "controllers_file": controller_paramfile,
"cartesian_controllers": cartesian_controllers,
"description_package": description_package,
"description_file": description_file,
@ -148,7 +151,7 @@ def generate_launch_description():
declared_arguments.append(
DeclareLaunchArgument(
"controllers_file",
default_value="rbs_arm_controllers_gazebosim.yaml",
default_value="rbs_arm0_controllers.yaml",
description="YAML file with the controllers configuration.",
)
)
@ -221,7 +224,7 @@ def generate_launch_description():
)
declared_arguments.append(
DeclareLaunchArgument("with_gripper",
default_value="true",
default_value="false",
description="With gripper or not?")
)
declared_arguments.append(

View file

@ -4,7 +4,7 @@ import math
import os
import sys
import yaml
from typing import Dict
from typing import Any, Dict
from ament_index_python.packages import get_package_share_directory
@ -24,7 +24,7 @@ def construct_angle_degrees(loader, node) -> float:
"""Utility function for converting degrees into radians from yaml."""
return math.radians(construct_angle_radians(loader, node))
def load_yaml(package_name: str, file_path: str) -> Dict:
def load_yaml(package_name: str, file_path: str) -> dict[str, Any]:
package_path = get_package_share_directory(package_name)
absolute_file_path = os.path.join(package_path, file_path)
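For context, a sketch of how the degree/radian constructors above are typically registered and used — the !degrees/!radians tag names and the SafeLoader registration are assumptions, since the registration happens outside this hunk:
yaml.SafeLoader.add_constructor("!radians", construct_angle_radians)  # assumed tag
yaml.SafeLoader.add_constructor("!degrees", construct_angle_degrees)  # assumed tag
params = load_yaml("rbs_bt_executor", "config/gripperPositions.yaml")
# With the constructors registered, a YAML entry such as
#   yaw: !degrees 90.0
# loads as 1.5707..., i.e. already converted to radians.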

View file

@ -6,7 +6,7 @@
<Action ID="EnvStarter" env_class="gz_enviroment::GzEnviroment" env_name="gz_enviroment"
server_name="/env_manager/start_env" server_timeout="1000" workspace="{workspace}" />
<SubTreePlus ID="WorkspaceInspection" __autoremap="1" goal_pose="{workspace}"
robot_name="rbs_arm" />
robot_name="ur_manipulator" />
</Sequence>
</BehaviorTree>
<!-- ////////// -->

View file

@ -28,6 +28,8 @@ public:
if (!target_pose_vec_.poses.empty()) {
goal.robot_name = robot_name_;
goal.target_pose = target_pose_vec_.poses.at(0);
goal.end_effector_acceleration = 1.0;
goal.end_effector_velocity = 1.0;
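// NOTE: assumed to be MoveIt-style scaling factors in (0, 1];
// 1.0 requests the full end-effector velocity/acceleration limits.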
target_pose_vec_.poses.erase(target_pose_vec_.poses.begin());
setOutput<geometry_msgs::msg::PoseArray>("pose_vec_out",

View file

@ -1,561 +0,0 @@
import os
from launch import LaunchDescription, LaunchContext
from launch.actions import (
DeclareLaunchArgument,
IncludeLaunchDescription,
ExecuteProcess,
OpaqueFunction
)
from ament_index_python.packages import get_package_share_directory
from launch.conditions import IfCondition, UnlessCondition
from launch.launch_description_sources import PythonLaunchDescriptionSource
from launch.substitutions import Command, FindExecutable, LaunchConfiguration, PathJoinSubstitution
from launch_ros.actions import Node
from launch_ros.substitutions import FindPackageShare
from ur_moveit_config.launch_common import load_yaml
import xacro
def generate_launch_description():
declared_arguments = []
# UR specific arguments
declared_arguments.append(
DeclareLaunchArgument(
"rbs_robot_type",
description="Type of robot by name",
choices=["ur3", "ur3e", "ur5", "ur5e", "ur10", "ur10e", "ur16e"],
default_value="ur5e",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"with_gripper",
default_value="false",
description="With gripper or not?",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"safety_limits",
default_value="true",
description="Enables the safety limits controller if true.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"safety_pos_margin",
default_value="0.15",
description="The margin to lower and upper limits in the safety controller.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"safety_k_position",
default_value="20",
description="k-position factor in the safety controller.",
)
)
# General arguments
declared_arguments.append(
DeclareLaunchArgument(
"runtime_config_package",
default_value="ur_moveit_config",
description='Package with the controller\'s configuration in "config" folder. \
Usually the argument is not set; it enables use of a custom setup.',
)
)
declared_arguments.append(
DeclareLaunchArgument(
"controllers_file",
default_value="ur_controllers.yaml",
description="YAML file with the controllers configuration.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"controllers_with_gripper_file",
default_value="ur_plus_gripper_controllers.yaml",
description="YAML file with the UR + gripper_controller configuration.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"description_package",
default_value="ur_description",
description="Description package with robot URDF/XACRO files. Usually the argument \
is not set; it enables use of a custom description.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"description_file",
default_value="ur.urdf.xacro",
description="URDF/XACRO description file with the robot.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"tf_prefix",
default_value='""',
description="tf_prefix of the joint names, useful for \
multi-robot setup. If changed, then the joint names in the controllers' configuration \
have to be updated.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"start_joint_controller",
default_value="false",
description="Enable headless mode for robot control",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"initial_joint_controller",
default_value="joint_trajectory_controller",
description="Robot controller to start.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"initial_gripper_controller",
default_value="gripper_controller",
description="Gripper controller to start.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"moveit_config_package",
default_value="ur_moveit_config",
description="MoveIt config package with robot SRDF/XACRO files. Usually the argument \
is not set; it enables use of a custom moveit config.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"moveit_config_file",
default_value="ur.srdf.xacro",
description="MoveIt SRDF/XACRO description file with the robot.",
)
)
declared_arguments.append(
DeclareLaunchArgument(
"use_sim_time",
default_value="true",
description="Make MoveIt use simulation time. This is needed for trajectory planning in simulation.",
)
)
declared_arguments.append(
DeclareLaunchArgument("launch_rviz", default_value="true", description="Launch RViz?")
)
declared_arguments.append(
DeclareLaunchArgument("sim_gazebo", default_value="false", description="Gazebo Simulation")
)
declared_arguments.append(
DeclareLaunchArgument("sim_mujoco", default_value="true", description="Mujoco Simulation")
)
declared_arguments.append(
DeclareLaunchArgument("sim_fake", default_value="false", description="Fake (mock) simulation")
)
# Initialize Arguments
rbs_robot_type = LaunchConfiguration("rbs_robot_type")
safety_limits = LaunchConfiguration("safety_limits")
safety_pos_margin = LaunchConfiguration("safety_pos_margin")
safety_k_position = LaunchConfiguration("safety_k_position")
# General arguments
runtime_config_package = LaunchConfiguration("runtime_config_package")
with_gripper_condition = LaunchConfiguration("with_gripper")
controllers_file = LaunchConfiguration("controllers_file")
description_package = LaunchConfiguration("description_package")
description_file = LaunchConfiguration("description_file")
tf_prefix = LaunchConfiguration("tf_prefix")
start_joint_controller = LaunchConfiguration("start_joint_controller")
initial_joint_controller = LaunchConfiguration("initial_joint_controller")
initial_gripper_controller = LaunchConfiguration("initial_gripper_controller")
launch_rviz = LaunchConfiguration("launch_rviz")
moveit_config_package = LaunchConfiguration("moveit_config_package")
moveit_config_file = LaunchConfiguration("moveit_config_file")
use_sim_time = LaunchConfiguration("use_sim_time")
sim_gazebo = LaunchConfiguration("sim_gazebo")
sim_mujoco = LaunchConfiguration("sim_mujoco")
sim_fake = LaunchConfiguration("sim_fake")
initial_joint_controllers_file_path = PathJoinSubstitution(
[FindPackageShare(runtime_config_package), "config", controllers_file]
)
rviz_config_file = PathJoinSubstitution(
[FindPackageShare(moveit_config_package), "rviz", "view_robot.rviz"]
)
world_config_file = PathJoinSubstitution(
[FindPackageShare("rbs_simulation"), "worlds", "mir.sdf"]
)
mujoco_model = PathJoinSubstitution(
[FindPackageShare("rbs_simulation"), "mujoco_model", "current_mj.xml"]
)
assemble_dir = os.path.join(
get_package_share_directory("rbs_task_planner"), "example", "sdf_models"
)
points_params = load_yaml("rbs_bt_executor", "config/gripperPositions.yaml")
robot_description_content = Command(
[
PathJoinSubstitution([FindExecutable(name="xacro")]),
" ",
PathJoinSubstitution(
[FindPackageShare(description_package), "urdf", description_file]
),
" ",
"safety_limits:=", safety_limits, " ",
"safety_pos_margin:=", safety_pos_margin, " ",
"safety_k_position:=", safety_k_position, " ",
"name:=", "ur", " ",
"ur_type:=", rbs_robot_type, " ",
"tf_prefix:=", tf_prefix, " ",
"sim_mujoco:=", sim_mujoco, " ",
"sim_gazebo:=", sim_gazebo, " ",
"sim_fake:=", sim_fake, " ",
"simulation_controllers:=", initial_joint_controllers_file_path, " ",
"with_gripper:=", with_gripper_condition, " ",
"mujoco_model:=", mujoco_model,
]
)
robot_description = {"robot_description": robot_description_content}
control_node = Node(
package="controller_manager",
executable="ros2_control_node",
parameters=[robot_description, initial_joint_controllers_file_path],
output="both",
remappings=[
('motion_control_handle/target_frame', 'target_frame'),
('cartesian_compliance_controller/target_frame', 'target_frame'),
('cartesian_compliance_controller/target_wrench', 'target_wrench'),
('cartesian_compliance_controller/ft_sensor_wrench', 'ft_sensor_wrench'),
]
)
robot_state_publisher_node = Node(
package="robot_state_publisher",
executable="robot_state_publisher",
output="both",
parameters=[{"use_sim_time": True}, robot_description],
)
joint_state_broadcaster_spawner = Node(
package="controller_manager",
executable="spawner",
arguments=["joint_state_broadcaster", "-c", "/controller_manager"],
)
# There may be other controllers of the joints, but this is the initially-started one
initial_joint_controller_spawner_started = Node(
package="controller_manager",
executable="spawner",
arguments=[initial_joint_controller, "-c", "/controller_manager"],
condition=IfCondition(start_joint_controller),
)
initial_joint_controller_spawner_stopped = Node(
package="controller_manager",
executable="spawner",
arguments=[initial_joint_controller, "-c", "/controller_manager", "--inactive"],
condition=UnlessCondition(start_joint_controller),
)
gripper_controller = ExecuteProcess(
cmd=['ros2', 'control', 'load_controller', '--set-state', 'active',
"gripper_controller"],
output='screen',
condition=IfCondition(with_gripper_condition)
)
cartesian_motion_controller_spawner = Node(
package="controller_manager",
executable="spawner",
arguments=["cartesian_motion_controller", "--inactive", "-c", "/controller_manager"],
)
motion_control_handle_spawner = Node(
package="controller_manager",
executable="spawner",
arguments=["motion_control_handle", "--inactive", "-c", "/controller_manager"],
)
cartesian_compliance_controller_spawner = Node(
package="controller_manager",
executable="spawner",
arguments=["cartesian_compliance_controller", "--inactive", "-c", "/controller_manager"],
)
# Gazebo nodes
gazebo = IncludeLaunchDescription(
PythonLaunchDescriptionSource(
[os.path.join(get_package_share_directory('ros_ign_gazebo'),
'launch', 'ign_gazebo.launch.py')]),
launch_arguments=[('ign_args', [' -r ', world_config_file, " --physics-engine ignition-physics-dartsim-plugin --render-engine ogre2"])],
condition=IfCondition(sim_gazebo))
# Spawn robot
gazebo_spawn_robot = Node(package='ros_ign_gazebo', executable='create',
arguments=[
'-name', rbs_robot_type,
'-x', '0.0',
'-z', '0.0',
'-y', '0.0',
'-topic', '/robot_description'],
output='screen',
condition=IfCondition(sim_gazebo))
# MoveIt Configuration
robot_description_semantic_content = Command(
[
PathJoinSubstitution([FindExecutable(name="xacro")]),
" ",
PathJoinSubstitution(
[FindPackageShare(moveit_config_package), "srdf", moveit_config_file]
),
" ",
"name:=",
"ur",
" ",
"tf_prefix:=",
tf_prefix,
" ",
"with_gripper:=",
with_gripper_condition
]
)
robot_description_semantic = {"robot_description_semantic": robot_description_semantic_content}
robot_description_kinematics = PathJoinSubstitution(
[FindPackageShare(moveit_config_package), "config", "kinematics.yaml"]
)
# Planning Configuration
ompl_planning_pipeline_config = {
"move_group": {
"planning_plugin": "ompl_interface/OMPLPlanner",
"request_adapters": """default_planner_request_adapters/AddTimeOptimalParameterization default_planner_request_adapters/FixWorkspaceBounds default_planner_request_adapters/FixStartStateBounds default_planner_request_adapters/FixStartStateCollision default_planner_request_adapters/FixStartStatePathConstraints""",
"start_state_max_bounds_error": 0.1,
}
}
ompl_planning_yaml = load_yaml("ur_moveit_config", "config/ompl_planning.yaml")
ompl_planning_pipeline_config["move_group"].update(ompl_planning_yaml)
controllers_yaml = load_yaml("ur_moveit_config", "config/controllers.yaml")
moveit_controllers = {
"moveit_simple_controller_manager": controllers_yaml,
"moveit_controller_manager": "moveit_simple_controller_manager/MoveItSimpleControllerManager",
}
trajectory_execution = {
"moveit_manage_controllers": True,
"trajectory_execution.allowed_execution_duration_scaling": 100.0,
"trajectory_execution.allowed_goal_duration_margin": 0.5,
"trajectory_execution.allowed_start_tolerance": 0.01,
}
planning_scene_monitor_parameters = {
"publish_planning_scene": True,
"publish_geometry_updates": True,
"publish_state_updates": True,
"publish_transforms_updates": True,
}
move_group_node = Node(
package="moveit_ros_move_group",
executable="move_group",
output="screen",
parameters=[
robot_description,
robot_description_semantic,
robot_description_kinematics,
ompl_planning_pipeline_config,
trajectory_execution,
moveit_controllers,
planning_scene_monitor_parameters,
{"use_sim_time": use_sim_time},
],
)
rviz_node = Node(
package="rviz2",
executable="rviz2",
name="rviz2",
output="log",
arguments=["-d", rviz_config_file],
parameters=[
robot_description,
robot_description_semantic,
robot_description_kinematics,
],
condition=IfCondition(launch_rviz),
)
# TODO: Launch skill servers in other launch file
move_topose_action_server = Node(
package="rbs_skill_servers",
executable="move_topose_action_server",
parameters=[
robot_description,
robot_description_semantic,
robot_description_kinematics,
{"use_sim_time": use_sim_time},
]
)
gripper_control_node = Node(
package="rbs_skill_servers",
executable="gripper_control_action_server",
parameters= [
robot_description,
robot_description_semantic,
robot_description_kinematics,
{"use_sim_time": use_sim_time},
],
condition=IfCondition(with_gripper_condition)
)
move_cartesian_path_action_server = Node(
package="rbs_skill_servers",
executable="move_cartesian_path_action_server",
parameters=[
robot_description,
robot_description_semantic,
robot_description_kinematics,
{"use_sim_time": use_sim_time},
]
)
move_joint_state_action_server = Node(
package="rbs_skill_servers",
executable="move_to_joint_states_action_server",
parameters=[
robot_description,
robot_description_semantic,
robot_description_kinematics,
{"use_sim_time": use_sim_time},
]
)
grasp_pose_loader = Node(
package="rbs_skill_servers",
executable="pick_place_pose_loader_service_server",
output="screen",
emulate_tty=True,
parameters=[
points_params
]
)
assemble_state = Node(
package="rbs_skill_servers",
executable="assemble_state_service_server",
output="screen",
parameters=[
{'assemble_tf_prefix': 'ASSEMBLE_'},
{'assemble_dir': assemble_dir}
]
)
moveit_planning_scene_init = Node(
package="rbs_skill_servers",
executable="moveit_update_planning_scene_service_server",
output="screen",
parameters=[
{'init_scene': world_config_file},
{'models_paths': os.environ['IGN_GAZEBO_RESOURCE_PATH']}
]
)
# add_planning_scene_object = Node(
# package="rbs_skill_servers",
# executable="add_planning_scene_object_service",
# output="screen",
# parameters=[
# robot_description,
# robot_description_semantic,
# robot_description_kinematics,
# {"use_sim_time": use_sim_time},
# ]
# )
# remappings = [('/camera', '/camera/image'),
# ('/camera_info', '/camera/camera_info')]
# # Bridge
# bridge = Node(
# package='ros_gz_bridge',
# executable='parameter_bridge',
# arguments=[
# '/camera@sensor_msgs/msg/Image@gz.msgs.Image',
# '/camera_info@sensor_msgs/msg/CameraInfo@gz.msgs.CameraInfo',
# '/rgbd_camera/image@sensor_msgs/msg/Image@gz.msgs.Image',
# '/rgbd_camera/camera_info@sensor_msgs/msg/CameraInfo@gz.msgs.CameraInfo',
# '/rgbd_camera/depth_image@sensor_msgs/msg/Image@gz.msgs.Image',
# '/rgbd_camera/points@sensor_msgs/msg/PointCloud2@gz.msgs.PointCloudPacked'
# ],
# output='screen',
# remappings=remappings,
# )
# pc_filter = Node(
# package="rbs_perception",
# executable="pc_filter",
# output="screen",
# #tf_prefix=['xterm -e gdb -ex run --args'],
# )
grasp_marker = Node(
package="rbs_perception",
executable="grasp_marker_publish.py",
)
nodes_to_start = [
control_node,
robot_state_publisher_node,
joint_state_broadcaster_spawner,
rviz_node,
initial_joint_controller_spawner_stopped,
initial_joint_controller_spawner_started,
gazebo,
gazebo_spawn_robot,
move_group_node,
gripper_controller,
gripper_control_node,
move_topose_action_server,
move_cartesian_path_action_server,
move_joint_state_action_server,
grasp_pose_loader,
assemble_state,
moveit_planning_scene_init,
#add_planning_scene_object
]
return LaunchDescription(declared_arguments + nodes_to_start)

View file

@ -18,7 +18,7 @@ def generate_launch_description():
)
declared_arguments.append(
DeclareLaunchArgument("gazebo_gui",
default_value="true",
default_value="false",
description="Launch Gazebo GUI?")
)
declared_arguments.append(
@ -32,14 +32,15 @@ def generate_launch_description():
description="Launch env_manager?")
)
declared_arguments.append(
DeclareLaunchArgument("gazebo_world_filename",
default_value="",
description="Launch env_manager?")
DeclareLaunchArgument("rgbd_camera",
default_value="true",
description="Use the RGB-D camera?")
)
sim_gazebo = LaunchConfiguration("sim_gazebo")
gazebo_gui = LaunchConfiguration("gazebo_gui")
debugger = LaunchConfiguration("debugger")
rgbd_camera = LaunchConfiguration("rgbd_camera")
launch_env_manager = LaunchConfiguration("launch_env_manager")
gazebo_world_filename = LaunchConfiguration("gazebo_world_filename")
@ -51,7 +52,7 @@ def generate_launch_description():
[os.path.join(get_package_share_directory('ros_gz_sim'),
'launch', 'gz_sim.launch.py')]),
launch_arguments={
'gz_args': [' -r ',world_config_file, " -s"],
'gz_args': [' -s ', '-r ', world_config_file],
"debugger": debugger,
}.items(),
condition=UnlessCondition(gazebo_gui))
@ -61,7 +62,7 @@ def generate_launch_description():
[os.path.join(get_package_share_directory('ros_gz_sim'),
'launch', 'gz_sim.launch.py')]),
launch_arguments={
'gz_args': [' -r ',world_config_file],
'gz_args': [' -r ', world_config_file],
"debugger": debugger,
}.items(),
condition=IfCondition(gazebo_gui))
@ -72,12 +73,27 @@ def generate_launch_description():
parameters=[{'use_sim_time': True}],
condition=IfCondition(launch_env_manager)
)
rgbd_sensor_bridge = Node(
package="ros_gz_bridge",
executable="parameter_bridge",
arguments=[
'/rgbd_camera/image@sensor_msgs/msg/Image@gz.msgs.Image',
'/rgbd_camera/camera_info@sensor_msgs/msg/CameraInfo@gz.msgs.CameraInfo',
'/rgbd_camera/depth_image@sensor_msgs/msg/Image@gz.msgs.Image',
'/rgbd_camera/points@sensor_msgs/msg/PointCloud2@gz.msgs.PointCloudPacked'
],
output='screen',
condition=IfCondition(rgbd_camera)
)
clock_bridge = Node(
package='ros_gz_bridge',
executable='parameter_bridge',
arguments=['/clock@rosgraph_msgs/msg/Clock[ignition.msgs.Clock'],
arguments=[
'/clock@rosgraph_msgs/msg/Clock[ignition.msgs.Clock'
],
output='screen',
condition=IfCondition(sim_gazebo))
@ -85,6 +101,7 @@ def generate_launch_description():
gazebo,
gazebo_server,
clock_bridge,
rgbd_sensor_bridge,
env_manager
]
return LaunchDescription(declared_arguments + nodes_to_start)
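A note on the parameter_bridge argument syntax used above: each entry is <topic>@<ROS type>@<Gazebo type>, and the separator before the Gazebo type sets the direction (@ bidirectional, [ Gazebo→ROS only, ] ROS→Gazebo only) — which is why the clock bridge uses [, since /clock only flows from simulation into ROS. A minimal one-way bridge sketch under the same convention (the topic choice is illustrative):
from launch_ros.actions import Node
depth_only_bridge = Node(  # hypothetical example node
    package="ros_gz_bridge",
    executable="parameter_bridge",
    arguments=["/rgbd_camera/depth_image@sensor_msgs/msg/Image[gz.msgs.Image"],
    output="screen",
)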

View file

@ -4,9 +4,18 @@
<physics name='1ms' type='ignored'>
<max_step_size>0.001</max_step_size>
<real_time_factor>1.0</real_time_factor>
<real_time_update_rate>1000</real_time_update_rate>
<dart>
<collision_detector>bullet</collision_detector>
<solver>
<solver_type>pgs</solver_type>
</solver>
</dart>
</physics>
<plugin name='ignition::gazebo::systems::Physics' filename='ignition-gazebo-physics-system'/>
<plugin name='ignition::gazebo::systems::Physics' filename='ignition-gazebo-physics-system'>
<engine>
<filename>ignition-physics-dartsim-plugin</filename>
</engine>
</plugin>
<plugin name='ignition::gazebo::systems::UserCommands' filename='ignition-gazebo-user-commands-system'/>
<plugin name='ignition::gazebo::systems::SceneBroadcaster' filename='ignition-gazebo-scene-broadcaster-system'/>
<plugin name='ignition::gazebo::systems::Contact' filename='ignition-gazebo-contact-system'/>
@ -94,15 +103,20 @@
</link>
</model>
<!-- Manipulating objects -->
<!-- <include> -->
<!-- <name>board</name> -->
<!-- <uri>model://board</uri> -->
<!-- <pose>0.45 0.0 0.0 0.0 0.0 0.0</pose> -->
<!-- </include> -->
<!-- <include> -->
<!-- <name>bishop</name> -->
<!-- <uri>model://bishop</uri> -->
<!-- <pose>0.35 0.0 0.0 0.0 0.0 0.0</pose> -->
<!-- </include> -->
<include>
<name>board</name>
<uri>model://board</uri>
<pose>0.45 0.0 0.0 0.0 0.0 0.0</pose>
</include>
<include>
<name>bishop</name>
<uri>model://bishop</uri>
<pose>0.35 0.0 0.0 0.0 0.0 0.0</pose>
</include>
<!-- <include> -->
<!-- <name>bishop</name> -->
<!-- <uri>model://box1</uri> -->
<!-- <pose>0.45 0.0 0 0 0 0</pose> -->
<!-- </include> -->
</world>
</sdf>

View file

@ -34,6 +34,7 @@ find_package(tinyxml2_vendor REQUIRED)
find_package(TinyXML2 REQUIRED)
find_package(Eigen3 3.3 REQUIRED)
find_package(rbs_utils REQUIRED)
find_package(moveit_servo REQUIRED)
# Default to Fortress
set(SDF_VER 12)
@ -79,6 +80,7 @@ set(deps
moveit_ros_planning
moveit_ros_planning_interface
moveit_msgs
moveit_servo
geometry_msgs
tf2_ros
rclcpp_components
@ -133,6 +135,11 @@ add_executable(move_cartesian_path_action_server
src/move_cartesian_path_action_server.cpp)
ament_target_dependencies(move_cartesian_path_action_server ${deps})
add_executable(servo_action_server
src/moveit_servo_skill_server.cpp)
ament_target_dependencies(servo_action_server ${deps})
install(DIRECTORY include/ DESTINATION include)
install(DIRECTORY launch config DESTINATION share/${PROJECT_NAME})
@ -143,6 +150,7 @@ install(
pick_place_pose_loader
move_to_joint_states_action_server
move_cartesian_path_action_server
servo_action_server
ARCHIVE DESTINATION lib
LIBRARY DESTINATION lib
RUNTIME DESTINATION lib/${PROJECT_NAME})

View file

@ -26,6 +26,10 @@ def launch_setup(context, *args, **kwargs):
"rbs_skill_servers", "config/gripperPositions.yaml"
)
kinematics_yaml = load_yaml("rbs_arm", "config/kinematics.yaml")
robot_description_kinematics = {"robot_description_kinematics": kinematics_yaml}
move_topose_action_server = Node(
package="rbs_skill_servers",
executable="move_topose_action_server",
@ -38,12 +42,6 @@ def launch_setup(context, *args, **kwargs):
]
)
move_to_pose = Node(
package="rbs_skill_servers",
executable="move_to_pose.py",
namespace=namespace
)
gripper_control_node = Node(
package="rbs_skill_servers",
executable="gripper_control_action_server",
@ -68,9 +66,17 @@ def launch_setup(context, *args, **kwargs):
{"use_sim_time": use_sim_time},
]
)
cartesian_move_to_pose_action_server = Node(
package="rbs_skill_servers",
executable="move_to_pose.py",
namespace=namespace,
parameters=[
{"use_sim_time": use_sim_time},
{"robot_name": namespace}
]
)
# FIXME: The name of this node, "move_topose", is intended to be
# different from that of the actual MoveToPose node.
move_joint_state_action_server = Node(
package="rbs_skill_servers",
executable="move_to_joint_states_action_server",
@ -95,7 +101,7 @@ def launch_setup(context, *args, **kwargs):
gripper_control_node,
move_cartesian_path_action_server,
move_joint_state_action_server,
move_to_pose,
cartesian_move_to_pose_action_server,
# grasp_pose_loader
]
return nodes_to_start
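For completeness, a minimal client sketch for the renamed cartesian_move_to_pose_action_server — the goal fields follow the MoveitSendPose usage elsewhere in this diff, and the /arm1 namespace is an assumed multi-robot prefix:
import rclpy
from rclpy.action import ActionClient
from rclpy.node import Node
from rbs_skill_interfaces.action import MoveitSendPose
rclpy.init()
node = Node("cartesian_move_to_pose_client")  # hypothetical client node
client = ActionClient(node, MoveitSendPose, "/arm1/cartesian_move_to_pose")
goal = MoveitSendPose.Goal()
goal.robot_name = "arm1"
goal.target_pose.position.x = 0.3
goal.target_pose.position.z = 0.45
goal.target_pose.orientation.w = 1.0
client.wait_for_server()
future = client.send_goal_async(goal)
rclpy.spin_until_future_complete(node, future)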

View file

@ -13,6 +13,7 @@
<depend>moveit_core</depend>
<depend>moveit_ros_planning</depend>
<depend>moveit_ros_planning_interface</depend>
<depend>moveit_servo</depend>
<depend>moveit_msgs</depend>
<depend>tf2_ros</depend>
<depend>rclcpp_action</depend>

View file

@ -5,35 +5,40 @@ from rclpy.node import Node
import numpy as np
from rclpy.callback_groups import ReentrantCallbackGroup
from rclpy.executors import MultiThreadedExecutor
import math
from geometry_msgs.msg import Pose, PoseStamped
from rbs_skill_interfaces.action import MoveitSendPose
from scipy.spatial.transform import Rotation as R
from scipy.spatial.transform import Slerp
class PoseSubscriber(Node):
def __init__(self, parent=None):
super().__init__('pose_subscriber')
self.parent = parent
self._sub = self.create_subscription(PoseStamped,
"/cartesian_motion_controller/current_pose",
self.parent.on_pose_callback, 1,
callback_group=self.parent._callback_group)
self.get_logger().info('PoseSubscriber node initialized')
class CartesianMoveToPose(Node):
def __init__(self):
super().__init__('cartesian_move_to_pose')
super().__init__('cartesian_move_to_pose') # pyright: ignore[]
self.declare_parameter("base_link", "base_link")
self.declare_parameter("robot_name", "")
self._callback_group = ReentrantCallbackGroup()
self._action_server = ActionServer(
self,
MoveitSendPose,
'cartesian_move_to_pose',
self.execute_callback, callback_group=self._callback_group)
# for multirobot setup where each robot name is a namespace
self.robot_name: str = self.get_parameter("robot_name").get_parameter_value().string_value
self.robot_name = self.robot_name.lstrip('/').rstrip('/')
self.robot_name = f"/{self.robot_name}" if self.robot_name else ""
self._pub = self.create_publisher(PoseStamped,
"/cartesian_motion_controller/target_frame", 1,
f"{self.robot_name}/cartesian_motion_controller/target_frame", 1,
callback_group=self._callback_group)
self.current_pose = None
self.goal_tolerance = 0.05
self.max_speed = 0.1
self.max_acceleration = 0.05
self.base_link = self.get_parameter("base_link").get_parameter_value().string_value
def on_pose_callback(self, msg: PoseStamped):
if isinstance(msg, PoseStamped):
@ -41,13 +46,22 @@ class CartesianMoveToPose(Node):
def execute_callback(self, goal_handle):
self.get_logger().debug(f"Executing goal {goal_handle.request.target_pose}")
tp = PoseStamped()
tp.pose = goal_handle.request.target_pose
tp.header.stamp = self.get_clock().now().to_msg()
tp.header.frame_id = "base_link"
target_pose = goal_handle.request.target_pose
start_pose = self.current_pose.pose if self.current_pose else None
while self.get_distance_to_target(tp.pose) >= self.goal_tolerance:
if start_pose is None:
self.get_logger().error("Current pose is not available")
goal_handle.abort()
return MoveitSendPose.Result()
trajectory = self.generate_trajectory(start_pose, target_pose)
for point in trajectory:
tp = PoseStamped()
tp.pose = point
tp.header.stamp = self.get_clock().now().to_msg()
tp.header.frame_id = self.base_link
self._pub.publish(tp)
rclpy.spin_once(self, timeout_sec=0.1)
goal_handle.succeed()
@ -55,6 +69,77 @@ class CartesianMoveToPose(Node):
result.success = True
return result
def generate_trajectory(self, start_pose, target_pose):
start_position = np.array([
start_pose.position.x,
start_pose.position.y,
start_pose.position.z
])
target_position = np.array([
target_pose.position.x,
target_pose.position.y,
target_pose.position.z
])
start_orientation = R.from_quat([
start_pose.orientation.x,
start_pose.orientation.y,
start_pose.orientation.z,
start_pose.orientation.w
])
target_orientation = R.from_quat([
target_pose.orientation.x,
target_pose.orientation.y,
target_pose.orientation.z,
target_pose.orientation.w
])
distance = np.linalg.norm(target_position - start_position)
max_speed = self.max_speed
max_acceleration = self.max_acceleration
t_acc = max_speed / max_acceleration
d_acc = 0.5 * max_acceleration * t_acc**2
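# Move too short to ever reach max_speed: fall back to a triangular
# profile (accelerate for sqrt(d/a) seconds, then immediately decelerate).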
if distance < 2 * d_acc:
t_acc = math.sqrt(distance / max_acceleration)
t_flat = 0
else:
t_flat = (distance - 2 * d_acc) / max_speed
total_time = 2 * t_acc + t_flat
num_points = int(total_time * 10)
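# ~10 waypoints per second of motion, matching the 0.1 s spin_once()
# publish loop in execute_callback above.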
trajectory = []
times = np.linspace(0, total_time, num_points + 1)
key_rots = R.from_quat([start_orientation.as_quat(), target_orientation.as_quat()])
slerp = Slerp([0, total_time], key_rots)
for t in times:
if t < t_acc:
fraction = 0.5 * max_acceleration * t**2 / distance
elif t < t_acc + t_flat:
fraction = (d_acc + max_speed * (t - t_acc)) / distance
else:
t_decel = t - t_acc - t_flat
fraction = (d_acc + max_speed * t_flat + 0.5 * max_acceleration * t_decel**2) / distance
intermediate_position = start_position + fraction * (target_position - start_position)
intermediate_orientation = slerp([t])[0]
intermediate_pose = Pose()
intermediate_pose.position.x = intermediate_position[0]
intermediate_pose.position.y = intermediate_position[1]
intermediate_pose.position.z = intermediate_position[2]
intermediate_orientation_quat = intermediate_orientation.as_quat()
intermediate_pose.orientation.x = intermediate_orientation_quat[0]
intermediate_pose.orientation.y = intermediate_orientation_quat[1]
intermediate_pose.orientation.z = intermediate_orientation_quat[2]
intermediate_pose.orientation.w = intermediate_orientation_quat[3]
trajectory.append(intermediate_pose)
return trajectory
def get_distance_to_target(self, target_pose: Pose):
if self.current_pose is None or self.current_pose.pose is None:
self.get_logger().warn("Current pose is not available")
@ -65,36 +150,40 @@ class CartesianMoveToPose(Node):
current_position = np.array([
current_pose.position.x,
current_pose.position.y,
current_pose.position.z,
current_pose.orientation.x,
current_pose.orientation.y,
current_pose.orientation.z
current_pose.position.z
])
target_position = np.array([
target_pose.position.x,
target_pose.position.y,
target_pose.position.z,
target_pose.orientation.x,
target_pose.orientation.y,
target_pose.orientation.z
target_pose.position.z
])
# Check that the coordinate arrays contain valid (non-NaN) values
if np.any(np.isnan(current_position)) or np.any(np.isnan(target_position)):
self.get_logger().error("Invalid coordinates")
return None
# Compute the Euclidean distance between the current and target positions
distance = np.linalg.norm(current_position - target_position)
return distance
class PoseSubscriber(Node):
def __init__(self, parent: CartesianMoveToPose, robot_name: str):
super().__init__('pose_subscriber') # pyright: ignore[]
self.parent = parent
self._sub = self.create_subscription(PoseStamped,
f"{robot_name}/cartesian_motion_controller/current_pose",
self.parent.on_pose_callback, 1,
callback_group=self.parent._callback_group)
self.get_logger().info('PoseSubscriber node initialized')
def main(args=None):
rclpy.init(args=args)
cartesian_move_to_pose = CartesianMoveToPose()
pose_subscriber = PoseSubscriber(parent=cartesian_move_to_pose)
pose_subscriber = PoseSubscriber(parent=cartesian_move_to_pose,
robot_name=cartesian_move_to_pose.robot_name)
executor = MultiThreadedExecutor()
executor.add_node(cartesian_move_to_pose)
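To make the velocity profile above concrete, a worked example using the defaults set in __init__ (max_speed = 0.1 m/s, max_acceleration = 0.05 m/s²) and an assumed 0.5 m move:
import math
max_speed, max_acceleration = 0.1, 0.05        # defaults from CartesianMoveToPose
distance = 0.5                                 # assumed example move
t_acc = max_speed / max_acceleration           # 2.0 s to reach full speed
d_acc = 0.5 * max_acceleration * t_acc ** 2    # 0.1 m covered while accelerating
if distance < 2 * d_acc:                       # too short: triangular profile
    t_acc = math.sqrt(distance / max_acceleration)
    t_flat = 0.0
else:                                          # trapezoidal profile
    t_flat = (distance - 2 * d_acc) / max_speed  # 3.0 s at constant speed
total_time = 2 * t_acc + t_flat                # 7.0 s total
num_points = int(total_time * 10)              # 70 waypoints at ~10 Hz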

View file

@ -2,30 +2,36 @@
import rclpy
from rclpy.node import Node
import argparse
from geometry_msgs.msg import PoseStamped
class CartesianControllerPublisher(Node):
def __init__(self, robot_name: str):
def __init__(self, robot_name: str, poses: dict):
super().__init__("cartesian_controller_pose_publisher")
self.publisher_ = self.create_publisher(
PoseStamped,
"/" + robot_name + "/cartesian_motion_controller/target_frame", 10)
timer_period = 0.5 # seconds
self.timer = self.create_timer(timer_period, self.timer_callback)
self.robot_name = robot_name
self.poses = poses
def timer_callback(self):
pose = self.poses.get(self.robot_name, {
'position': {'x': 0.0, 'y': 0.0, 'z': 0.0},
'orientation': {'x': 0.0, 'y': 0.0, 'z': 0.0, 'w': 1.0}
})
msg = PoseStamped()
msg.header.stamp = self.get_clock().now().to_msg()
msg.header.frame_id = "base_link"
msg.pose.position.x = 0.7
msg.pose.position.y = 0.0
msg.pose.position.z = 0.45
msg.pose.orientation.x = 0.0
msg.pose.orientation.y = 0.707
msg.pose.orientation.z = 0.0
msg.pose.orientation.w = 0.707
msg.pose.position.x = pose['position']['x']
msg.pose.position.y = pose['position']['y']
msg.pose.position.z = pose['position']['z']
msg.pose.orientation.x = pose['orientation']['x']
msg.pose.orientation.y = pose['orientation']['y']
msg.pose.orientation.z = pose['orientation']['z']
msg.pose.orientation.w = pose['orientation']['w']
self.publisher_.publish(msg)
@ -36,7 +42,21 @@ def main(args=None):
parser = argparse.ArgumentParser(description='ROS2 Minimal Publisher')
parser.add_argument('--robot-name', type=str, default='arm0', help='Specify the robot name')
args = parser.parse_args()
minimal_publisher = CartesianControllerPublisher(args.robot_name)
# Define poses for each robot
poses = {
'arm2': {
'position': {'x': -0.3, 'y': 0.0, 'z': 0.45},
'orientation': {'x': 0.0, 'y': -0.707, 'z': 0.0, 'w': 0.707}
},
'arm1': {
'position': {'x': 0.3, 'y': 0.0, 'z': 0.45},
'orientation': {'x': 0.0, 'y': 0.707, 'z': 0.0, 'w': 0.707}
}
# Add more robots and their poses as needed
}
minimal_publisher = CartesianControllerPublisher(args.robot_name, poses)
rclpy.spin(minimal_publisher)
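Run one instance per robot (e.g. --robot-name arm1); names missing from the dict fall back to the identity pose via poses.get(). A quick check of that fallback:
poses = {}  # no entry for "arm0"
default = {
    'position': {'x': 0.0, 'y': 0.0, 'z': 0.0},
    'orientation': {'x': 0.0, 'y': 0.0, 'z': 0.0, 'w': 1.0}
}
assert poses.get('arm0', default) == default  # unknown robots get the identity pose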

View file

@ -0,0 +1,47 @@
import rclpy
from rclpy.node import Node
import argparse
from geometry_msgs.msg import PoseStamped
class CartesianControllerPublisher(Node):
def __init__(self, robot_name: str):
super().__init__("cartesian_controller_pose_publisher")
self.publisher_ = self.create_publisher(
PoseStamped,
"/cartesian_motion_controller/target_frame", 10)
timer_period = 0.5 # seconds
self.timer = self.create_timer(timer_period, self.timer_callback)
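# NOTE: robot_name is parsed in main() but not used here; unlike the
# namespaced publisher above, this node targets the global controller topic.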
def timer_callback(self):
msg = PoseStamped()
msg.header.stamp = self.get_clock().now().to_msg()
msg.header.frame_id = "base_link"
msg.pose.position.x = 0.2
msg.pose.position.y = 0.2
msg.pose.position.z = 0.2
msg.pose.orientation.x = 0.0
msg.pose.orientation.y = 1.0
msg.pose.orientation.z = 0.0
msg.pose.orientation.w = 0.0
self.publisher_.publish(msg)
def main(args=None):
rclpy.init(args=args)
parser = argparse.ArgumentParser(description='ROS2 Minimal Publisher')
parser.add_argument('--robot-name', type=str, default='arm0', help='Specify the robot name')
args = parser.parse_args()
minimal_publisher = CartesianControllerPublisher(args.robot_name)
rclpy.spin(minimal_publisher)
minimal_publisher.destroy_node()
rclpy.shutdown()
if __name__ == '__main__':
main()

View file

@ -1,5 +1,7 @@
#include <functional>
#include <memory>
#include <moveit/robot_trajectory/robot_trajectory.h>
#include <moveit/trajectory_processing/time_parameterization.h>
#include <thread>
#include "rclcpp/rclcpp.hpp"
@ -19,6 +21,7 @@
#include "moveit/move_group_interface/move_group_interface.h"
#include "moveit/planning_interface/planning_interface.h"
#include "moveit/robot_model_loader/robot_model_loader.h"
#include "moveit/trajectory_processing/time_optimal_trajectory_generation.h"
/*
#include <tf2/LinearMath/Quaternion.h>
@ -122,23 +125,59 @@ private:
std::vector<geometry_msgs::msg::Pose> waypoints;
auto current_pose = move_group_interface.getCurrentPose();
// waypoints.push_back(current_pose.pose);
// geometry_msgs::msg::Pose start_pose = current_pose.pose;
geometry_msgs::msg::Pose target_pose = goal->target_pose;
// target_pose.position = goal->target_pose.position;
// int num_waypoints = 100;
// for (int i = 1; i <= num_waypoints; ++i) {
// geometry_msgs::msg::Pose intermediate_pose;
// double fraction = static_cast<double>(i) / (num_waypoints + 1);
//
// intermediate_pose.position.x =
// start_pose.position.x +
// fraction * (target_pose.position.x - start_pose.position.x);
// intermediate_pose.position.y =
// start_pose.position.y +
// fraction * (target_pose.position.y - start_pose.position.y);
// intermediate_pose.position.z =
// start_pose.position.z +
// fraction * (target_pose.position.z - start_pose.position.z);
//
// intermediate_pose.orientation = start_pose.orientation;
//
// waypoints.push_back(intermediate_pose);
// }
waypoints.push_back(target_pose);
RCLCPP_INFO(this->get_logger(), "New cartesian target pose [%f, %f, %f]",
target_pose.position.x, target_pose.position.y,
target_pose.position.z);
// waypoints.push_back(start_pose.pose);
moveit_msgs::msg::RobotTrajectory trajectory;
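// eef_step sets the Cartesian interpolation resolution (1 mm between
// waypoints); jump_threshold = 0.0 disables joint-space jump checking.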
const double jump_threshold = 0.0;
const double eef_step = 0.001;
double fraction = move_group_interface.computeCartesianPath(
waypoints, eef_step, jump_threshold, trajectory);
robot_trajectory::RobotTrajectory rt(
move_group_interface.getCurrentState()->getRobotModel(),
goal->robot_name);
rt.setRobotTrajectoryMsg(*move_group_interface.getCurrentState(), trajectory);
trajectory_processing::TimeOptimalTrajectoryGeneration tp;
// Retime the path; computeTimeStamps returns false if parameterization fails.
bool time_parameterization_ok = tp.computeTimeStamps(rt);
if (!time_parameterization_ok)
  RCLCPP_WARN(this->get_logger(), "Time parameterization failed");
rt.getRobotTrajectoryMsg(trajectory);
moveit::planning_interface::MoveGroupInterface::Plan plan;
plan.trajectory_ = trajectory;
if (fraction > 0) {
RCLCPP_INFO(this->get_logger(), "Planning success");
moveit::core::MoveItErrorCode execute_err_code =
move_group_interface.execute(trajectory);
move_group_interface.execute(plan);
if (execute_err_code == moveit::core::MoveItErrorCode::SUCCESS) {
goal_handle->succeed(result);
RCLCPP_INFO(this->get_logger(), "Successfully executed action goal");

View file

@ -0,0 +1,232 @@
#include <functional>
#include <geometry_msgs/msg/detail/pose_stamped__struct.hpp>
#include <geometry_msgs/msg/detail/transform_stamped__struct.hpp>
#include <memory>
#include <rclcpp/publisher.hpp>
#include <rclcpp/qos.hpp>
#include <tf2/LinearMath/Transform.h>
#include <tf2/convert.h>
#include <tf2_geometry_msgs/tf2_geometry_msgs.hpp>
#include <thread>
#include "rclcpp/rclcpp.hpp"
#include "rclcpp/timer.hpp"
#include "rclcpp_components/register_node_macro.hpp"
// action libs
#include "rbs_skill_interfaces/action/moveit_send_pose.hpp"
#include "rbs_skill_interfaces/msg/action_feedback_status_constants.hpp"
#include "rclcpp_action/rclcpp_action.hpp"
#include "geometry_msgs/msg/pose_stamped.hpp"
#include "geometry_msgs/msg/quaternion.hpp"
#include "geometry_msgs/msg/transform.hpp"
// moveit libs
#include <moveit_servo/make_shared_from_pool.h>
#include <moveit_servo/pose_tracking.h>
#include <moveit_servo/servo.h>
#include <moveit_servo/servo_parameters.h>
#include <moveit_servo/status_codes.h>
namespace rbs_skill_actions {
class StatusMonitor {
public:
StatusMonitor(const rclcpp::Node::SharedPtr &node, const std::string &topic)
: m_node(node) {
sub_ = node->create_subscription<std_msgs::msg::Int8>(
topic, rclcpp::SystemDefaultsQoS(),
[this](const std_msgs::msg::Int8::ConstSharedPtr &msg) {
return statusCB(msg);
});
}
private:
rclcpp::Node::SharedPtr m_node;
void statusCB(const std_msgs::msg::Int8::ConstSharedPtr &msg) {
moveit_servo::StatusCode latest_status =
static_cast<moveit_servo::StatusCode>(msg->data);
if (latest_status != status_) {
status_ = latest_status;
const auto &status_str = moveit_servo::SERVO_STATUS_CODE_MAP.at(status_);
RCLCPP_INFO_STREAM(m_node->get_logger(), "Servo status: " << status_str);
}
}
moveit_servo::StatusCode status_ = moveit_servo::StatusCode::INVALID;
rclcpp::Subscription<std_msgs::msg::Int8>::SharedPtr sub_;
};
class MoveServoActionServer : public rclcpp::Node {
public:
using MoveitSendPose = rbs_skill_interfaces::action::MoveitSendPose;
// explicit MoveCartesianActionServer(const rclcpp::Node::SharedPtr& node)
explicit MoveServoActionServer(const rclcpp::Node::SharedPtr &node)
: Node("move_servo_action_server"), m_node(node) {
auto servo_parameters =
moveit_servo::ServoParameters::makeServoParameters(node);
if (servo_parameters == nullptr) {
RCLCPP_FATAL(node->get_logger(), "Could not get servo parameters!");
// exit(EXIT_FAILURE);
}
// Load the planning scene monitor
m_planning_scene_monitor =
std::make_shared<planning_scene_monitor::PlanningSceneMonitor>(
node, "robot_description");
if (!m_planning_scene_monitor->getPlanningScene()) {
RCLCPP_ERROR_STREAM(node->get_logger(),
"Error in setting up the PlanningSceneMonitor.");
}
m_planning_scene_monitor->providePlanningSceneService();
m_planning_scene_monitor->startSceneMonitor();
m_planning_scene_monitor->startWorldGeometryMonitor(
planning_scene_monitor::PlanningSceneMonitor::
DEFAULT_COLLISION_OBJECT_TOPIC,
planning_scene_monitor::PlanningSceneMonitor::
DEFAULT_PLANNING_SCENE_WORLD_TOPIC,
false /* skip octomap monitor */);
m_planning_scene_monitor->startStateMonitor("/joint_states");
m_planning_scene_monitor->startPublishingPlanningScene(
planning_scene_monitor::PlanningSceneMonitor::UPDATE_SCENE);
// Wait for Planning Scene Monitor to setup
if (!m_planning_scene_monitor->waitForCurrentRobotState(
node->now(), 5.0 /* seconds */)) {
RCLCPP_ERROR_STREAM(
node->get_logger(),
"Error waiting for current robot state in PlanningSceneMonitor.");
}
// Create the pose tracker
m_tracker = std::make_shared<moveit_servo::PoseTracking>(
node, servo_parameters, m_planning_scene_monitor);
m_status_monitor = std::make_shared<StatusMonitor>(node, servo_parameters->status_topic);
}
void init() {
m_action_server = rclcpp_action::create_server<MoveitSendPose>(
m_node->get_node_base_interface(), m_node->get_node_clock_interface(),
m_node->get_node_logging_interface(),
m_node->get_node_waitables_interface(), "move_servo",
std::bind(&MoveServoActionServer::goal_callback, this,
std::placeholders::_1, std::placeholders::_2),
std::bind(&MoveServoActionServer::cancel_callback, this,
std::placeholders::_1),
std::bind(&MoveServoActionServer::accepted_callback, this,
std::placeholders::_1));
m_pose_pub = m_node->create_publisher<geometry_msgs::msg::PoseStamped>("target_pose", rclcpp::SystemDefaultsQoS());
}
private:
rclcpp::Node::SharedPtr m_node;
rclcpp_action::Server<MoveitSendPose>::SharedPtr m_action_server;
rclcpp::Publisher<geometry_msgs::msg::PoseStamped>::SharedPtr m_pose_pub;
planning_scene_monitor::PlanningSceneMonitorPtr m_planning_scene_monitor;
moveit_servo::PoseTrackingPtr m_tracker;
std::shared_ptr<StatusMonitor> m_status_monitor;
using ServerGoalHandle = rclcpp_action::ServerGoalHandle<MoveitSendPose>;
rclcpp_action::GoalResponse
goal_callback(const rclcpp_action::GoalUUID &uuid,
std::shared_ptr<const MoveitSendPose::Goal> goal) {
RCLCPP_INFO(
this->get_logger(),
"Received goal request for robot [%s] with Pose [%f, %f, %f, %f, %f, "
"%f, %f]",
goal->robot_name.c_str(), goal->target_pose.position.x,
goal->target_pose.position.y, goal->target_pose.position.z,
goal->target_pose.orientation.x, goal->target_pose.orientation.y,
goal->target_pose.orientation.z, goal->target_pose.orientation.w);
(void)uuid;
return rclcpp_action::GoalResponse::ACCEPT_AND_EXECUTE;
}
rclcpp_action::CancelResponse
cancel_callback(const std::shared_ptr<ServerGoalHandle> goal_handle) {
RCLCPP_INFO(this->get_logger(), "Received cancel request");
(void)goal_handle;
return rclcpp_action::CancelResponse::ACCEPT;
}
void accepted_callback(const std::shared_ptr<ServerGoalHandle> goal_handle) {
using namespace std::placeholders;
std::thread(std::bind(&MoveServoActionServer::execute, this, _1),
goal_handle)
.detach();
// std::thread(
// [this, goal_handle]() {
// execute(goal_handle);
// }).detach();
}
void execute(const std::shared_ptr<ServerGoalHandle> goal_handle) {
RCLCPP_INFO(this->get_logger(), "Executing action goal");
const auto goal = goal_handle->get_goal();
auto result = std::make_shared<MoveitSendPose::Result>();
geometry_msgs::msg::TransformStamped current_ee_pose;
m_tracker->getCommandFrameTransform(current_ee_pose);
// Convert it to a Pose
geometry_msgs::msg::PoseStamped target_pose;
target_pose.header.frame_id = current_ee_pose.header.frame_id;
target_pose.pose = goal->target_pose;
// target_pose.pose.position.y = current_ee_pose.transform.translation.y;
// target_pose.pose.position.z = current_ee_pose.transform.translation.z;
// target_pose.pose.orientation = current_ee_pose.transform.rotation;
// target_pose.pose.position.x += 0.1;
m_tracker->resetTargetPose();
target_pose.header.stamp = m_node->now();
m_pose_pub->publish(target_pose);
Eigen::Vector3d lin_tol{ 0.001, 0.001, 0.001 };
double rot_tol = 0.01;
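// lin_tol is the per-axis translational tolerance in metres, rot_tol the
// angular tolerance in radians; moveToPose() blocks until converged,
// timed out, or halted.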
// Run the pose tracking
moveit_servo::PoseTrackingStatusCode tracking_status =
m_tracker->moveToPose(lin_tol, rot_tol, 0.1 /* target pose timeout */);
if (tracking_status == moveit_servo::PoseTrackingStatusCode::SUCCESS) {
result->success = true;
goal_handle->succeed(result);
RCLCPP_INFO(this->get_logger(), "Pose tracking succeeded.");
} else {
result->success = false;
goal_handle->abort(result);
RCLCPP_INFO(this->get_logger(), "Pose tracking failed with status: %d", static_cast<int>(tracking_status));
}
RCLCPP_INFO_STREAM(m_node->get_logger(), "Pose tracker exited with status: "
<< moveit_servo::POSE_TRACKING_STATUS_CODE_MAP.at(tracking_status));
}
}; // class MoveCartesianActionServer
} // namespace rbs_skill_actions
int main(int argc, char **argv) {
rclcpp::init(argc, argv);
rclcpp::NodeOptions node_options;
// node_options.automatically_declare_parameters_from_overrides(true);
node_options.allow_undeclared_parameters();
auto node = rclcpp::Node::make_shared("move_servo", "", node_options);
rbs_skill_actions::MoveServoActionServer server(node);
std::thread run_server([&server]() {
rclcpp::sleep_for(std::chrono::seconds(3));
server.init();
});
rclcpp::spin(node);
run_server.join();
return 0;
}

Some files were not shown because too many files have changed in this diff.