# File: runtime/env_manager/rbs_gym/hyperparams/sac.yml
# (listed as 42 lines, 872 B, YAML in the original file view)

# Reach
# Hyperparameters for the environment id "Reach-Gazebo-v0".
# Every setting is nested under the environment key so that it does not
# collide with the same-named settings of other environments in this file.
Reach-Gazebo-v0:
  policy: "MlpPolicy"
  # Keyword arguments grouped for the policy: critic count and layer sizes.
  policy_kwargs:
    n_critics: 2
    net_arch: [128, 64]
  n_timesteps: 200000
  buffer_size: 25000
  learning_starts: 5000
  batch_size: 512
  # Deliberately a string, not a number. The "lin_" prefix is presumably
  # decoded into a schedule by the loader -- TODO confirm against consumer.
  learning_rate: "lin_0.0002"
  gamma: 0.95
  tau: 0.001
  # String form; presumably "auto" plus an initial value -- TODO confirm.
  ent_coef: "auto_0.1"
  target_entropy: "auto"
  # Pair of (count, unit).
  train_freq: [1, "episode"]
  gradient_steps: 100
  noise_type: "normal"
  noise_std: 0.025
  use_sde: false
  optimize_memory_usage: false
# Hyperparameters for the environment id "Reach-ColorImage-Gazebo-v0".
# Every setting is nested under the environment key so that it does not
# collide with the same-named settings of other environments in this file.
Reach-ColorImage-Gazebo-v0:
  policy: "CnnPolicy"
  # Keyword arguments grouped for the policy: critic count and layer sizes.
  policy_kwargs:
    n_critics: 2
    net_arch: [128, 128]
  n_timesteps: 50000
  buffer_size: 25000
  learning_starts: 5000
  batch_size: 32
  # Deliberately a string, not a number. The "lin_" prefix is presumably
  # decoded into a schedule by the loader -- TODO confirm against consumer.
  learning_rate: "lin_0.0002"
  gamma: 0.95
  tau: 0.0005
  # String form; presumably "auto" plus an initial value -- TODO confirm.
  ent_coef: "auto_0.1"
  target_entropy: "auto"
  # Pair of (count, unit).
  train_freq: [1, "episode"]
  gradient_steps: 100
  noise_type: "normal"
  noise_std: 0.025
  use_sde: false
  optimize_memory_usage: false