Tying it all together
Now that our managers and scene are defined as classes, there's one more step: instantiate these classes and assemble them into a manager-based environment configuration (ManagerBasedRLEnvCfg). We'll do that together next.
After this, we’re ready to start training!
Instantiating managers
Notice that lines such as observations = ObservationsCfg() are where we actually create instances of the managers we defined above.
We also set up key simulation parameters, including:
the number of environments, i.e. parallel copies of the training setup on the stage (remember the fleet of robots we saw before?)
the physical spacing between those environments
the length of training episodes (recall our MDP time-out termination condition)
A training episode is a single trial in which a robot interacts with the simulated environment: starting from an initial state, it takes actions based on its policy, receives observations and rewards, and continues until it reaches a termination condition (such as falling over or exceeding the time limit). The environment then resets and a new episode begins.
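To make this concrete, here is what one episode looks like as a loop, written in the gymnasium style that Isaac Lab environments follow. This is only an illustrative sketch: the built-in Pendulum-v1 task stands in for our reach environment, and random action sampling stands in for the learned policy. During training, the RL framework runs this loop for us across all environment copies in parallel.

import gymnasium as gym

# Sketch of a single episode (gymnasium-style API).
env = gym.make("Pendulum-v1")  # stand-in for our reach environment
obs, info = env.reset()  # start from an initial state
done = False
while not done:
    action = env.action_space.sample()  # the trained policy would choose this
    obs, reward, terminated, truncated, info = env.step(action)
    done = terminated or truncated  # e.g. our time_out termination
env.close()  # during training, reset() would start the next episode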
There are more settings below, which we'll ignore for now; you can read more about them in the Isaac Lab documentation.
Replace the existing ReachEnvCfg class in your reach_env_cfg.py file with the following:
@configclass
class ReachEnvCfg(ManagerBasedRLEnvCfg):
    """Configuration for the reach end-effector pose tracking environment."""

    # Scene settings - how many robots, how far apart?
    scene = ReachSceneCfg(num_envs=2000, env_spacing=2.5)
    # Basic settings
    observations = ObservationsCfg()
    actions = ActionsCfg()
    commands: CommandsCfg = CommandsCfg()
    # MDP settings
    rewards = RewardsCfg()
    terminations = TerminationsCfg()
    events = EventCfg()
    curriculum = CurriculumCfg()

    def __post_init__(self):
        """Post initialization."""
        # general settings
        self.decimation = 2
        self.sim.render_interval = self.decimation
        self.episode_length_s = 3.0
        self.viewer.eye = (3.5, 3.5, 3.5)
        # simulation settings
        self.sim.dt = 1.0 / 60.0
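It is worth seeing how these settings interact. The physics simulation steps at sim.dt = 1/60 s, and decimation = 2 means the policy acts once every two physics steps, i.e. at 30 Hz. Here is a quick back-of-the-envelope check (plain arithmetic, not Isaac Lab API):

# Back-of-the-envelope check of the timing settings above.
sim_dt = 1.0 / 60.0      # physics step: 60 Hz
decimation = 2           # physics steps per policy action
episode_length_s = 3.0   # episode timeout in seconds

control_dt = sim_dt * decimation                  # 1/30 s per policy step
steps_per_episode = episode_length_s / control_dt
print(f"control rate: {1.0 / control_dt:.0f} Hz")             # 30 Hz
print(f"policy steps per episode: {steps_per_episode:.0f}")   # 90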
Configuration for play
During playback, or when visualizing our results, we may want different environment settings than we used during training. For example, we may want fewer environment copies, or to remove the randomization we added to make training more robust.
Add the following class to create a custom environment configuration for playback.
@configclass
class ReachEnvCfg_PLAY(ReachEnvCfg):
    def __post_init__(self):
        # post init of parent
        super().__post_init__()
        # make a smaller scene for play
        self.scene.num_envs = 50
        self.scene.env_spacing = 2.5
        # disable randomization for play
        self.observations.policy.enable_corruption = False
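Because ReachEnvCfg_PLAY inherits from ReachEnvCfg and only overrides a few fields in __post_init__, everything else (scene, rewards, commands, and so on) is reused unchanged. As a quick illustration of the inheritance (a sketch; it assumes reach_env_cfg is importable, e.g. inside an Isaac Lab Python environment):

# Illustrative check of the inherited overrides.
from reach_env_cfg import ReachEnvCfg, ReachEnvCfg_PLAY

train_cfg = ReachEnvCfg()
play_cfg = ReachEnvCfg_PLAY()
print(train_cfg.scene.num_envs)  # 2000
print(play_cfg.scene.num_envs)   # 50 - overridden for play
print(play_cfg.observations.policy.enable_corruption)  # False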
Register the entry point
To be able to reference this subclassed version of our environment, we need to register it in the source/Reach/Reach/tasks/manager_based/reach/__init__.py file. Add the following code to the bottom of that file to register the ReachEnvCfg_PLAY class as an additional entry point. Note how the env_cfg_entry_point kwarg references the class we made above.
gym.register(
    id="Template-Reach-Play-v0",
    entry_point="isaaclab.envs:ManagerBasedRLEnv",
    disable_env_checker=True,
    kwargs={
        "env_cfg_entry_point": f"{__name__}.reach_env_cfg:ReachEnvCfg_PLAY",
        "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
    },
)
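Note that env_cfg_entry_point is just a "module:attribute" string, so the configuration class is not imported until a script actually requests this task. Roughly, such a string can be resolved as sketched below (a simplified illustration of the mechanism, not Isaac Lab's actual loader; the module path shown is hypothetical, since in our project it is built from f"{__name__}.reach_env_cfg" at registration time):

import importlib

# Simplified sketch of resolving a "module:attribute" entry-point string.
entry_point = "Reach.tasks.manager_based.reach.reach_env_cfg:ReachEnvCfg_PLAY"
module_name, attr_name = entry_point.split(":")
module = importlib.import_module(module_name)
env_cfg = getattr(module, attr_name)()  # instantiate ReachEnvCfg_PLAY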
We’ll show how to invoke this later!
Complete environment configuration
For reference, here is the completed Python file reach_env_cfg.py:
# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

import math
import carb

NUCLEUS_ASSET_ROOT_DIR = carb.settings.get_settings().get("/persistent/isaac/asset_root/cloud")
"""Path to the root directory on the Nucleus Server."""

NVIDIA_NUCLEUS_DIR = f"{NUCLEUS_ASSET_ROOT_DIR}/NVIDIA"
"""Path to the root directory on the NVIDIA Nucleus Server."""

ISAAC_NUCLEUS_DIR = f"{NUCLEUS_ASSET_ROOT_DIR}/Isaac"
"""Path to the ``Isaac`` directory on the NVIDIA Nucleus Server."""

import isaaclab.sim as sim_utils
from isaaclab.assets import AssetBaseCfg
from isaaclab.envs import ManagerBasedRLEnvCfg
from isaaclab.managers import ActionTermCfg as ActionTerm
from isaaclab.managers import CurriculumTermCfg as CurrTerm
from isaaclab.managers import EventTermCfg as EventTerm
from isaaclab.managers import ObservationGroupCfg as ObsGroup
from isaaclab.managers import ObservationTermCfg as ObsTerm
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.managers import SceneEntityCfg
from isaaclab.managers import TerminationTermCfg as DoneTerm
from isaaclab.scene import InteractiveSceneCfg
from isaaclab.utils import configclass
from isaaclab.utils.noise import AdditiveUniformNoiseCfg as Unoise
from isaaclab.sim.spawners.from_files.from_files_cfg import GroundPlaneCfg

from . import mdp

##
# Pre-defined configs
##

from .ur_gripper import UR_GRIPPER_CFG  # isort:skip

##
# Scene definition
##

@configclass
class ReachSceneCfg(InteractiveSceneCfg):
    """Configuration for a scene."""

    # world
    ground = AssetBaseCfg(
        prim_path="/World/ground",
        spawn=sim_utils.GroundPlaneCfg(),
        init_state=AssetBaseCfg.InitialStateCfg(pos=(0.0, 0.0, -1.05)),
    )

    # robot
    robot = UR_GRIPPER_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")

    # lights
    dome_light = AssetBaseCfg(
        prim_path="/World/DomeLight",
        spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=5000.0),
    )

    table = AssetBaseCfg(
        prim_path="{ENV_REGEX_NS}/Table",
        spawn=sim_utils.UsdFileCfg(
            usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Mounts/SeattleLabTable/table_instanceable.usd",
        ),
        init_state=AssetBaseCfg.InitialStateCfg(pos=(0.55, 0.0, 0.0), rot=(0.70711, 0.0, 0.0, 0.70711)),
    )

    # plane
    plane = AssetBaseCfg(
        prim_path="/World/GroundPlane",
        init_state=AssetBaseCfg.InitialStateCfg(pos=[0, 0, -1.05]),
        spawn=GroundPlaneCfg(),
    )

##
# MDP settings
##

@configclass
class ActionsCfg:
    """Action specifications for the MDP."""

    arm_action: ActionTerm = mdp.JointPositionActionCfg(
        asset_name="robot",
        joint_names=["shoulder_pan_joint", "shoulder_lift_joint", "elbow_joint", "wrist_1_joint", "wrist_2_joint", "wrist_3_joint"],
        scale=0.5,
        use_default_offset=True,
        debug_vis=True,
    )

@configclass
class CommandsCfg:
    """Command terms for the MDP."""

    ee_pose = mdp.UniformPoseCommandCfg(
        asset_name="robot",
        body_name="ee_link",  # This is the body in the USD file
        resampling_time_range=(4.0, 4.0),
        debug_vis=True,
        # These are the ranges of poses that can be commanded for the end effector during training
        ranges=mdp.UniformPoseCommandCfg.Ranges(
            pos_x=(0.35, 0.65),
            pos_y=(-0.2, 0.2),
            pos_z=(0.15, 0.5),
            roll=(0.0, 0.0),
            pitch=(math.pi / 2, math.pi / 2),
            yaw=(-3.14, 3.14),
        ),
    )

@configclass
class ObservationsCfg:
    """Observation specifications for the MDP."""

    @configclass
    class PolicyCfg(ObsGroup):
        """Observations for policy group."""

        # observation terms (order preserved)
        joint_pos = ObsTerm(func=mdp.joint_pos_rel, noise=Unoise(n_min=-0.01, n_max=0.01))
        joint_vel = ObsTerm(func=mdp.joint_vel_rel, noise=Unoise(n_min=-0.01, n_max=0.01))
        pose_command = ObsTerm(func=mdp.generated_commands, params={"command_name": "ee_pose"})
        actions = ObsTerm(func=mdp.last_action)

        def __post_init__(self):
            self.enable_corruption = True
            self.concatenate_terms = True

    # observation groups
    policy: PolicyCfg = PolicyCfg()

@configclass
class EventCfg:
    """Configuration for events."""

    reset_robot_joints = EventTerm(
        func=mdp.reset_joints_by_scale,
        mode="reset",
        params={
            "position_range": (0.75, 1.25),
            "velocity_range": (0.0, 0.0),
        },
    )

@configclass
class RewardsCfg:
    """Reward terms for the MDP."""

    # task terms
    end_effector_position_tracking = RewTerm(
        func=mdp.position_command_error,
        weight=-0.2,
        params={"asset_cfg": SceneEntityCfg("robot", body_names=["ee_link"]), "command_name": "ee_pose"},
    )
    end_effector_position_tracking_fine_grained = RewTerm(
        func=mdp.position_command_error_tanh,
        weight=0.1,
        params={"asset_cfg": SceneEntityCfg("robot", body_names=["ee_link"]), "std": 0.1, "command_name": "ee_pose"},
    )

    # action penalty
    action_rate = RewTerm(func=mdp.action_rate_l2, weight=-0.0001)
    joint_vel = RewTerm(
        func=mdp.joint_vel_l2,
        weight=-0.0001,
        params={"asset_cfg": SceneEntityCfg("robot")},
    )

@configclass
class TerminationsCfg:
    """Termination terms for the MDP."""

    time_out = DoneTerm(func=mdp.time_out, time_out=True)

@configclass
class CurriculumCfg:
    """Curriculum terms for the MDP."""

    action_rate = CurrTerm(
        func=mdp.modify_reward_weight, params={"term_name": "action_rate", "weight": -0.005, "num_steps": 4500}
    )

    joint_vel = CurrTerm(
        func=mdp.modify_reward_weight, params={"term_name": "joint_vel", "weight": -0.001, "num_steps": 4500}
    )

##
# Environment configuration
##

@configclass
class ReachEnvCfg(ManagerBasedRLEnvCfg):
    """Configuration for the reach end-effector pose tracking environment."""

    # Scene settings - how many robots, how far apart?
    scene = ReachSceneCfg(num_envs=2000, env_spacing=2.5)
    # Basic settings
    observations = ObservationsCfg()
    actions = ActionsCfg()
    commands: CommandsCfg = CommandsCfg()
    # MDP settings
    rewards = RewardsCfg()
    terminations = TerminationsCfg()
    events = EventCfg()
    curriculum = CurriculumCfg()

    def __post_init__(self):
        """Post initialization."""
        # general settings
        self.decimation = 2
        self.sim.render_interval = self.decimation
        self.episode_length_s = 3.0
        self.viewer.eye = (3.5, 3.5, 3.5)
        # simulation settings
        self.sim.dt = 1.0 / 60.0

@configclass
class ReachEnvCfg_PLAY(ReachEnvCfg):
    def __post_init__(self):
        # post init of parent
        super().__post_init__()
        # make a smaller scene for play
        self.scene.num_envs = 50
        self.scene.env_spacing = 2.5
        # disable randomization for play
        self.observations.policy.enable_corruption = False