initial commit

CaoWangrenbo
2025-06-14 21:18:24 +08:00
commit b03507ab4e
35 changed files with 2349 additions and 0 deletions


@@ -0,0 +1,14 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""
Python module serving as a project/extension template.
"""
# Register Gym environments.
from .tasks import *
# Register UI extensions.
from .ui_extension_example import *


@@ -0,0 +1,17 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Package containing task implementations for the extension."""
##
# Register Gym environments.
##
from isaaclab_tasks.utils import import_packages
# The blacklist is used to prevent importing configs from sub-packages
_BLACKLIST_PKGS = ["utils", ".mdp"]
# Import all configs in this package
import_packages(__name__, _BLACKLIST_PKGS)


@@ -0,0 +1,6 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym # noqa: F401


@@ -0,0 +1,24 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym

from . import agents

##
# Register Gym environments.
##

gym.register(
    id="Template-Flexr-V0-Direct-v0",
    entry_point=f"{__name__}.flexr_v0_env:FlexrV0Env",
    disable_env_checker=True,
    kwargs={
        "env_cfg_entry_point": f"{__name__}.flexr_v0_env_cfg:FlexrV0EnvCfg",
        "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
        "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:PPORunnerCfg",
    },
)
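
A minimal sketch of how this registered id could be consumed, assuming the simulation app has already been launched (e.g. via `isaaclab.app.AppLauncher`); the `device` and `num_envs` values are illustrative:

import gymnasium as gym
from isaaclab_tasks.utils import parse_env_cfg

# resolve the env cfg registered under "env_cfg_entry_point" above
env_cfg = parse_env_cfg("Template-Flexr-V0-Direct-v0", device="cuda:0", num_envs=16)
# gym.make forwards the cfg to FlexrV0Env via the entry point
env = gym.make("Template-Flexr-V0-Direct-v0", cfg=env_cfg)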


@@ -0,0 +1,4 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause


@@ -0,0 +1,78 @@
params:
  seed: 42

  # environment wrapper clipping
  env:
    # added to the wrapper
    clip_observations: 5.0
    # can make custom wrapper?
    clip_actions: 1.0

  algo:
    name: a2c_continuous

  model:
    name: continuous_a2c_logstd

  # doesn't have this fine grained control but made it close
  network:
    name: actor_critic
    separate: False
    space:
      continuous:
        mu_activation: None
        sigma_activation: None

        mu_init:
          name: default
        sigma_init:
          name: const_initializer
          val: 0
        fixed_sigma: True
    mlp:
      units: [32, 32]
      activation: elu
      d2rl: False

      initializer:
        name: default
      regularizer:
        name: None

  load_checkpoint: False # flag which sets whether to load the checkpoint
  load_path: '' # path to the checkpoint to load

  config:
    name: cartpole_direct
    env_name: rlgpu
    device: 'cuda:0'
    device_name: 'cuda:0'
    multi_gpu: False
    ppo: True
    mixed_precision: False
    normalize_input: True
    normalize_value: True
    num_actors: -1  # configured from the script (based on num_envs)
    reward_shaper:
      scale_value: 0.1
    normalize_advantage: True
    gamma: 0.99
    tau: 0.95
    learning_rate: 5e-4
    lr_schedule: adaptive
    kl_threshold: 0.008
    score_to_win: 20000
    max_epochs: 150
    save_best_after: 50
    save_frequency: 25
    grad_norm: 1.0
    entropy_coef: 0.0
    truncate_grads: True
    e_clip: 0.2
    horizon_length: 32
    minibatch_size: 16384
    mini_epochs: 8
    critic_coef: 4
    clip_value: True
    seq_length: 4
    bounds_loss_coef: 0.0001
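
This YAML is not read by the agent directly; it is resolved by name through the `rl_games_cfg_entry_point` registered in the task's `__init__.py`. A hedged sketch of that lookup (task id taken from this template):

from isaaclab_tasks.utils import load_cfg_from_registry

# returns the parsed YAML above as a plain dict
agent_cfg = load_cfg_from_registry("Template-Flexr-V0-Direct-v0", "rl_games_cfg_entry_point")
assert agent_cfg["params"]["config"]["horizon_length"] == 32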


@@ -0,0 +1,37 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from isaaclab.utils import configclass

from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg


@configclass
class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
    num_steps_per_env = 16
    max_iterations = 150
    save_interval = 50
    experiment_name = "cartpole_direct"
    empirical_normalization = False
    policy = RslRlPpoActorCriticCfg(
        init_noise_std=1.0,
        actor_hidden_dims=[32, 32],
        critic_hidden_dims=[32, 32],
        activation="elu",
    )
    algorithm = RslRlPpoAlgorithmCfg(
        value_loss_coef=1.0,
        use_clipped_value_loss=True,
        clip_param=0.2,
        entropy_coef=0.005,
        num_learning_epochs=5,
        num_mini_batches=4,
        learning_rate=1.0e-3,
        schedule="adaptive",
        gamma=0.99,
        lam=0.95,
        desired_kl=0.01,
        max_grad_norm=1.0,
    )
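
A minimal sketch of how this runner config is typically consumed, mirroring what an rsl_rl train script does; `env` is assumed to be a gymnasium env created from the task id above, and the `log_dir` path is illustrative:

from isaaclab_rl.rsl_rl import RslRlVecEnvWrapper
from rsl_rl.runners import OnPolicyRunner

runner_cfg = PPORunnerCfg()
# wrap the Isaac Lab env so rsl_rl sees the batched (num_envs, dim) interface
vec_env = RslRlVecEnvWrapper(env)
runner = OnPolicyRunner(vec_env, runner_cfg.to_dict(), log_dir="logs/rsl_rl/cartpole_direct", device="cuda:0")
runner.learn(num_learning_iterations=runner_cfg.max_iterations)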


@@ -0,0 +1,132 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations

import math
import torch
from collections.abc import Sequence

import isaaclab.sim as sim_utils
from isaaclab.assets import Articulation
from isaaclab.envs import DirectRLEnv
from isaaclab.sim.spawners.from_files import GroundPlaneCfg, spawn_ground_plane
from isaaclab.utils.math import sample_uniform

from .flexr_v0_env_cfg import FlexrV0EnvCfg


class FlexrV0Env(DirectRLEnv):
    cfg: FlexrV0EnvCfg

    def __init__(self, cfg: FlexrV0EnvCfg, render_mode: str | None = None, **kwargs):
        super().__init__(cfg, render_mode, **kwargs)

        self._cart_dof_idx, _ = self.robot.find_joints(self.cfg.cart_dof_name)
        self._pole_dof_idx, _ = self.robot.find_joints(self.cfg.pole_dof_name)

        self.joint_pos = self.robot.data.joint_pos
        self.joint_vel = self.robot.data.joint_vel

    def _setup_scene(self):
        self.robot = Articulation(self.cfg.robot_cfg)
        # add ground plane
        spawn_ground_plane(prim_path="/World/ground", cfg=GroundPlaneCfg())
        # clone and replicate
        self.scene.clone_environments(copy_from_source=False)
        # add articulation to scene
        self.scene.articulations["robot"] = self.robot
        # add lights
        light_cfg = sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75))
        light_cfg.func("/World/Light", light_cfg)

    def _pre_physics_step(self, actions: torch.Tensor) -> None:
        self.actions = actions.clone()

    def _apply_action(self) -> None:
        self.robot.set_joint_effort_target(self.actions * self.cfg.action_scale, joint_ids=self._cart_dof_idx)

    def _get_observations(self) -> dict:
        obs = torch.cat(
            (
                self.joint_pos[:, self._pole_dof_idx[0]].unsqueeze(dim=1),
                self.joint_vel[:, self._pole_dof_idx[0]].unsqueeze(dim=1),
                self.joint_pos[:, self._cart_dof_idx[0]].unsqueeze(dim=1),
                self.joint_vel[:, self._cart_dof_idx[0]].unsqueeze(dim=1),
            ),
            dim=-1,
        )
        observations = {"policy": obs}
        return observations

    def _get_rewards(self) -> torch.Tensor:
        total_reward = compute_rewards(
            self.cfg.rew_scale_alive,
            self.cfg.rew_scale_terminated,
            self.cfg.rew_scale_pole_pos,
            self.cfg.rew_scale_cart_vel,
            self.cfg.rew_scale_pole_vel,
            self.joint_pos[:, self._pole_dof_idx[0]],
            self.joint_vel[:, self._pole_dof_idx[0]],
            self.joint_pos[:, self._cart_dof_idx[0]],
            self.joint_vel[:, self._cart_dof_idx[0]],
            self.reset_terminated,
        )
        return total_reward

    def _get_dones(self) -> tuple[torch.Tensor, torch.Tensor]:
        self.joint_pos = self.robot.data.joint_pos
        self.joint_vel = self.robot.data.joint_vel

        time_out = self.episode_length_buf >= self.max_episode_length - 1
        out_of_bounds = torch.any(torch.abs(self.joint_pos[:, self._cart_dof_idx]) > self.cfg.max_cart_pos, dim=1)
        out_of_bounds = out_of_bounds | torch.any(torch.abs(self.joint_pos[:, self._pole_dof_idx]) > math.pi / 2, dim=1)
        return out_of_bounds, time_out

    def _reset_idx(self, env_ids: Sequence[int] | None):
        if env_ids is None:
            env_ids = self.robot._ALL_INDICES
        super()._reset_idx(env_ids)

        joint_pos = self.robot.data.default_joint_pos[env_ids]
        joint_pos[:, self._pole_dof_idx] += sample_uniform(
            self.cfg.initial_pole_angle_range[0] * math.pi,
            self.cfg.initial_pole_angle_range[1] * math.pi,
            joint_pos[:, self._pole_dof_idx].shape,
            joint_pos.device,
        )
        joint_vel = self.robot.data.default_joint_vel[env_ids]

        default_root_state = self.robot.data.default_root_state[env_ids]
        default_root_state[:, :3] += self.scene.env_origins[env_ids]

        self.joint_pos[env_ids] = joint_pos
        self.joint_vel[env_ids] = joint_vel

        self.robot.write_root_pose_to_sim(default_root_state[:, :7], env_ids)
        self.robot.write_root_velocity_to_sim(default_root_state[:, 7:], env_ids)
        self.robot.write_joint_state_to_sim(joint_pos, joint_vel, None, env_ids)


@torch.jit.script
def compute_rewards(
    rew_scale_alive: float,
    rew_scale_terminated: float,
    rew_scale_pole_pos: float,
    rew_scale_cart_vel: float,
    rew_scale_pole_vel: float,
    pole_pos: torch.Tensor,
    pole_vel: torch.Tensor,
    cart_pos: torch.Tensor,
    cart_vel: torch.Tensor,
    reset_terminated: torch.Tensor,
):
    rew_alive = rew_scale_alive * (1.0 - reset_terminated.float())
    rew_termination = rew_scale_terminated * reset_terminated.float()
    rew_pole_pos = rew_scale_pole_pos * torch.sum(torch.square(pole_pos).unsqueeze(dim=1), dim=-1)
    rew_cart_vel = rew_scale_cart_vel * torch.sum(torch.abs(cart_vel).unsqueeze(dim=1), dim=-1)
    rew_pole_vel = rew_scale_pole_vel * torch.sum(torch.abs(pole_vel).unsqueeze(dim=1), dim=-1)
    total_reward = rew_alive + rew_termination + rew_pole_pos + rew_cart_vel + rew_pole_vel
    return total_reward


@@ -0,0 +1,48 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from isaaclab_assets.robots.cartpole import CARTPOLE_CFG

from isaaclab.assets import ArticulationCfg
from isaaclab.envs import DirectRLEnvCfg
from isaaclab.scene import InteractiveSceneCfg
from isaaclab.sim import SimulationCfg
from isaaclab.utils import configclass


@configclass
class FlexrV0EnvCfg(DirectRLEnvCfg):
    # env
    decimation = 2
    episode_length_s = 5.0
    # - spaces definition
    action_space = 1
    observation_space = 4
    state_space = 0

    # simulation
    sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation)

    # robot(s)
    robot_cfg: ArticulationCfg = CARTPOLE_CFG.replace(prim_path="/World/envs/env_.*/Robot")

    # scene
    scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=4096, env_spacing=4.0, replicate_physics=True)

    # custom parameters/scales
    # - controllable joint
    cart_dof_name = "slider_to_cart"
    pole_dof_name = "cart_to_pole"
    # - action scale
    action_scale = 100.0  # [N]
    # - reward scales
    rew_scale_alive = 1.0
    rew_scale_terminated = -2.0
    rew_scale_pole_pos = -1.0
    rew_scale_cart_vel = -0.01
    rew_scale_pole_vel = -0.005
    # - reset states/conditions
    initial_pole_angle_range = [-0.25, 0.25]  # pole angle sample range on reset [rad]
    max_cart_pos = 3.0  # reset if cart exceeds this position [m]
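
Because `FlexrV0EnvCfg` is a `configclass`, its fields can be overridden before the env is constructed. A smoke-test sketch, assuming the simulation app is already running and `FlexrV0Env`/`FlexrV0EnvCfg` from the two files above are importable; the zero-action rollout is illustrative:

import torch

# FlexrV0Env and FlexrV0EnvCfg as defined in the modules above
env_cfg = FlexrV0EnvCfg()
env_cfg.scene.num_envs = 16  # override the 4096 default for a quick check
env = FlexrV0Env(cfg=env_cfg)

obs, _ = env.reset()
for _ in range(100):
    actions = torch.zeros(env.num_envs, env_cfg.action_space, device=env.device)
    obs, rew, terminated, truncated, extras = env.step(actions)
env.close()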


@@ -0,0 +1,46 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import omni.ext
import omni.ui


# Functions and vars are available to other extensions as usual in python: `example.python_ext.some_public_function(x)`
def some_public_function(x: int):
    print("[FLEXR_v0] some_public_function was called with x: ", x)
    return x**x


# Any class derived from `omni.ext.IExt` in the top-level module (defined in `python.modules` of `extension.toml`) will
# be instantiated when the extension gets enabled, and `on_startup(ext_id)` will be called. Later, when the extension
# gets disabled, `on_shutdown()` is called.
class ExampleExtension(omni.ext.IExt):
    # ext_id is the current extension id. It can be used with the extension manager to query additional information,
    # like where this extension is located on the filesystem.
    def on_startup(self, ext_id):
        print("[FLEXR_v0] startup")

        self._count = 0

        self._window = omni.ui.Window("My Window", width=300, height=300)
        with self._window.frame:
            with omni.ui.VStack():
                label = omni.ui.Label("")

                def on_click():
                    self._count += 1
                    label.text = f"count: {self._count}"

                def on_reset():
                    self._count = 0
                    label.text = "empty"

                on_reset()

                with omni.ui.HStack():
                    omni.ui.Button("Add", clicked_fn=on_click)
                    omni.ui.Button("Reset", clicked_fn=on_reset)

    def on_shutdown(self):
        print("[FLEXR_v0] shutdown")


@@ -0,0 +1,35 @@
[package]
# Semantic Versioning is used: https://semver.org/
version = "0.1.0"

# Description
category = "isaaclab"
readme = "README.md"

title = "Extension Template"
author = "Isaac Lab Project Developers"
maintainer = "Isaac Lab Project Developers"
description = "Extension Template for Isaac Lab"
repository = "https://github.com/isaac-sim/IsaacLab.git"
keywords = ["extension", "template", "isaaclab"]

[dependencies]
"isaaclab" = {}
"isaaclab_assets" = {}
"isaaclab_mimic" = {}
"isaaclab_rl" = {}
"isaaclab_tasks" = {}
# NOTE: Add additional dependencies here

[[python.module]]
name = "FLEXR_v0"

[isaaclab_settings]
# TODO: Uncomment and list any apt dependencies here.
# If none, leave it commented out.
# apt_deps = ["example_package"]
# TODO: Uncomment and provide path to a ros_ws
# with rosdeps to be installed. If none,
# leave it commented out.
# ros_ws = "path/from/extension_root/to/ros_ws"


@@ -0,0 +1,10 @@
Changelog
---------

0.1.0 (2025-06-14)
~~~~~~~~~~~~~~~~~~

Added
^^^^^

* Created an initial template for building an extension or project based on Isaac Lab


@@ -0,0 +1,3 @@
[build-system]
requires = ["setuptools", "wheel", "toml"]
build-backend = "setuptools.build_meta"

source/FLEXR_v0/setup.py

@@ -0,0 +1,44 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Installation script for the 'FLEXR_v0' python package."""
import os
import toml
from setuptools import setup
# Obtain the extension data from the extension.toml file
EXTENSION_PATH = os.path.dirname(os.path.realpath(__file__))
# Read the extension.toml file
EXTENSION_TOML_DATA = toml.load(os.path.join(EXTENSION_PATH, "config", "extension.toml"))
# Minimum dependencies required prior to installation
INSTALL_REQUIRES = [
    # NOTE: Add dependencies
    "psutil",
]

# Installation operation
setup(
    name="FLEXR_v0",
    packages=["FLEXR_v0"],
    author=EXTENSION_TOML_DATA["package"]["author"],
    maintainer=EXTENSION_TOML_DATA["package"]["maintainer"],
    url=EXTENSION_TOML_DATA["package"]["repository"],
    version=EXTENSION_TOML_DATA["package"]["version"],
    description=EXTENSION_TOML_DATA["package"]["description"],
    keywords=EXTENSION_TOML_DATA["package"]["keywords"],
    install_requires=INSTALL_REQUIRES,
license="MIT",
    include_package_data=True,
    python_requires=">=3.10",
    classifiers=[
        "Natural Language :: English",
        "Programming Language :: Python :: 3.10",
        "Isaac Sim :: 4.5.0",
    ],
    zip_safe=False,
)
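
As with other Isaac Lab extension templates, this package is presumably installed in editable mode (e.g. `python -m pip install -e source/FLEXR_v0`) so that the `FLEXR_v0` module and the tasks it registers resolve from the training scripts.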