From ab8ce4d3330050d8f08fce1a720a608cf8ccf2fc Mon Sep 17 00:00:00 2001
From: CaoWangrenbo
Date: Thu, 19 Jun 2025 00:29:38 +0800
Subject: [PATCH] update: revise the observation space and reward function
 following the approach of the legged_gym paper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The robot now stays stable after roughly 50 training iterations (4096 envs).
---
 .../flexr_v0/agents/rl_games_ppo_cfg.yaml     |   2 +-
 .../direct/flexr_v0/agents/rsl_rl_ppo_cfg.py  |   8 +-
 .../tasks/direct/flexr_v0/flexr_v0_env.py     | 285 ++++++++++--------
 .../tasks/direct/flexr_v0/flexr_v0_env_cfg.py |  15 +-
 4 files changed, 179 insertions(+), 131 deletions(-)

diff --git a/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/agents/rl_games_ppo_cfg.yaml b/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/agents/rl_games_ppo_cfg.yaml
index c3844a1..453691c 100644
--- a/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/agents/rl_games_ppo_cfg.yaml
+++ b/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/agents/rl_games_ppo_cfg.yaml
@@ -47,7 +47,7 @@ params:
     env_name: rlgpu
     device: 'cuda:0'
     device_name: 'cuda:0'
-    multi_gpu: True
+    multi_gpu: False
     ppo: True
     mixed_precision: False
     normalize_input: True
diff --git a/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/agents/rsl_rl_ppo_cfg.py b/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/agents/rsl_rl_ppo_cfg.py
index 133d552..480792d 100644
--- a/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/agents/rsl_rl_ppo_cfg.py
+++ b/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/agents/rsl_rl_ppo_cfg.py
@@ -17,10 +17,10 @@ class PPORunnerCfg(RslRlOnPolicyRunnerCfg):
     empirical_normalization = False
     policy = RslRlPpoActorCriticCfg(
         init_noise_std=1.0,
-        # actor_hidden_dims=[256, 128, 64, 32],
-        # critic_hidden_dims=[256, 128, 64, 32],
-        actor_hidden_dims=[32, 32, 32],
-        critic_hidden_dims=[32, 32, 32],
+        actor_hidden_dims=[256, 128, 64, 32],
+        critic_hidden_dims=[256, 128, 64, 32],
+        # actor_hidden_dims=[32, 32, 32],  # old config
+        # critic_hidden_dims=[32, 32, 32],
         activation="elu",
     )
     algorithm = RslRlPpoAlgorithmCfg(
diff --git a/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/flexr_v0_env.py b/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/flexr_v0_env.py
index f90c7e6..6f6296a 100644
--- a/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/flexr_v0_env.py
+++ b/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/flexr_v0_env.py
@@ -13,7 +13,7 @@ import isaaclab.sim as sim_utils
 from isaaclab.assets import Articulation
 from isaaclab.envs import DirectRLEnv
 from isaaclab.sim.spawners.from_files import GroundPlaneCfg, spawn_ground_plane
-from isaaclab.utils.math import sample_uniform, euler_xyz_from_quat
+from isaaclab.utils.math import sample_uniform, euler_xyz_from_quat, quat_apply_inverse
 from isaaclab.sensors import RayCaster, RayCasterCfg, patterns
 from isaaclab.markers import VisualizationMarkers, VisualizationMarkersCfg
 from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR
@@ -46,12 +46,12 @@ def define_markers() -> VisualizationMarkers:
 # TODO: check the sensor's parent-frame configuration
 def define_height_sensor() -> RayCaster:
     height_sensor_cfg = RayCasterCfg(
-        prim_path="/World/envs/env_.*/Robot/body",  # explicit path
-        update_period=0.02,
-        offset=RayCasterCfg.OffsetCfg(pos=(0.0, 0.0, 1.0)),  # more sensible initial height
+        prim_path="/World/envs/env_.*/Robot/body",
+        update_period=0.02,  # 50 Hz
+        offset=RayCasterCfg.OffsetCfg(pos=(0.0, 0.0, 0.0)),
         attach_yaw_only=True,
-        pattern_cfg=patterns.GridPatternCfg(resolution=0.1, size=[1.0, 1.0]),  # type: ignore
-        debug_vis=True,
+        pattern_cfg=patterns.GridPatternCfg(resolution=0.1, size=[1.0, 1.0]),  # type: ignore  # TODO: tune the sensor footprint
+        debug_vis=False,
         mesh_prim_paths=["/World/ground"],  # confirmed actual ground prim path
     )
     return RayCaster(cfg=height_sensor_cfg)
@@ -91,19 +91,34 @@ class FlexrV0Env(DirectRLEnv):
             self._actuator_joint_indices["wheel_BR"]
         )
 
-        # Debug: print all the mappings
-        logging.debug("Actuator Group to Joint Indices:", self._actuator_joint_indices)
-
-        # Initialize state variables
-        self.joint_pos = self.robot.data.joint_pos
-        self.joint_vel = self.robot.data.joint_vel
+        # # Debug: print all the mappings
+        # logging.debug("Actuator Group to Joint Indices:", self._actuator_joint_indices)
 
+        # Intermediate variables
+        self.dt = self.cfg.sim.dt
+        # Commands - track only the x/y components of the linear velocity and the z component of the angular velocity
+        # FIXME: initialized to zero for now
+        self.cmd_ang_vel = torch.zeros_like(self.robot.data.root_ang_vel_w)
+        self.cmd_lin_vel = torch.zeros_like(self.robot.data.root_lin_vel_w)
         # Angular velocity
         self._last_root_ang_vel = torch.zeros_like(self.robot.data.root_ang_vel_w)
         # Orientation
         self.orientations = torch.zeros_like(self.robot.data.root_quat_w)
-
+        # Linear velocity [num_envs, 3]
+        self.base_lin_vel = torch.zeros_like(self.robot.data.root_lin_vel_w)
+        # Angular velocity [num_envs, 3]
+        self.base_ang_vel = torch.zeros_like(self.robot.data.root_ang_vel_w)
+        # Joint velocities [num_envs, n_joints]
+        self.joint_vel = torch.zeros_like(self.robot.data.joint_vel)
+        # Joint positions [num_envs, n_joints]
+        self.joint_pos = torch.zeros_like(self.robot.data.joint_pos)
+        # Joint torques [num_envs, n_joints]
+        self.joint_torque = torch.zeros_like(self.robot.data.applied_torque)
+
+        # Initialize state variables
+        self.joint_pos = self.robot.data.joint_pos
+        self.joint_vel = self.robot.data.joint_vel
+
     def _get_wheel_joint_indices(self, prefix: str, joint_names: list[str]) -> list[int]:
         """Return all wheel joint indices for the given leg (as a list of ints)."""
         indices = []
@@ -134,7 +149,8 @@ class FlexrV0Env(DirectRLEnv):
 
     def _pre_physics_step(self, actions: torch.Tensor) -> None:
+        # Record the previous step's action before overwriting it; the action-rate term needs consecutive policy outputs.
+        self.last_actions = self.actions.clone() if hasattr(self, "actions") else actions.clone()
         self.actions = actions.clone()
-
         self.orientations = self.robot.data.root_quat_w
 
     def _apply_action(self) -> None:
@@ -174,91 +190,98 @@ class FlexrV0Env(DirectRLEnv):
         for i, group in enumerate(wheel_groups):
             joint_indices = self._actuator_joint_indices[group]  # global indices
             wheel_vel_target[:, joint_indices] = wheel_actions[:, i].unsqueeze(-1)
-
+
+        self.robot.set_joint_velocity_target(
+            wheel_vel_target[:, self._all_wheel_joint_indices],  # select only the wheel joints
+            joint_ids=self._all_wheel_joint_indices
+        )
+
         # print(f"wheel_actions: {wheel_actions}")
         # print(f"actions: {self.actions}")
         # print(f"Arm Pos Target: {arm_pos_target}")
         # print(f"Wheel Vel Target: {wheel_vel_target}")
-
-        # self.robot.set_joint_velocity_target(
-        #     wheel_vel_target[:, self._all_wheel_joint_indices],  # select only the wheel joints
-        #     joint_ids=self._all_wheel_joint_indices
-        # )
-
-
+
     def _get_observations(self) -> dict:
-        # Get the arm joint velocities
+        # Gravity vector expressed in the body frame
+        # Gravity direction in the world frame: (0, 0, -1)
+        gravity_world = torch.tensor([0.0, 0.0, -1.0], device=self.device).repeat(self.num_envs, 1)
+        # Rotate into the body frame
+        gravity_body = quat_apply_inverse(self.orientations, gravity_world)
+
+        # Arm joint positions and velocities
+        arm_pos = self.joint_pos[:, self._arm_joint_indices]  # [num_envs, 4]
         arm_vel = self.joint_vel[:, self._arm_joint_indices]  # [num_envs, 4]
 
-        # Average velocity of each wheel group
+        # Wheel-group velocities
         wheel_FL_vel = self.joint_vel[:, self._actuator_joint_indices["wheel_FL"]].mean(dim=1, keepdim=True)
         wheel_FR_vel = self.joint_vel[:, self._actuator_joint_indices["wheel_FR"]].mean(dim=1, keepdim=True)
         wheel_BL_vel = self.joint_vel[:, self._actuator_joint_indices["wheel_BL"]].mean(dim=1, keepdim=True)
         wheel_BR_vel = self.joint_vel[:, self._actuator_joint_indices["wheel_BR"]].mean(dim=1, keepdim=True)
         wheel_vel = torch.cat([wheel_FL_vel, wheel_FR_vel, wheel_BL_vel, wheel_BR_vel], dim=1)  # [num_envs, 4]
 
-        # Get the base pose and motion state
-        base_quat = self.robot.data.root_quat_w        # [num_envs, 4] (w,x,y,z)
-        base_ang_vel = self.robot.data.root_ang_vel_w  # [num_envs, 3] (x,y,z)
-
-        # Convert the quaternion to Euler angles (using the Isaac Lab helper)
-        roll, pitch, _ = euler_xyz_from_quat(base_quat)  # radians
+        # Previous action: self.actions holds the action applied during the step that produced this observation
+        last_actions = self.actions if hasattr(self, "actions") else torch.zeros(self.num_envs, self.cfg.action_space, device=self.device)
 
-        # # Normalize the angles to [-1, 1] (assuming ±π/2 is the maximum range)
-        # norm_pitch = pitch / (math.pi/2)  # normalize to [-1, 1]
-        # norm_roll = roll / (math.pi/2)    # normalize to [-1, 1]
-        norm_pitch = pitch / (math.pi)  # normalize to [-1, 1]
-        norm_roll = roll / (math.pi)    # normalize to [-1, 1]
+        # Commanded xy linear velocity and commanded yaw rate
+        cmd_lin_vel_xy = self.cmd_lin_vel[:, :2]  # [num_envs, 2]
+        cmd_ang_vel_z = self.cmd_ang_vel[:, 2:3]  # [num_envs, 1]
 
-        # Add noise (optional)
-        noise_std = 0.01  # noise standard deviation
-        norm_pitch += torch.randn_like(norm_pitch) * noise_std
-        norm_roll += torch.randn_like(norm_roll) * noise_std
+        # # Height scan (from height_sensor)
+        # # Make sure the height-sensor buffers are up to date
+        # self.height_sensor._update_outdated_buffers()
+        # # Fetch and flatten the height data (assuming 121 height points per env)
+        # height_scan = self.height_sensor.data.ray_hits_w[..., 2]  # [num_envs, 121]
+        # # Normalize the height data
+        # height_scan = (height_scan - height_scan.mean(dim=1, keepdim=True)) / (height_scan.std(dim=1, keepdim=True) + 1e-6)
 
         # Assemble all observations
         obs = torch.cat(
             (
+                self.base_lin_vel,         # base linear velocity   [num_envs, 3]
+                self.base_ang_vel,         # base angular velocity  [num_envs, 3]
+                gravity_body,              # projected gravity      [num_envs, 3]
+                arm_pos,                   # arm positions          [num_envs, 4]
                 arm_vel,                   # arm velocities         [num_envs, 4]
-                wheel_vel,                 # average wheel-group velocities [num_envs, 4]
-                norm_pitch.unsqueeze(-1),  # normalized pitch [num_envs, 1]
-                norm_roll.unsqueeze(-1),   # normalized roll [num_envs, 1]
-                base_ang_vel               # angular velocity about all three axes [num_envs, 3]
+                wheel_vel,                 # wheel-group velocities [num_envs, 4]
+                last_actions,              # previous actions       [num_envs, 8]
+                cmd_lin_vel_xy,            # commanded xy velocity  [num_envs, 2]
+                cmd_ang_vel_z,             # commanded yaw rate     [num_envs, 1]
+                # height_scan              # height scan            [num_envs, 121]
             ),
             dim=-1,
         )
         observations = {"policy": obs}
         return observations
 
-    def _get_rewards(self) -> torch.Tensor:
-        # Get the current angular velocity
-        current_ang_vel = self.robot.data.root_ang_vel_w
-
-        # Angular acceleration = (current angular velocity - previous angular velocity) / dt
-        root_ang_acc = (current_ang_vel - self._last_root_ang_vel) / self.step_dt
-
-        # Update the previous angular velocity
-        self._last_root_ang_vel[:] = current_ang_vel
-
-        # Target forward velocity (tune as needed)
-        target_velocity = torch.ones(self.num_envs, 1, device=self.device) * self.cfg.target_velocity
-
-        # Compute the reward
+    def _get_rewards(self) -> torch.Tensor:
+        self.joint_pos = self.robot.data.joint_pos
+        self.joint_vel = self.robot.data.joint_vel
+        self.joint_torque = self.robot.data.applied_torque
+        self.base_lin_vel = self.robot.data.root_lin_vel_w
+        self.base_ang_vel = self.robot.data.root_ang_vel_w
+        self.orientations = self.robot.data.root_quat_w
+
         total_reward = compute_rewards(
-            root_ang_acc=root_ang_acc,
-            last_root_ang_vel=self._last_root_ang_vel,
-            dt=self.step_dt,
-            rew_scale_smoothness=self.cfg.rew_scale_smoothness,
-            rew_scale_alive=self.cfg.rew_scale_alive,
-            rew_scale_velocity=self.cfg.rew_scale_velocity,
-            base_lin_vel=self.robot.data.root_lin_vel_w,
-            target_velocity=target_velocity,
-            terminated=self.reset_terminated
+            self.dt,
+            self.cmd_lin_vel,
+            self.cmd_ang_vel,
+            self.base_lin_vel,
+            self.base_ang_vel,
+            self.joint_vel,
+            self.joint_torque,
+            self.actions,
+            self.last_actions,
+            self.cfg.rew_scale_lin_vel,
+            self.cfg.rew_scale_ang_vel,
+            self.cfg.rew_scale_z,
+            self.cfg.rew_scale_orientation,
+            self.cfg.rew_scale_joint_motion,
+            self.cfg.rew_scale_joint_torque,
+            self.cfg.rew_scale_action_rate,
         )
 
-        # logging.info(f"Computed rewards mean: {total_reward.mean().item()}")
-
-        return total_reward
+        return total_reward.reshape(-1)
 
     # def _get_dones(self) -> tuple[torch.Tensor, torch.Tensor]:
     #     self.joint_pos = self.robot.data.joint_pos
@@ -319,7 +342,7 @@ class FlexrV0Env(DirectRLEnv):
 
 
     def _check_robot_flipped(self) -> torch.Tensor:
-        """Improved flip detection that correctly handles the 0/360 degree wrap-around"""
+        """Flip detection"""
         # Get the Euler angles (radians)
         roll, pitch, _ = euler_xyz_from_quat(self.orientations)
 
@@ -343,15 +366,6 @@ class FlexrV0Env(DirectRLEnv):
             print(f"Robot flipped at idx: {torch.nonzero(flipped)} [x: {roll_deg[flipped]}, y: {pitch_deg[flipped]}]")
 
         return flipped
-
-    # def _get_velocities(self, env_ids = 0) -> tuple[torch.Tensor, torch.Tensor]:
-    #     return self.joint_vel[env_ids, :3], self.joint_vel[env_ids, 3:]
-
-    # def _get_positions(self, env_ids = 0) -> tuple[torch.Tensor, torch.Tensor]:
-    #     return self.joint_pos[env_ids, :3], self.joint_pos[env_ids, 3:]
-
-    # def _get_orientations(self, env_ids = 0) -> tuple[torch.Tensor, torch.Tensor]:
-    #     return self.orientations[env_ids, :3], self.orientations[env_ids, 3:]
 
     def _debug_print_idx(self, env_ids : Sequence[int]):
         logging.debug(f"env_ids: {env_ids}")
@@ -360,50 +374,75 @@ class FlexrV0Env(DirectRLEnv):
     # logging.debug(f"orientations: {self.orientations[env_ids]}")
         logging.debug(f"euler_angles: {euler_xyz_from_quat(self.orientations[env_ids])}")
 
-
 @torch.jit.script
 def compute_rewards(
-    root_ang_acc: torch.Tensor,       # root angular acceleration [num_envs, 3] (roll, pitch, yaw)
-    last_root_ang_vel: torch.Tensor,  # root angular velocity of the previous frame [num_envs, 3]
-    dt: float,                        # time step
-    rew_scale_smoothness: float,      # smoothness reward scale
-    rew_scale_alive: float,           # alive reward scale
-    rew_scale_velocity: float,        # velocity reward scale
-    base_lin_vel: torch.Tensor,       # base linear velocity [num_envs, 3]
-    target_velocity: torch.Tensor,    # target velocity [num_envs, 1]
-    terminated: torch.Tensor          # termination flags [num_envs]
-) -> torch.Tensor:
-    """
-    Compute the total reward, including a smoothness term.
-
-    Args:
-        root_ang_acc: current angular acceleration (finite difference of the angular velocity)
-        last_root_ang_vel: angular velocity of the previous frame
-        dt: time step
-        rew_scale_smoothness: smoothness reward weight
-        rew_scale_alive: alive reward weight
-        rew_scale_velocity: velocity-tracking reward weight
-        base_lin_vel: current linear velocity
-        target_velocity: target forward velocity
-        terminated: termination flags
-
-    Returns:
-        The total reward.
-    """
-    # Smoothness reward: penalize abrupt changes in pitch and roll
-    # Use only the pitch and roll accelerations (ignore yaw)
-    pitch_roll_acc = root_ang_acc[:, :2]  # [num_envs, 2]
-    smoothness_penalty = torch.sum(torch.square(pitch_roll_acc), dim=1)  # [num_envs]
-    smoothness_reward = -rew_scale_smoothness * smoothness_penalty * dt
-
-    # Velocity-tracking reward (forward x axis)
-    velocity_error = torch.abs(base_lin_vel[:, 0] - target_velocity[:, 0])  # [num_envs]
-    velocity_reward = -rew_scale_velocity * velocity_error
-
-    # Alive reward
-    alive_reward = rew_scale_alive * (~terminated).float()
-
-    # Total reward
-    # total_reward = alive_reward + velocity_reward + smoothness_reward
-    # total_reward = alive_reward + smoothness_reward
-    # total_reward = velocity_reward
-    total_reward = smoothness_reward
-
-    return total_reward
\ No newline at end of file
+    # Inputs
+    dt: float,
+    cmd_lin_vel: torch.Tensor,
+    cmd_ang_vel: torch.Tensor,
+    base_lin_vel: torch.Tensor,
+    base_ang_vel: torch.Tensor,
+    joint_vel: torch.Tensor,
+    joint_torque: torch.Tensor,
+    actions: torch.Tensor,
+    last_actions: torch.Tensor,
+    # Reward scales
+    rew_scale_lin_vel: float,
+    rew_scale_ang_vel: float,
+    rew_scale_z: float,
+    rew_scale_orientation: float,
+    rew_scale_joint_motion: float,
+    rew_scale_joint_torque: float,
+    rew_scale_action_rate: float,
+) -> torch.Tensor:
+    # Linear/angular velocity tracking (squared Euclidean distance between command and measurement)
+    sigma_squared = 0.25
+    # Linear-velocity term
+    # Extract the xy velocity components
+    v_target_xy = cmd_lin_vel[:, :2]  # [num_envs, 2]
+    v_actual_xy = base_lin_vel[:, :2]  # [num_envs, 2]
+    # Squared norm of the deviation
+    v_diff_xy = v_target_xy - v_actual_xy
+    v_diff_norm_squared = torch.sum(v_diff_xy ** 2, dim=1, keepdim=True)
+    # Linear-velocity tracking reward
+    linear_error = torch.exp(-v_diff_norm_squared / sigma_squared)  # [num_envs, 1]
+    # Angular-velocity term
+    omega_target_z = cmd_ang_vel[:, 2].unsqueeze(1)  # [num_envs, 1]
+    omega_actual_z = base_ang_vel[:, 2].unsqueeze(1)  # [num_envs, 1]
+    # Deviation
+    omega_diff_z = omega_target_z - omega_actual_z  # [num_envs, 1]
+    omega_diff_squared = omega_diff_z.pow(2)  # [num_envs, 1]
+    # Angular-velocity tracking reward
+    angular_error = torch.exp(-omega_diff_squared / sigma_squared)  # [num_envs, 1]
+
+    # The extra 1.0 / 0.5 factors are hand-tuned multipliers on top of the cfg scales
+    tracking_reward = linear_error * rew_scale_lin_vel * 1.0 * dt + angular_error * rew_scale_ang_vel * 0.5 * dt
+
+    # # Debug: print tensor shapes
+    # print(f"linear_error: {linear_error.shape}, angular_error: {angular_error.shape}, tracking_reward: {tracking_reward.shape}")
+
+    # z-axis smoothness: suppress vertical bouncing
+    v_z = base_lin_vel[:, 2:3]  # z-axis linear velocity, kept as [num_envs, 1]
+    v_z_squared = v_z.pow(2)
+    z_reward = -1.0 * v_z_squared * rew_scale_z * 4.0 * dt
+
+    # # Debug: print tensor shapes
+    # print(f"v_z: {v_z.shape}, v_z_squared: {v_z_squared.shape}, z_reward: {z_reward.shape}")
+
+    # Angular-velocity smoothness: suppress roll/pitch rates
+    omega_xy = base_ang_vel[:, :2]  # shape: [num_envs, 2]
+    omega_xy_norm_squared = torch.sum(omega_xy.pow(2), dim=1, keepdim=True)  # shape: [num_envs, 1]
+    omega_xy_reward = -1.0 * omega_xy_norm_squared * rew_scale_orientation * 0.05 * dt
+
+    # # Debug: print tensor shapes
+    # print(f"omega_xy: {omega_xy.shape}, omega_xy_norm_squared: {omega_xy_norm_squared.shape}, omega_xy_reward: {omega_xy_reward.shape}")
+
+    # # Action-rate penalty
+    # action_rate = torch.norm(actions - last_actions, dim=1, keepdim=True)  # shape: [num_envs, 1]
+    # action_rate_reward = -1.0 * action_rate * rew_scale_action_rate
+
+    total_reward = tracking_reward + z_reward + omega_xy_reward
+
+    # # Debug: print tensor shapes
+    # print(f"tracking_reward: {tracking_reward.shape}, z_reward: {z_reward.shape}, omega_xy_reward: {omega_xy_reward.shape}, total_reward: {total_reward.shape}")
+
+    return total_reward
\ No newline at end of file
diff --git a/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/flexr_v0_env_cfg.py b/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/flexr_v0_env_cfg.py
index ba0b335..09658e0 100644
--- a/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/flexr_v0_env_cfg.py
+++ b/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/flexr_v0_env_cfg.py
@@ -19,10 +19,10 @@ class FlexrV0EnvCfg(DirectRLEnvCfg):
     # env
     decimation = 2
-    episode_length_s = 5.0
+    episode_length_s = 10.0
     # - spaces definition
     action_space = 8
-    observation_space = 13
+    observation_space = 32
     state_space = 0
 
     # simulation
@@ -59,4 +59,13 @@ class FlexrV0EnvCfg(DirectRLEnvCfg):
     # rew_scale_pole_vel = -0.005
     # # - reset states/conditions
     # initial_pole_angle_range = [-0.25, 0.25]  # pole angle sample range on reset [rad]
-    # max_cart_pos = 3.0  # reset if cart exceeds this position [m]
\ No newline at end of file
+    # max_cart_pos = 3.0  # reset if cart exceeds this position [m]
+
+    # Reward weights  # TODO: move these into an external config
+    rew_scale_lin_vel = 1.0          # linear-velocity tracking
+    rew_scale_ang_vel = 0.5          # angular-velocity tracking
+    rew_scale_z = 0.1                # z-axis stability
+    rew_scale_orientation = 0.2      # orientation stability
+    rew_scale_joint_motion = 0.001   # joint motion
+    rew_scale_joint_torque = 0.00002 # joint torque
+    rew_scale_action_rate = 0.25     # action smoothness
\ No newline at end of file
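
Note on the new reward terms: both tracking rewards use the legged_gym-style squared-exponential kernel exp(-||e||^2 / sigma^2) with sigma^2 = 0.25, so each term equals 1.0 at perfect tracking and decays smoothly toward 0 as the error grows; the 32-dimensional observation decomposes as 3+3+3 (base linear velocity, base angular velocity, projected gravity) + 4+4+4 (arm positions, arm velocities, wheel velocities) + 8 (previous actions) + 2+1 (velocity commands). Below is a minimal standalone sketch of the kernel for reference; it is illustrative only (the tensor values are made up) and is not part of the patch:

import torch

def tracking_kernel(cmd_xy: torch.Tensor, vel_xy: torch.Tensor, sigma_sq: float = 0.25) -> torch.Tensor:
    # exp(-||cmd - vel||^2 / sigma^2): 1 at perfect tracking, ~0 for large errors
    err_sq = torch.sum((cmd_xy - vel_xy) ** 2, dim=1, keepdim=True)  # [num_envs, 1]
    return torch.exp(-err_sq / sigma_sq)

cmd = torch.tensor([[1.0, 0.0], [1.0, 0.0]])  # commanded xy base velocity
vel = torch.tensor([[1.0, 0.0], [0.5, 0.2]])  # measured xy base velocity
print(tracking_kernel(cmd, vel))  # ~[[1.0000], [0.3135]]

A smaller sigma_sq tightens the tolerance around the commanded velocity, trading early exploration signal for tracking precision.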