diff --git a/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/flexr_v0_env.py b/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/flexr_v0_env.py index 81426b1..354a8ba 100644 --- a/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/flexr_v0_env.py +++ b/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/flexr_v0_env.py @@ -595,7 +595,20 @@ def compute_rewards( # action_rate = torch.norm(actions - last_actions, dim=1, keepdim=True) # shape: [num_envs, 1] # action_rate_reward = -1.0 * action_rate * rew_scale_action_rate - total_reward = tracking_reward + z_reward + omega_xy_reward + # 能量奖励 + # # 关节扭矩 - 扭矩范数的平方 + joint_torque_norm_squared = torch.sum(joint_torque.pow(2), dim=1, keepdim=True) # shape: [num_envs, 1] + joint_torque_reward = -1.0 * joint_torque_norm_squared * rew_scale_joint_torque * 0.00002 * dt + # # 关节速度 - 角速度范数平方和角加速度范数平方(目前先使用角速度) + joint_vel_norm_squared = torch.sum(joint_vel.pow(2), dim=1, keepdim=True) # shape: [num_envs, 1] + joint_vel_reward = -1.0 * joint_vel_norm_squared * rew_scale_joint_motion * 0.001 * dt + + + # # 关节运动 + # joint_vel_norm = torch.norm(joint_vel, dim=1, keepdim=True) # shape: [num_envs, 1] + # joint_vel_reward = -1.0 * joint_vel_norm * rew_scale_joint_motion + + total_reward = tracking_reward + z_reward + omega_xy_reward + joint_torque_reward + joint_vel_reward # # 调试打印张量大小 # print(f"tracking_reward: {tracking_reward.shape}, z_reward: {z_reward.shape}, omega_xy_reward: {omega_xy_reward.shape}, total_reward: {total_reward.shape}")