From f85f987e0b1bf614a4a0e3951bb6e20ad02ede24 Mon Sep 17 00:00:00 2001
From: CaoWangrenbo <cao.wangrenbo@yandex.com>
Date: Fri, 20 Jun 2025 01:09:43 +0800
Subject: [PATCH] =?UTF-8?q?update:=20=E5=A2=9E=E5=8A=A0=E5=85=B3=E8=8A=82?=
 =?UTF-8?q?=E8=83=BD=E9=87=8F=E8=B4=9F=E5=A5=96=E5=8A=B1=EF=BC=88=E7=BC=BA?=
 =?UTF-8?q?=E5=B0=91=E5=85=B3=E8=8A=82=E8=A7=92=E5=8A=A0=E9=80=9F=E5=BA=A6?=
 =?UTF-8?q?=E9=A1=B9=EF=BC=89?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

与此前改动相比变化不大：在 compute_rewards 的总奖励中新增两项能量负奖励——
关节扭矩范数平方项与关节角速度范数平方项；关节角加速度项暂未加入。
---
 .../tasks/direct/flexr_v0/flexr_v0_env.py         | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/flexr_v0_env.py b/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/flexr_v0_env.py
index 81426b1..354a8ba 100644
--- a/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/flexr_v0_env.py
+++ b/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/flexr_v0_env.py
@@ -595,7 +595,20 @@ def compute_rewards(
         # action_rate = torch.norm(actions - last_actions, dim=1, keepdim=True)  # shape: [num_envs, 1]
         # action_rate_reward = -1.0 * action_rate * rew_scale_action_rate
 
-        total_reward = tracking_reward + z_reward + omega_xy_reward
+        # 能量奖励
+        # # 关节扭矩 - 扭矩范数的平方
+        joint_torque_norm_squared = torch.sum(joint_torque.pow(2), dim=1, keepdim=True)  # shape: [num_envs, 1]
+        joint_torque_reward = -1.0 * joint_torque_norm_squared * rew_scale_joint_torque * 0.00002 * dt
+        # # 关节速度 - 角速度范数平方和角加速度范数平方（目前先使用角速度）
+        joint_vel_norm_squared = torch.sum(joint_vel.pow(2), dim=1, keepdim=True)  # shape: [num_envs, 1]
+        joint_vel_reward = -1.0 * joint_vel_norm_squared * rew_scale_joint_motion * 0.001 * dt
+
+
+        # # 关节运动
+        # joint_vel_norm = torch.norm(joint_vel, dim=1, keepdim=True)  # shape: [num_envs, 1]
+        # joint_vel_reward = -1.0 * joint_vel_norm * rew_scale_joint_motion
+
+        total_reward = tracking_reward + z_reward + omega_xy_reward + joint_torque_reward + joint_vel_reward
 
         # # 调试打印张量大小
         # print(f"tracking_reward: {tracking_reward.shape}, z_reward: {z_reward.shape}, omega_xy_reward: {omega_xy_reward.shape}, total_reward: {total_reward.shape}")