gpr - smaller efforts, updates to export (#126)
update efforts and export
budzianowski authored Dec 10, 2024
1 parent 092511d commit 2059385
Showing 14 changed files with 84 additions and 716 deletions.
Binary file modified examples/gpr_walking.kinfer
Binary file modified examples/walking_policy.pt
2 changes: 1 addition & 1 deletion sim/envs/__init__.py
@@ -12,8 +12,8 @@
from sim.envs.humanoids.dora_env import DoraFreeEnv
from sim.envs.humanoids.g1_config import G1Cfg, G1CfgPPO
from sim.envs.humanoids.g1_env import G1FreeEnv
-from sim.envs.humanoids.gpr_env import GprFreeEnv
from sim.envs.humanoids.gpr_config import GprCfg, GprCfgPPO, GprStandingCfg
+from sim.envs.humanoids.gpr_env import GprFreeEnv
from sim.envs.humanoids.h1_config import H1Cfg, H1CfgPPO
from sim.envs.humanoids.h1_env import H1FreeEnv
from sim.envs.humanoids.xbot_config import XBotCfg, XBotCfgPPO
12 changes: 6 additions & 6 deletions sim/envs/humanoids/gpr_config.py
@@ -30,7 +30,7 @@ class safety(LeggedRobotCfg.safety):
# safety factors
pos_limit = 1.0
vel_limit = 1.0
-torque_limit = 0.85
+torque_limit = 1.0

class asset(LeggedRobotCfg.asset):
name = "gpr"
@@ -123,7 +123,7 @@ class domain_rand(LeggedRobotCfg.domain_rand):

randomize_base_mass = True
added_mass_range = [-3.0, 3.0]
-push_robots = False
+push_robots = True
push_interval_s = 4
max_push_vel_xy = 0.2
max_push_ang_vel = 0.4
@@ -150,14 +150,14 @@ class rewards:
max_dist = 0.5
# put some settings here for LLM parameter tuning
target_joint_pos_scale = 0.17 # rad
-target_feet_height = 0.06 # m
+target_feet_height = 0.05 # m

-cycle_time = 0.64 # sec
+cycle_time = 0.25 # sec
# if true negative total rewards are clipped at zero (avoids early termination problems)
only_positive_rewards = True
# tracking reward = exp(error*sigma)
tracking_sigma = 5.0
-max_contact_force = 700 # forces above this value are penalized
+max_contact_force = 400 # forces above this value are penalized

class scales:
# reference motion tracking
@@ -213,7 +213,7 @@ class GprStandingCfg(GprCfg):

class rewards:
# quite important to keep it right
-base_height_target = 1.06
+base_height_target = Robot.height
min_dist = 0.2
max_dist = 0.5
# put some settings here for LLM parameter tuning
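The safety and reward values above flow straight into the export path below: torque_limit becomes the exporter's tau_factor, and cycle_time drives the sinusoidal gait clock fed to the policy. A minimal sketch of what those two numbers mean in practice (function names here are illustrative, not from this repo):

    import math

    def gait_clock(t: float, cycle_time: float = 0.25) -> tuple:
        # cycle_time = 0.25 s gives a 4 Hz stepping cycle (previously 0.64 s, ~1.6 Hz).
        phase = 2.0 * math.pi * t / cycle_time
        return math.sin(phase), math.cos(phase)

    def scaled_effort_limit(urdf_effort: float, torque_limit: float = 1.0) -> float:
        # torque_limit is a safety factor on the URDF effort limit;
        # 1.0 now allows full rated torque (the old value clipped it to 85%).
        return urdf_effort * torque_limit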
32 changes: 17 additions & 15 deletions sim/model_export.py
@@ -15,17 +15,20 @@

@dataclass
class ActorCfg:
-embodiment: str = "gpr"
-cycle_time: float = 0.4 # Cycle time for sinusoidal command input
-action_scale: float = 0.25 # Scale for actions
-lin_vel_scale: float = 2.0 # Scale for linear velocity
-ang_vel_scale: float = 1.0 # Scale for angular velocity
-quat_scale: float = 1.0 # Scale for quaternion
-dof_pos_scale: float = 1.0 # Scale for joint positions
-dof_vel_scale: float = 0.05 # Scale for joint velocities
-frame_stack: int = 15 # Number of frames to stack for the policy input
-clip_observations: float = 18.0 # Clip observations to this value
-clip_actions: float = 18.0 # Clip actions to this value
+embodiment: str
+cycle_time: float # Cycle time for sinusoidal command input
+action_scale: float # Scale for actions
+lin_vel_scale: float # Scale for linear velocity
+ang_vel_scale: float # Scale for angular velocity
+quat_scale: float # Scale for quaternion
+dof_pos_scale: float # Scale for joint positions
+dof_vel_scale: float # Scale for joint velocities
+frame_stack: int # Number of frames to stack for the policy input
+clip_observations: float # Clip observations to this value
+clip_actions: float # Clip actions to this value
+sim_dt: float # Simulation time step
+sim_decimation: int # Simulation decimation
+tau_factor: float # Torque limit factor


class ActorCritic(nn.Module):
@@ -242,6 +245,8 @@ def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, T
num_actions = a_model.num_actions
num_observations = a_model.num_observations

+default_standing = list(a_model.robot.default_standing().values())

return (
a_model,
{
@@ -250,6 +255,7 @@
"robot_damping": robot_damping,
"num_actions": num_actions,
"num_observations": num_observations,
"default_standing": default_standing,
},
input_tensors,
)
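get_actor_policy now also reports the robot's default standing pose in the sim2sim metadata. A hypothetical consumer (not part of this commit) might use it to seed joint targets before the policy takes over, assuming one entry per actuated joint in action order:

    def warm_start_targets(sim2sim_info: dict) -> list:
        # "default_standing" is assumed to hold one angle per actuated joint,
        # in the same order as the policy's action vector.
        default_standing = sim2sim_info["default_standing"]
        assert len(default_standing) == sim2sim_info["num_actions"]
        return list(default_standing)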
@@ -338,7 +344,3 @@ def convert_model_to_onnx(model_path: str, cfg: ActorCfg, save_path: Optional[st
buffer2.seek(0)

return ort.InferenceSession(buffer2.read())


-if __name__ == "__main__":
-convert_model_to_onnx("model_3000.pt", ActorCfg(), "policy.onnx")
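With the __main__ block gone and every ActorCfg default removed, model_export.py is no longer meant to be run directly; the export is driven from sim/play.py, which fills each field from the task's env_cfg. If it were called standalone, a fully specified config would be required. A sketch under assumed values (the scales mirror the removed defaults, cycle_time and tau_factor mirror the new gpr_config, and sim_dt/sim_decimation are placeholders):

    from sim.model_export import ActorCfg, convert_model_to_onnx

    cfg = ActorCfg(
        embodiment="gpr",
        cycle_time=0.25,      # from gpr_config.rewards.cycle_time
        action_scale=0.25,
        lin_vel_scale=2.0,
        ang_vel_scale=1.0,
        quat_scale=1.0,
        dof_pos_scale=1.0,
        dof_vel_scale=0.05,
        frame_stack=15,
        clip_observations=18.0,
        clip_actions=18.0,
        sim_dt=0.001,         # placeholder: use env_cfg.sim.dt
        sim_decimation=10,    # placeholder: use env_cfg.control.decimation
        tau_factor=1.0,       # from gpr_config.safety.torque_limit
    )
    session = convert_model_to_onnx("model_3000.pt", cfg, save_path="policy.onnx")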
37 changes: 21 additions & 16 deletions sim/play.py
@@ -21,15 +21,11 @@
from kinfer.export.pytorch import export_to_onnx
from tqdm import tqdm

-# Local imports third
from sim.env import run_dir # noqa: E402
from sim.envs import task_registry # noqa: E402
-from sim.h5_logger import HDF5Logger
-from sim.model_export import ( # noqa: E402
-ActorCfg,
-convert_model_to_onnx,
-get_actor_policy,
-)

+# Local imports third
+from sim.model_export import ActorCfg, get_actor_policy
from sim.utils.helpers import get_args # noqa: E402
from sim.utils.logger import Logger # noqa: E402

@@ -92,15 +88,22 @@ def play(args: argparse.Namespace) -> None:
if args.export_onnx:
path = ppo_runner.load_path
embodiment = ppo_runner.cfg["experiment_name"].lower()
-policy_cfg = ActorCfg(embodiment=embodiment)

-if embodiment == "stompypro":
-policy_cfg.cycle_time = 0.4
-elif embodiment == "stompymicro":
-policy_cfg.cycle_time = 0.2
-else:
-print(f"Specific policy cfg for {embodiment} not implemented")

+policy_cfg = ActorCfg(
+embodiment=embodiment,
+cycle_time=env_cfg.rewards.cycle_time,
+sim_dt=env_cfg.sim.dt,
+sim_decimation=env_cfg.control.decimation,
+tau_factor=env_cfg.safety.torque_limit,
+action_scale=env_cfg.control.action_scale,
+lin_vel_scale=env_cfg.normalization.obs_scales.lin_vel,
+ang_vel_scale=env_cfg.normalization.obs_scales.ang_vel,
+quat_scale=env_cfg.normalization.obs_scales.quat,
+dof_pos_scale=env_cfg.normalization.obs_scales.dof_pos,
+dof_vel_scale=env_cfg.normalization.obs_scales.dof_vel,
+frame_stack=env_cfg.env.frame_stack,
+clip_observations=env_cfg.normalization.clip_observations,
+clip_actions=env_cfg.normalization.clip_actions,
+)
actor_model, sim2sim_info, input_tensors = get_actor_policy(path, policy_cfg)

# Merge policy_cfg and sim2sim_info into a single config object
@@ -117,6 +120,8 @@

now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
if args.log_h5:
+from sim.h5_logger import HDF5Logger

# Create directory for HDF5 files
h5_dir = run_dir() / "h5_out" / args.task / now
h5_dir.mkdir(parents=True, exist_ok=True)
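HDF5Logger is now imported lazily inside the --log_h5 branch, so the HDF5 stack is only needed when logging is actually requested. The pattern, reduced to a sketch (the helper name is illustrative):

    def get_h5_logger_cls(log_h5: bool):
        # Optional dependency: only import the HDF5 stack when the flag is set.
        if not log_h5:
            return None
        from sim.h5_logger import HDF5Logger
        return HDF5Logger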
[Diffs for the remaining 8 changed files were not loaded.]
