diff --git a/gym/envs/__init__.py b/gym/envs/__init__.py index 1d4b38b..6a94f33 100644 --- a/gym/envs/__init__.py +++ b/gym/envs/__init__.py @@ -8,13 +8,14 @@ # * 3. add the runner confg name and location to the runner config dict # * 3. register the task experiment name to the env/config/ppo classes -# * from y import x where {y:x} class_dict = { "LeggedRobot": ".base.legged_robot", "FixedRobot": ".base.fixed_robot", "Cartpole": ".cartpole.cartpole", "MiniCheetah": ".mini_cheetah.mini_cheetah", "MiniCheetahRef": ".mini_cheetah.mini_cheetah_ref", + "MiniCheetahOsc": ".mini_cheetah.mini_cheetah_osc", + "MiniCheetahOscIK": ".mini_cheetah.mini_cheetah_osc_IK", "MIT_Humanoid": ".mit_humanoid.mit_humanoid", "Anymal": ".anymal_c.anymal", "A1": ".a1.a1", @@ -25,6 +26,8 @@ "CartpoleCfg": ".cartpole.cartpole_config", "MiniCheetahCfg": ".mini_cheetah.mini_cheetah_config", "MiniCheetahRefCfg": ".mini_cheetah.mini_cheetah_ref_config", + "MiniCheetahOscCfg": ".mini_cheetah.mini_cheetah_osc_config", + "MiniCheetahOscIKCfg": ".mini_cheetah.mini_cheetah_osc_IK_config", "MITHumanoidCfg": ".mit_humanoid.mit_humanoid_config", "A1Cfg": ".a1.a1_config", "AnymalCFlatCfg": ".anymal_c.flat.anymal_c_flat_config", @@ -35,6 +38,8 @@ "CartpoleRunnerCfg": ".cartpole.cartpole_config", "MiniCheetahRunnerCfg": ".mini_cheetah.mini_cheetah_config", "MiniCheetahRefRunnerCfg": ".mini_cheetah.mini_cheetah_ref_config", + "MiniCheetahOscRunnerCfg": ".mini_cheetah.mini_cheetah_osc_config", + "MiniCheetahOscIKRunnerCfg": ".mini_cheetah.mini_cheetah_osc_IK_config", "MITHumanoidRunnerCfg": ".mit_humanoid.mit_humanoid_config", "A1RunnerCfg": ".a1.a1_config", "AnymalCFlatRunnerCfg": ".anymal_c.flat.anymal_c_flat_config", @@ -49,6 +54,16 @@ "MiniCheetahRefCfg", "MiniCheetahRefRunnerCfg", ], + "mini_cheetah_osc": [ + "MiniCheetahOsc", + "MiniCheetahOscCfg", + "MiniCheetahOscRunnerCfg", + ], + "mini_cheetah_osc_IK": [ + "MiniCheetahOscIK", + "MiniCheetahOscIKCfg", + "MiniCheetahOscIKRunnerCfg", + ], "humanoid": ["MIT_Humanoid", "MITHumanoidCfg", "MITHumanoidRunnerCfg"], "humanoid_running": [ "HumanoidRunning", diff --git a/gym/envs/mini_cheetah/mini_cheetah_config.py b/gym/envs/mini_cheetah/mini_cheetah_config.py index a538067..de015b2 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_config.py +++ b/gym/envs/mini_cheetah/mini_cheetah_config.py @@ -3,14 +3,14 @@ LeggedRobotRunnerCfg, ) -BASE_HEIGHT_REF = 0.33 +BASE_HEIGHT_REF = 0.3 class MiniCheetahCfg(LeggedRobotCfg): class env(LeggedRobotCfg.env): num_envs = 2**12 num_actuators = 12 - episode_length_s = 10 + episode_length_s = 4 class terrain(LeggedRobotCfg.terrain): mesh_type = "plane" @@ -25,67 +25,63 @@ class init_state(LeggedRobotCfg.init_state): # * reset setup chooses how the initial conditions are chosen. 
# * "reset_to_basic" = a single position # * "reset_to_range" = uniformly random from a range defined below - reset_mode = "reset_to_basic" + reset_mode = "reset_to_range" # * default COM for basic initialization - pos = [0.0, 0.0, 0.33] # x,y,z [m] + pos = [0.0, 0.0, 0.35] # x,y,z [m] rot = [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat] lin_vel = [0.0, 0.0, 0.0] # x,y,z [m/s] ang_vel = [0.0, 0.0, 0.0] # x,y,z [rad/s] # * initialization for random range setup dof_pos_range = { - "haa": [-0.05, 0.05], - "hfe": [-0.85, -0.6], - "kfe": [-1.45, 1.72], - } - dof_vel_range = { - "haa": [0.0, 0.0], - "hfe": [0.0, 0.0], - "kfe": [0.0, 0.0], + "haa": [-0.01, 0.01], + "hfe": [-0.785398, -0.785398], + "kfe": [1.596976, 1.596976], } + dof_vel_range = {"haa": [0.0, 0.0], "hfe": [0.0, 0.0], "kfe": [0.0, 0.0]} root_pos_range = [ [0.0, 0.0], # x [0.0, 0.0], # y - [0.37, 0.4], # z + [0.35, 0.35], # z [0.0, 0.0], # roll [0.0, 0.0], # pitch - [0.0, 0.0], - ] # yaw + [0.0, 0.0], # yaw + ] root_vel_range = [ - [-0.05, 0.05], # x + [-0.5, 2.0], # x [0.0, 0.0], # y [-0.05, 0.05], # z [0.0, 0.0], # roll [0.0, 0.0], # pitch - [0.0, 0.0], - ] # yaw + [0.0, 0.0], # yaw + ] class control(LeggedRobotCfg.control): # * PD Drive parameters: stiffness = {"haa": 20.0, "hfe": 20.0, "kfe": 20.0} damping = {"haa": 0.5, "hfe": 0.5, "kfe": 0.5} ctrl_frequency = 100 - desired_sim_frequency = 1000 + desired_sim_frequency = 500 class commands: # * time before command are changed[s] - resampling_time = 10.0 + resampling_time = 3.0 class ranges: - lin_vel_x = [-1.0, 1.0] # min max [m/s] + lin_vel_x = [-2.0, 3.0] # min max [m/s] lin_vel_y = 1.0 # max [m/s] - yaw_vel = 1 # max [rad/s] + yaw_vel = 3 # max [rad/s] class push_robots: - toggle = False - interval_s = 1 - max_push_vel_xy = 0.5 + toggle = True + interval_s = 15 + max_push_vel_xy = 0.05 push_box_dims = [0.3, 0.1, 0.1] # x,y,z [m] class domain_rand: - randomize_friction = False - friction_range = [0.5, 1.25] + randomize_friction = True + friction_range = [0.5, 1.0] randomize_base_mass = False added_mass_range = [-1.0, 1.0] @@ -95,16 +91,13 @@ class asset(LeggedRobotCfg.asset): + "mini_cheetah/urdf/mini_cheetah_simple.urdf" ) foot_name = "foot" - penalize_contacts_on = [] - terminate_after_contacts_on = ["base", "thigh"] + penalize_contacts_on = ["shank"] + terminate_after_contacts_on = ["base"] end_effector_names = ["foot"] - # * merge bodies connected by fixed joints. collapse_fixed_joints = False - # * 1 to disable, 0 to enable...bitwise filter self_collisions = 1 flip_visual_attachments = False disable_gravity = False - # * set all torques set to 0 disable_motors = False joint_damping = 0.1 rotor_inertia = [0.002268, 0.002268, 0.005484] * 4 @@ -118,34 +111,34 @@ class reward_settings(LeggedRobotCfg.reward_settings): tracking_sigma = 0.25 class scaling(LeggedRobotCfg.scaling): - base_ang_vel = 1.0 - base_lin_vel = 1.0 - commands = 1 - dof_vel = 100.0 # ought to be roughly max expected speed. 
- base_height = 1 - dof_pos = 1 + base_ang_vel = 0.3 + base_lin_vel = BASE_HEIGHT_REF + dof_vel = 4 * [2.0, 2.0, 4.0] + base_height = 0.3 + dof_pos = 4 * [0.2, 0.3, 0.3] dof_pos_obs = dof_pos - dof_pos_target = dof_pos - # tau_ff = 4*[18, 18, 28] # hip-abad, hip-pitch, knee + dof_pos_target = 4 * [0.2, 0.3, 0.3] + tau_ff = 4 * [18, 18, 28] + commands = [3, 1, 3] class MiniCheetahRunnerCfg(LeggedRobotRunnerCfg): seed = -1 class policy(LeggedRobotRunnerCfg.policy): - actor_hidden_dims = [256, 256, 256] - critic_hidden_dims = [256, 256, 256] + actor_hidden_dims = [256, 256, 128] + critic_hidden_dims = [128, 64] # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid activation = "elu" actor_obs = [ - "base_height", "base_lin_vel", "base_ang_vel", "projected_gravity", "commands", "dof_pos_obs", "dof_vel", + "dof_pos_target", ] critic_obs = [ "base_height", @@ -155,33 +148,38 @@ class policy(LeggedRobotRunnerCfg.policy): "commands", "dof_pos_obs", "dof_vel", + "dof_pos_target", ] actions = ["dof_pos_target"] - add_noise = False + add_noise = True class noise: - dof_pos_obs = 0.005 # can be made very low + scale = 1.0 + dof_pos_obs = 0.01 + base_ang_vel = 0.01 + dof_pos = 0.005 dof_vel = 0.005 - base_ang_vel = 0.05 - projected_gravity = 0.02 + lin_vel = 0.05 + ang_vel = [0.3, 0.15, 0.4] + gravity_vec = 0.1 class reward(LeggedRobotRunnerCfg.policy.reward): class weights(LeggedRobotRunnerCfg.policy.reward.weights): - tracking_lin_vel = 5.0 - tracking_ang_vel = 5.0 + tracking_lin_vel = 4.0 + tracking_ang_vel = 2.0 lin_vel_z = 0.0 - ang_vel_xy = 0.0 + ang_vel_xy = 0.01 orientation = 1.0 torques = 5.0e-7 - dof_vel = 1.0 - base_height = 1.0 - action_rate = 0.001 - action_rate2 = 0.0001 + dof_vel = 0.0 + min_base_height = 1.5 + action_rate = 0.01 + action_rate2 = 0.001 stand_still = 0.0 dof_pos_limits = 0.0 feet_contact_forces = 0.0 - dof_near_home = 1.0 + dof_near_home = 0.0 class termination_weight: termination = 0.01 @@ -191,23 +189,20 @@ class algorithm(LeggedRobotRunnerCfg.algorithm): value_loss_coef = 1.0 use_clipped_value_loss = True clip_param = 0.2 - entropy_coef = 0.01 - num_learning_epochs = 6 + entropy_coef = 0.02 + num_learning_epochs = 4 # * mini batch size = num_envs*nsteps / nminibatches - num_mini_batches = 6 - learning_rate = 1.0e-4 + num_mini_batches = 8 + learning_rate = 1.0e-5 schedule = "adaptive" # can be adaptive or fixed discount_horizon = 1.0 # [s] - GAE_bootstrap_horizon = 1.0 # [s] + GAE_bootstrap_horizon = 2.0 # [s] desired_kl = 0.01 max_grad_norm = 1.0 class runner(LeggedRobotRunnerCfg.runner): run_name = "" experiment_name = "mini_cheetah" - # * number of policy updates - max_iterations = 1000 + max_iterations = 500 algorithm_class_name = "PPO" - # * per iteration - # * (n_steps in Rudin 2021 paper - batch_size = n_steps * n_robots) - num_steps_per_env = 24 + num_steps_per_env = 32 diff --git a/gym/envs/mini_cheetah/mini_cheetah_osc.py b/gym/envs/mini_cheetah/mini_cheetah_osc.py new file mode 100644 index 0000000..2f8880c --- /dev/null +++ b/gym/envs/mini_cheetah/mini_cheetah_osc.py @@ -0,0 +1,480 @@ +import torch +import numpy as np +from isaacgym.torch_utils import torch_rand_float + +from gym.envs.mini_cheetah.mini_cheetah import MiniCheetah + +from isaacgym import gymtorch + +MINI_CHEETAH_MASS = 8.292 * 9.81 # Weight of mini cheetah in Newtons + + +class MiniCheetahOsc(MiniCheetah): + def __init__(self, gym, sim, cfg, sim_params, sim_device, headless): + super().__init__(gym, sim, cfg, sim_params, sim_device, headless) + self.process_noise_std = 
self.cfg.osc.process_noise_std + + def _init_buffers(self): + super()._init_buffers() + self.oscillators = torch.zeros(self.num_envs, 4, device=self.device) + self.oscillator_obs = torch.zeros(self.num_envs, 8, device=self.device) + + self.oscillators_vel = torch.zeros_like(self.oscillators) + self.grf = torch.zeros(self.num_envs, 4, device=self.device) + self.osc_omega = self.cfg.osc.omega * torch.ones( + self.num_envs, 1, device=self.device + ) + self.osc_coupling = self.cfg.osc.coupling * torch.ones( + self.num_envs, 1, device=self.device + ) + self.osc_offset = self.cfg.osc.offset * torch.ones( + self.num_envs, 1, device=self.device + ) + + def _reset_oscillators(self, env_ids): + if len(env_ids) == 0: + return + # * random + if self.cfg.osc.init_to == "random": + self.oscillators[env_ids] = torch_rand_float( + 0, + 2 * torch.pi, + shape=self.oscillators[env_ids].shape, + device=self.device, + ) + elif self.cfg.osc.init_to == "standing": + self.oscillators[env_ids] = 3 * torch.pi / 2 + elif self.cfg.osc.init_to == "trot": + self.oscillators[env_ids] = torch.tensor( + [0.0, torch.pi, torch.pi, 0.0], device=self.device + ) + elif self.cfg.osc.init_to == "pace": + self.oscillators[env_ids] = torch.tensor( + [0.0, torch.pi, 0.0, torch.pi], device=self.device + ) + if self.cfg.osc.init_w_offset: + self.oscillators[env_ids, :] += ( + torch.rand_like(self.oscillators[env_ids, 0]).unsqueeze(1) + * 2 + * torch.pi + ) + elif self.cfg.osc.init_to == "pronk": + self.oscillators[env_ids, :] *= 0.0 + elif self.cfg.osc.init_to == "bound": + self.oscillators[env_ids, :] = torch.tensor( + [torch.pi, torch.pi, 0.0, 0.0], device=self.device + ) + else: + raise NotImplementedError + + if self.cfg.osc.init_w_offset: + self.oscillators[env_ids, :] += ( + torch.rand_like(self.oscillators[env_ids, 0]).unsqueeze(1) + * 2 + * torch.pi + ) + self.oscillators = torch.remainder(self.oscillators, 2 * torch.pi) + + def _reset_system(self, env_ids): + if len(env_ids) == 0: + return + self._reset_oscillators(env_ids) + + self.oscillator_obs = torch.cat( + (torch.cos(self.oscillators), torch.sin(self.oscillators)), dim=1 + ) + + # * keep some robots in the same starting state as they ended + timed_out_subset = (self.timed_out & ~self.terminated) * ( + torch.rand(self.num_envs, device=self.device) + < self.cfg.init_state.timeout_reset_ratio + ) + + env_ids = (self.terminated | timed_out_subset).nonzero().flatten() + if len(env_ids) == 0: + return + super()._reset_system(env_ids) + + def _pre_decimation_step(self): + super()._pre_decimation_step() + # self.grf = self._compute_grf() + if not self.cfg.osc.randomize_osc_params: + self.compute_osc_slope() + + def compute_osc_slope(self): + cmd_x = torch.abs(self.commands[:, 0:1]) - self.cfg.osc.stop_threshold + stop = cmd_x < 0 + + self.osc_offset = stop * self.cfg.osc.offset + self.osc_omega = ( + stop * self.cfg.osc.omega_stop + + torch.randn_like(self.osc_omega) * self.cfg.osc.omega_var + ) + self.osc_coupling = ( + stop * self.cfg.osc.coupling_stop + + torch.randn_like(self.osc_coupling) * self.cfg.osc.coupling_var + ) + + self.osc_omega += (~stop) * torch.clamp( + cmd_x * self.cfg.osc.omega_slope + self.cfg.osc.omega_step, + min=0.0, + max=self.cfg.osc.omega_max, + ) + self.osc_coupling += (~stop) * torch.clamp( + cmd_x * self.cfg.osc.coupling_slope + self.cfg.osc.coupling_step, + min=0.0, + max=self.cfg.osc.coupling_max, + ) + + self.osc_omega = torch.clamp_min(self.osc_omega, 0.1) + self.osc_coupling = torch.clamp_min(self.osc_coupling, 0) + + def 
_process_rigid_body_props(self, props, env_id): + if env_id == 0: + # * init buffers for the domain rand changes + self.mass = torch.zeros(self.num_envs, 1, device=self.device) + self.com = torch.zeros(self.num_envs, 3, device=self.device) + + # * randomize mass + if self.cfg.domain_rand.randomize_base_mass: + lower = self.cfg.domain_rand.lower_mass_offset + upper = self.cfg.domain_rand.upper_mass_offset + # self.mass_ + props[0].mass += np.random.uniform(lower, upper) + self.mass[env_id] = props[0].mass + # * randomize com position + lower = self.cfg.domain_rand.lower_z_offset + upper = self.cfg.domain_rand.upper_z_offset + props[0].com.z += np.random.uniform(lower, upper) + self.com[env_id, 2] = props[0].com.z + + lower = self.cfg.domain_rand.lower_x_offset + upper = self.cfg.domain_rand.upper_x_offset + props[0].com.x += np.random.uniform(lower, upper) + self.com[env_id, 0] = props[0].com.x + return props + + def _post_decimation_step(self): + """Update all states that are not handled in PhysX""" + super()._post_decimation_step() + self.grf = self._compute_grf() + # self._step_oscillators() + + def _post_physx_step(self): + super()._post_physx_step() + self._step_oscillators(self.dt / self.cfg.control.decimation) + return None + + def _step_oscillators(self, dt=None): + if dt is None: + dt = self.dt + + local_feedback = self.osc_coupling * ( + torch.cos(self.oscillators) + self.osc_offset + ) + grf = self._compute_grf() + self.oscillators_vel = self.osc_omega - grf * local_feedback + # self.oscillators_vel *= torch_rand_float(0.9, + # 1.1, + # self.oscillators_vel.shape, + # self.device) + self.oscillators_vel += ( + torch.randn(self.oscillators_vel.shape, device=self.device) + * self.cfg.osc.process_noise_std + ) + + self.oscillators_vel *= 2 * torch.pi + self.oscillators += ( + self.oscillators_vel * dt + ) # torch.clamp(self.oscillators_vel * dt, min=0) + self.oscillators = torch.remainder(self.oscillators, 2 * torch.pi) + self.oscillator_obs = torch.cat( + (torch.cos(self.oscillators), torch.sin(self.oscillators)), dim=1 + ) + + def _resample_commands(self, env_ids): + """Randommly select commands of some environments + + Args: + env_ids (List[int]): Environments ids for which new commands are needed + """ + if len(env_ids) == 0: + return + super()._resample_commands(env_ids) + possible_commands = torch.tensor( + self.command_ranges["lin_vel_x"], device=self.device + ) + self.commands[env_ids, 0:1] = possible_commands[ + torch.randint( + 0, len(possible_commands), (len(env_ids), 1), device=self.device + ) + ] + + if 0 in self.cfg.commands.ranges.lin_vel_x: + # * with 20% chance, reset to 0 commands except for forward + self.commands[env_ids, 1:] *= ( + torch_rand_float(0, 1, (len(env_ids), 1), device=self.device).squeeze(1) + < 0.8 + ).unsqueeze(1) + # * with 20% chance, reset to 0 commands except for rotation + self.commands[env_ids, :2] *= ( + torch_rand_float(0, 1, (len(env_ids), 1), device=self.device).squeeze(1) + < 0.8 + ).unsqueeze(1) + # * with 10% chance, reset to 0 + self.commands[env_ids, :] *= ( + torch_rand_float(0, 1, (len(env_ids), 1), device=self.device).squeeze(1) + < 0.9 + ).unsqueeze(1) + + if self.cfg.osc.randomize_osc_params: + self._resample_osc_params(env_ids) + + def _resample_osc_params(self, env_ids): + if len(env_ids) > 0: + self.osc_omega[env_ids, 0] = torch_rand_float( + self.cfg.osc.omega_range[0], + self.cfg.osc.omega_range[1], + (len(env_ids), 1), + device=self.device, + ).squeeze(1) + self.osc_coupling[env_ids, 0] = torch_rand_float( + 
                self.cfg.osc.coupling_range[0],
+                self.cfg.osc.coupling_range[1],
+                (len(env_ids), 1),
+                device=self.device,
+            ).squeeze(1)
+            self.osc_offset[env_ids, 0] = torch_rand_float(
+                self.cfg.osc.offset_range[0],
+                self.cfg.osc.offset_range[1],
+                (len(env_ids), 1),
+                device=self.device,
+            ).squeeze(1)
+
+    def perturb_base_velocity(self, velocity_delta, env_ids=None):
+        if env_ids is None:
+            env_ids = [range(self.num_envs)]
+        self.root_states[env_ids, 7:10] += velocity_delta
+        self.gym.set_actor_root_state_tensor(
+            self.sim, gymtorch.unwrap_tensor(self.root_states)
+        )
+
+    def _compute_grf(self, grf_norm=True):
+        grf = torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1)
+        if grf_norm:
+            return torch.clamp_max(grf / MINI_CHEETAH_MASS, 1.0)
+        else:
+            return grf
+
+    def _switch(self):
+        c_vel = torch.linalg.norm(self.commands, dim=1)
+        return torch.exp(
+            -torch.square(torch.max(torch.zeros_like(c_vel), c_vel - 0.1))
+            / self.cfg.reward_settings.switch_scale
+        )
+
+    def _reward_cursorial(self):
+        # penalize the abad joints being away from 0
+        return -torch.mean(
+            torch.square(self.dof_pos[:, 0:12:3] / self.scales["dof_pos"][0]), dim=1
+        )
+
+    def _reward_swing_grf(self):
+        # Penalize non-zero grf during swing (0 to pi)
+        rew = self.get_swing_grf(self.cfg.osc.osc_bool, self.cfg.osc.grf_bool)
+        return -torch.sum(rew, dim=1)
+
+    def _reward_stance_grf(self):
+        # Reward non-zero grf during stance (pi to 2pi)
+        rew = self.get_stance_grf(self.cfg.osc.osc_bool, self.cfg.osc.grf_bool)
+        return torch.sum(rew, dim=1)
+
+    def get_swing_grf(self, osc_bool=False, contact_bool=False):
+        if osc_bool:
+            phase = torch.lt(self.oscillators, torch.pi).int()
+        else:
+            phase = torch.maximum(
+                torch.zeros_like(self.oscillators), torch.sin(self.oscillators)
+            )
+        if contact_bool:
+            return phase * torch.gt(self._compute_grf(), self.cfg.osc.grf_threshold)
+        else:
+            return phase * self._compute_grf()
+
+    def get_stance_grf(self, osc_bool=False, contact_bool=False):
+        if osc_bool:
+            phase = torch.gt(self.oscillators, torch.pi).int()
+        else:
+            phase = torch.maximum(
+                torch.zeros_like(self.oscillators), -torch.sin(self.oscillators)
+            )
+        if contact_bool:
+            return phase * torch.gt(self._compute_grf(), self.cfg.osc.grf_threshold)
+        else:
+            return phase * self._compute_grf()
+
+    def _reward_coupled_grf(self):
+        """
+        Multiply rewards for stance/swing grf, discount when undesirable
+        behavior (grf during swing, no grf during stance)
+        """
+        swing_rew = self.get_swing_grf()
+        stance_rew = self.get_stance_grf()
+        combined_rew = self._sqrdexp(swing_rew * 2) + stance_rew
+        prod = torch.prod(torch.clip(combined_rew, 0, 1), dim=1)
+        return prod - torch.ones_like(prod)
+
+    def _reward_swing_velocity(self):
+        """Reward non-zero end effector velocity during swing (0 to pi)"""
+        # velocity = torch.tanh(torch.norm(self.end_effector_lin_vel, dim=-1))
+        velocity = torch.zeros_like(
+            self.oscillators
+        )  # TODO: Grab velocity from AJ V2 code
+        phase_bool = torch.lt(self.oscillators, torch.pi).int()
+        phase_sin = torch.maximum(
+            torch.zeros_like(self.oscillators), torch.sin(self.oscillators)
+        )
+        if self.cfg.osc.osc_bool:
+            rew = phase_bool * velocity
+        else:
+            rew = phase_sin * velocity
+        return torch.mean(rew, dim=1)
+
+    def _reward_stance_velocity(self):
+        """Reward zero end effector velocity during stance (pi to 2pi)"""
+        # velocity = torch.tanh(torch.norm(self.end_effector_lin_vel, dim=-1))
+        velocity = torch.zeros_like(
+            self.oscillators
+        )  # TODO: Grab velocity from AJ V2 code
+        ph_bool = torch.gt(self.oscillators, torch.pi).int()
+        ph_sin = torch.maximum(
+            torch.zeros_like(self.oscillators), -torch.sin(self.oscillators)
+        )
+        if self.cfg.osc.osc_bool:
+            rew = ph_bool * velocity
+        else:
+            rew = ph_sin * velocity
+        # return torch.mean(self._sqrdexp(rew), dim=1)  # TODO: Higher reward if rew close to zero (how to decouple so that ignore if not in stance??)
+        return -torch.mean(rew, dim=1)
+
+    def _reward_dof_vel(self):
+        return super()._reward_dof_vel() * self._switch()
+
+    def _reward_dof_near_home(self):
+        return super()._reward_dof_near_home() * self._switch()
+
+    def _reward_stand_still(self):
+        """Penalize motion at zero commands"""
+        # * normalize angles so we care about being within 5 deg
+        rew_pos = torch.mean(
+            self._sqrdexp((self.dof_pos - self.default_dof_pos) / torch.pi * 36), dim=1
+        )
+        rew_vel = torch.mean(self._sqrdexp(self.dof_vel), dim=1)
+        rew_base_vel = torch.mean(torch.square(self.base_lin_vel), dim=1)
+        rew_base_vel += torch.mean(torch.square(self.base_ang_vel), dim=1)
+        return (rew_vel + rew_pos - rew_base_vel) * self._switch()
+
+    def _reward_standing_torques(self):
+        """Penalize torques at zero commands"""
+        return super()._reward_torques() * self._switch()
+
+    # * gait similarity scores
+    def angle_difference(self, theta1, theta2):
+        diff = torch.abs(theta1 - theta2) % (2 * torch.pi)
+        return torch.min(diff, 2 * torch.pi - diff)
+
+    def _reward_trot(self):
+        # ! diagonal difference, front right and hind left
+        angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 3])
+        similarity = self._sqrdexp(angle, torch.pi)
+        # ! diagonal difference, front left and hind right
+        angle = self.angle_difference(self.oscillators[:, 1], self.oscillators[:, 2])
+        similarity *= self._sqrdexp(angle, torch.pi)
+        # ! difference, front right and front left
+        angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 1])
+        similarity *= self._sqrdexp(angle - torch.pi, torch.pi)
+        # ! difference hind right and hind left
+        angle = self.angle_difference(self.oscillators[:, 2], self.oscillators[:, 3])
+        similarity *= self._sqrdexp(angle - torch.pi, torch.pi)
+        return similarity
+
+    def _reward_pronk(self):
+        # ! diagonal difference, front right and hind left
+        angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 3])
+        similarity = self._sqrdexp(angle, torch.pi)
+        # ! diagonal difference, front left and hind right
+        angle = self.angle_difference(self.oscillators[:, 1], self.oscillators[:, 2])
+        similarity *= self._sqrdexp(angle, torch.pi)
+        # ! difference, front right and front left
+        angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 1])
+        similarity *= self._sqrdexp(angle, torch.pi)
+        # ! difference front right and hind right
+        angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 2])
+        similarity *= self._sqrdexp(angle, torch.pi)
+        return similarity
+
+    def _reward_pace(self):
+        angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 2])
+        similarity = self._sqrdexp(angle, torch.pi)
+        # ! difference front left and hind left
+        angle = self.angle_difference(self.oscillators[:, 1], self.oscillators[:, 3])
+        similarity *= self._sqrdexp(angle, torch.pi)
+        # ! difference front right and front left
+        angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 1])
+        similarity *= self._sqrdexp(angle - torch.pi, torch.pi)
+        # ! difference hind right and hind left
+        angle = self.angle_difference(self.oscillators[:, 2], self.oscillators[:, 3])
+        similarity *= self._sqrdexp(angle - torch.pi, torch.pi)
+
+        return similarity
+
+    def _reward_any_symm_gait(self):
+        rew_trot = self._reward_trot()
+        rew_pace = self._reward_pace()
+        rew_bound = self._reward_bound()
+        return torch.max(torch.max(rew_trot, rew_pace), rew_bound)
+
+    def _reward_enc_pace(self):
+        return self._reward_pace()
+
+    def _reward_bound(self):
+        # ! difference, front right and front left
+        angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 1])
+        similarity = self._sqrdexp(angle, torch.pi)
+        # ! difference hind right and hind left
+        angle = self.angle_difference(self.oscillators[:, 2], self.oscillators[:, 3])
+        similarity *= self._sqrdexp(angle, torch.pi)
+        # ! difference right side
+        angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 2])
+        similarity *= self._sqrdexp(angle - torch.pi, torch.pi)
+        # ! difference left side
+        angle = self.angle_difference(self.oscillators[:, 1], self.oscillators[:, 3])
+        similarity *= self._sqrdexp(angle - torch.pi, torch.pi)
+        return similarity
+
+    def _reward_asymettric(self):
+        # ! hind legs
+        angle = self.angle_difference(self.oscillators[:, 2], self.oscillators[:, 3])
+        similarity = 1 - self._sqrdexp(angle, torch.pi)
+        similarity *= 1 - self._sqrdexp((torch.pi - angle), torch.pi)
+        # ! difference, left legs
+        angle = self.angle_difference(self.oscillators[:, 1], self.oscillators[:, 3])
+        similarity *= 1 - self._sqrdexp(angle, torch.pi)
+        similarity *= 1 - self._sqrdexp((torch.pi - angle), torch.pi)
+        # ! difference right legs
+        angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 2])
+        similarity *= 1 - self._sqrdexp(angle, torch.pi)
+        similarity *= 1 - self._sqrdexp((torch.pi - angle), torch.pi)
+        # ! front legs
+        angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 1])
+        similarity *= 1 - self._sqrdexp(angle, torch.pi)
+        similarity *= 1 - self._sqrdexp((torch.pi - angle), torch.pi)
+        # ! diagonal FR
+        angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 3])
+        similarity *= 1 - self._sqrdexp(angle, torch.pi)
+        similarity *= 1 - self._sqrdexp((torch.pi - angle), torch.pi)
+        # ! 
diagonal FL + angle = self.angle_difference(self.oscillators[:, 1], self.oscillators[:, 2]) + similarity *= 1 - self._sqrdexp(angle, torch.pi) + similarity *= 1 - self._sqrdexp((torch.pi - angle), torch.pi) + return similarity diff --git a/gym/envs/mini_cheetah/mini_cheetah_osc_config.py b/gym/envs/mini_cheetah/mini_cheetah_osc_config.py new file mode 100644 index 0000000..a08d517 --- /dev/null +++ b/gym/envs/mini_cheetah/mini_cheetah_osc_config.py @@ -0,0 +1,248 @@ +from gym.envs.mini_cheetah.mini_cheetah_config import ( + MiniCheetahCfg, + MiniCheetahRunnerCfg, +) + +BASE_HEIGHT_REF = 0.33 + + +class MiniCheetahOscCfg(MiniCheetahCfg): + class viewer: + ref_env = 0 + pos = [-2.0, 3, 2] # [m] + lookat = [0.0, 1.0, 0.5] # [m] + + class env(MiniCheetahCfg.env): + num_envs = 4096 + num_actuators = 12 + episode_length_s = 4.0 + env_spacing = 3.0 + + class terrain(MiniCheetahCfg.terrain): + mesh_type = "plane" + # mesh_type = 'trimesh' # none, plane, heightfield or trimesh + + class init_state(MiniCheetahCfg.init_state): + reset_mode = "reset_to_range" + timeout_reset_ratio = 0.75 + # * default COM for basic initialization + pos = [0.0, 0.0, 0.35] # x,y,z [m] + rot = [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat] + lin_vel = [0.0, 0.0, 0.0] # x,y,z [m/s] + ang_vel = [0.0, 0.0, 0.0] # x,y,z [rad/s] + default_joint_angles = { + "haa": 0.0, + "hfe": -0.785398, + "kfe": 1.596976, + } + + # * initialization for random range setup + dof_pos_range = { + "haa": [-0.01, 0.01], + "hfe": [-0.785398, -0.785398], + "kfe": [1.596976, 1.596976], + } + dof_vel_range = {"haa": [0.0, 0.0], "hfe": [0.0, 0.0], "kfe": [0.0, 0.0]} + root_pos_range = [ + [0.0, 0.0], # x + [0.0, 0.0], # y + [0.35, 0.35], # z + [0.0, 0.0], # roll + [0.0, 0.0], # pitch + [0.0, 0.0], # yaw + ] + root_vel_range = [ + [-0.5, 2.0], # x + [0.0, 0.0], # y + [-0.05, 0.05], # z + [0.0, 0.0], # roll + [0.0, 0.0], # pitch + [0.0, 0.0], # yaw + ] + + class control(MiniCheetahCfg.control): + # * PD Drive parameters: + stiffness = {"haa": 20.0, "hfe": 20.0, "kfe": 20.0} + damping = {"haa": 0.5, "hfe": 0.5, "kfe": 0.5} + ctrl_frequency = 100 + desired_sim_frequency = 500 + + class osc: + process_noise_std = 0.25 + # oscillator parameters + omega = 3 # 0.5 #3.5 # in Hz + coupling = 1 # 0.02 + osc_bool = False + grf_bool = False + randomize_osc_params = False + grf_threshold = 0.1 # 20. # Normalized to body weight + omega_range = [1.0, 4.0] # [0.0, 10.] + coupling_range = [ + 0.0, + 1.0, + ] # with normalized grf, can have omega/coupling on same scale + offset_range = [0.0, 0.0] + stop_threshold = 0.5 + omega_stop = 1.0 + omega_step = 2.0 + omega_slope = 1.0 + omega_max = 4.0 + omega_var = 0.25 + # coupling_step = 0. + # coupling_stop = 0. + coupling_stop = 4.0 + coupling_step = 1.0 + coupling_slope = 0.0 + coupling_max = 1.0 + offset = 1.0 + coupling_var = 0.25 + + init_to = "random" + init_w_offset = True + + class commands(MiniCheetahCfg.commands): + resampling_time = 3.0 # time before command are changed[s] + var = 1.0 + + class ranges(MiniCheetahCfg.commands.ranges): + lin_vel_x = [-3.0, -1.0, 0.0, 1.0, 3.0] # min max [m/s] + lin_vel_y = 1.0 # [-1., 0, 1.] # max [m/s] + yaw_vel = 3.0 # [-6., -3., 0., 3., 6.] 
# max [rad/s] + + class push_robots(MiniCheetahCfg.push_robots): + toggle = True + interval_s = 15 + max_push_vel_xy = 0.05 + push_box_dims = [0.2, 0.2, 0.2] # x,y,z [m] + + class domain_rand(MiniCheetahCfg.domain_rand): + randomize_friction = True + friction_range = [0.4, 1.0] + randomize_base_mass = False + lower_mass_offset = -0.5 # kg + upper_mass_offset = 2.0 + lower_z_offset = 0.0 # m + upper_z_offset = 0.2 + lower_x_offset = 0.0 + upper_x_offset = 0.0 + + class asset(MiniCheetahCfg.asset): + shank_length_diff = 0 # Units in cm + # file = "{LEGGED_GYM_ROOT_DIR}/resources/robots/" \ + # + "mini_cheetah/urdf/mini_cheetah_" \ + # + str(shank_length_diff) + ".urdf" + file = ( + "{LEGGED_GYM_ROOT_DIR}/resources/robots/" + + "mini_cheetah/urdf/mini_cheetah_simple.urdf" + ) + foot_name = "foot" + penalize_contacts_on = ["thigh", "shank"] + terminate_after_contacts_on = ["base"] + collapse_fixed_joints = False + fix_base_link = False + self_collisions = 1 # 1 to disable, 0 to enable...bitwise filter + flip_visual_attachments = False + disable_gravity = False + disable_motors = False # all torques set to 0 + + class reward_settings(MiniCheetahCfg.reward_settings): + soft_dof_pos_limit = 0.8 + soft_dof_vel_limit = 0.9 + soft_torque_limit = 0.9 + max_contact_force = 600.0 + base_height_target = BASE_HEIGHT_REF + 0.03 + tracking_sigma = 0.25 + switch_scale = 0.5 + + class scaling(MiniCheetahCfg.scaling): + pass + + +class MiniCheetahOscRunnerCfg(MiniCheetahRunnerCfg): + seed = -1 + + class policy(MiniCheetahRunnerCfg.policy): + actor_hidden_dims = [256, 256, 128] + critic_hidden_dims = [256, 256, 128] + # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid + activation = "elu" + + actor_obs = [ + "base_ang_vel", + "projected_gravity", + "commands", + "dof_pos_obs", + "dof_vel", + "oscillator_obs", + "dof_pos_target", + ] + + critic_obs = [ + "base_height", + "base_lin_vel", + "base_ang_vel", + "projected_gravity", + "commands", + "dof_pos_obs", + "dof_vel", + "oscillator_obs", + "oscillators_vel", + "dof_pos_target", + ] + + actions = ["dof_pos_target"] + + class noise: + pass + + class reward: + class weights: + tracking_lin_vel = 4.0 + tracking_ang_vel = 2.0 + lin_vel_z = 0.0 + ang_vel_xy = 0.01 + orientation = 1.0 + torques = 5.0e-7 + dof_vel = 0.0 + min_base_height = 1.5 + collision = 0 + action_rate = 0.01 # -0.01 + action_rate2 = 0.001 # -0.001 + stand_still = 0.0 + dof_pos_limits = 0.0 + feet_contact_forces = 0.0 + dof_near_home = 0.0 + swing_grf = 5.0 + stance_grf = 5.0 + swing_velocity = 0.0 + stance_velocity = 0.0 + coupled_grf = 0.0 # 8. 
+ enc_pace = 0.0 + cursorial = 0.25 + standing_torques = 0.0 # 1.e-5 + + class termination_weight: + termination = 15.0 / 100.0 + + class algorithm(MiniCheetahRunnerCfg.algorithm): + # training params + value_loss_coef = 1.0 + use_clipped_value_loss = True + clip_param = 0.2 + entropy_coef = 0.02 + num_learning_epochs = 4 + # mini batch size = num_envs*nsteps/nminibatches + num_mini_batches = 8 + learning_rate = 1.0e-5 + schedule = "adaptive" # can be adaptive, fixed + discount_horizon = 1.0 + GAE_bootstrap_horizon = 2.0 + desired_kl = 0.01 + max_grad_norm = 1.0 + + class runner(MiniCheetahRunnerCfg.runner): + run_name = "" + experiment_name = "FullSend" + max_iterations = 500 # number of policy updates + algorithm_class_name = "PPO" + num_steps_per_env = 32 diff --git a/gym/envs/mini_cheetah/mini_cheetah_ref_config.py b/gym/envs/mini_cheetah/mini_cheetah_ref_config.py index 2e2aa98..41bb634 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_ref_config.py +++ b/gym/envs/mini_cheetah/mini_cheetah_ref_config.py @@ -16,42 +16,6 @@ class terrain(MiniCheetahCfg.terrain): pass class init_state(MiniCheetahCfg.init_state): - reset_mode = "reset_to_range" - # * default COM for basic initialization - pos = [0.0, 0.0, 0.33] # x,y,z [m] - rot = [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat] - lin_vel = [0.0, 0.0, 0.0] # x,y,z [m/s] - ang_vel = [0.0, 0.0, 0.0] # x,y,z [rad/s] - default_joint_angles = { - "haa": 0.0, - "hfe": -0.785398, - "kfe": 1.596976, - } - - # * initialization for random range setup - dof_pos_range = { - "haa": [-0.00001, 0.00001], - "hfe": [-0.785398, -0.785398], - "kfe": [1.596976, 1.596976], - } - dof_vel_range = {"haa": [-0.2, 0.2], "hfe": [-0.2, 0.2], "kfe": [-0.2, 0.2]} - root_pos_range = [ - [0.0, 0.0], # x - [0.0, 0.0], # y - [0.35, 0.35], # z - [0.0, 0.0], # roll - [0.0, 0.0], # pitch - [0.0, 0.0], - ] # yaw - root_vel_range = [ - [-0.5, 2.0], # x - [0.0, 0.0], # y - [-0.05, 0.05], # z - [0.0, 0.0], # roll - [0.0, 0.0], # pitch - [0.0, 0.0], - ] # yaw - ref_traj = ( "{LEGGED_GYM_ROOT_DIR}/resources/robots/" + "mini_cheetah/trajectories/single_leg.csv" @@ -63,28 +27,16 @@ class control(MiniCheetahCfg.control): damping = {"haa": 0.5, "hfe": 0.5, "kfe": 0.5} gait_freq = 3.0 ctrl_frequency = 100 - desired_sim_frequency = 1000 - - class commands: - resampling_time = 3.0 # time before command are changed[s] + desired_sim_frequency = 500 - class ranges: - lin_vel_x = [-1.0, 3.0] # min max [m/s] - lin_vel_y = 1.0 # max [m/s] - yaw_vel = 3.14 / 2.0 # max [rad/s] + class commands(MiniCheetahCfg.commands): + pass - class push_robots: - toggle = True - interval_s = 0.1 - max_push_vel_xy = 0.1 - push_box_dims = [0.3, 0.1, 0.1] # x,y,z [m] + class push_robots(MiniCheetahCfg.push_robots): + pass class domain_rand(MiniCheetahCfg.domain_rand): - randomize_friction = True - friction_range = [0.6, 1.0] - randomize_base_mass = False - added_mass_range = [-1.0, 3.0] - friction_range = [0.0, 1.0] + pass class asset(MiniCheetahCfg.asset): file = ( @@ -96,17 +48,17 @@ class asset(MiniCheetahCfg.asset): terminate_after_contacts_on = ["base", "thigh"] collapse_fixed_joints = False fix_base_link = False - self_collisions = 1 # 1 to disable, 0 to enable...bitwise filter + self_collisions = 1 flip_visual_attachments = False disable_gravity = False - disable_motors = False # all torques set to 0 + disable_motors = False class reward_settings(MiniCheetahCfg.reward_settings): soft_dof_pos_limit = 0.9 soft_dof_vel_limit = 0.9 soft_torque_limit = 0.9 max_contact_force = 600.0 - base_height_target = BASE_HEIGHT_REF + 
base_height_target = 0.3 tracking_sigma = 0.25 class scaling(MiniCheetahCfg.scaling): @@ -148,26 +100,18 @@ class policy(MiniCheetahRunnerCfg.policy): disable_actions = False class noise: - dof_pos_obs = 0.0 # 0.005 # can be made very low - dof_vel = 0.0 # 0.005 - ang_vel = 0.0 # [0.1, 0.1, 0.1] # 0.027, 0.14, 0.37 - base_ang_vel = 0.0 # 0. - dof_pos = 0.0 # 0.005 - dof_vel = 0.0 # 0.005 - lin_vel = 0.0 # 0. - ang_vel = 0.0 # [0.3, 0.15, 0.4] - gravity_vec = 0.0 # 0.05 + pass class reward: class weights: - tracking_lin_vel = 2.0 + tracking_lin_vel = 4.0 tracking_ang_vel = 2.0 lin_vel_z = 0.0 ang_vel_xy = 0.01 - orientation = 0.0 - torques = 5.0e-6 + orientation = 1.0 + torques = 5.0e-7 dof_vel = 0.0 - min_base_height = 0.0 # 1.5 + min_base_height = 1.5 collision = 0.0 action_rate = 0.01 action_rate2 = 0.001 @@ -175,16 +119,16 @@ class weights: dof_pos_limits = 0.0 feet_contact_forces = 0.0 dof_near_home = 0.0 - reference_traj = 0.0 # 1.5 - swing_grf = 0.0 # 0.5 - stance_grf = 0.0 # 0.5 + reference_traj = 1.5 + swing_grf = 1.5 + stance_grf = 1.5 class pbrs_weights: - reference_traj = 1.0 - swing_grf = 1.0 - stance_grf = 1.0 - min_base_height = 1.0 - orientation = 1.0 + reference_traj = 0.0 + swing_grf = 0.0 + stance_grf = 0.0 + min_base_height = 0.0 + orientation = 0.0 class termination_weight: termination = 0.15 diff --git a/learning/runners/my_runner.py b/learning/runners/my_runner.py index 7b912cb..74b842d 100644 --- a/learning/runners/my_runner.py +++ b/learning/runners/my_runner.py @@ -3,6 +3,7 @@ from learning.utils import Logger from learning.utils import PotentialBasedRewardShaping +from learning.utils import remove_zero_weighted_rewards from .on_policy_runner import OnPolicyRunner @@ -21,7 +22,7 @@ def __init__(self, env: VecEnv, train_cfg, device="cpu"): def learn(self): self.set_up_logger() - + remove_zero_weighted_rewards(self.policy_cfg["reward"]["pbrs_weights"]) PBRS = PotentialBasedRewardShaping( self.policy_cfg["reward"]["pbrs_weights"], self.device )
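
For reference, the core of the new MiniCheetahOsc environment is the ground-reaction-force-coupled phase oscillator integrated in _step_oscillators once per physics step (dt = control dt / decimation). Below is a minimal standalone sketch of that update, assuming the default MiniCheetahOscCfg.osc values; the helper name step_oscillators and the zero-GRF placeholder input are illustrative only, not identifiers from this patch.

import torch

def step_oscillators(theta, grf, omega, coupling, offset, dt, noise_std=0.0):
    # theta: (num_envs, 4) per-leg phases in [0, 2*pi); grf normalized to body weight.
    # Loading a foot while cos(theta) + offset > 0 slows that leg's phase.
    theta_dot = omega - grf * coupling * (torch.cos(theta) + offset)
    theta_dot = theta_dot + torch.randn_like(theta) * noise_std  # process noise
    theta_dot = 2 * torch.pi * theta_dot  # omega and coupling are specified in Hz
    theta = torch.remainder(theta + theta_dot * dt, 2 * torch.pi)
    # The policy observes the phases through their cosine/sine embedding (oscillator_obs).
    obs = torch.cat((torch.cos(theta), torch.sin(theta)), dim=1)
    return theta, theta_dot, obs

num_envs, dt = 4096, 1.0 / 500.0                 # desired_sim_frequency = 500 Hz
theta = torch.rand(num_envs, 4) * 2 * torch.pi   # cfg.osc.init_to = "random"
grf = torch.zeros(num_envs, 4)                   # placeholder: all feet unloaded
omega = torch.full((num_envs, 1), 3.0)           # cfg.osc.omega [Hz]
coupling = torch.full((num_envs, 1), 1.0)        # cfg.osc.coupling
offset = torch.full((num_envs, 1), 1.0)          # cfg.osc.offset
theta, theta_dot, obs = step_oscillators(
    theta, grf, omega, coupling, offset, dt, noise_std=0.25  # cfg.osc.process_noise_std
)

With zero GRF the phases advance at a constant 3 Hz; once a foot is loaded, the feedback term coupling * (cos(theta) + offset) slows that leg's oscillator. The swing_grf / stance_grf rewards then shape when contact occurs relative to the phase, and the gait-similarity terms (_reward_trot, _reward_pace, _reward_bound, _reward_pronk) score the resulting phase relationships between legs.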