diff --git a/gym/envs/__init__.py b/gym/envs/__init__.py index 1d4b38b..6a94f33 100644 --- a/gym/envs/__init__.py +++ b/gym/envs/__init__.py @@ -8,13 +8,14 @@ # * 3. add the runner confg name and location to the runner config dict # * 3. register the task experiment name to the env/config/ppo classes -# * from y import x where {y:x} class_dict = { "LeggedRobot": ".base.legged_robot", "FixedRobot": ".base.fixed_robot", "Cartpole": ".cartpole.cartpole", "MiniCheetah": ".mini_cheetah.mini_cheetah", "MiniCheetahRef": ".mini_cheetah.mini_cheetah_ref", + "MiniCheetahOsc": ".mini_cheetah.mini_cheetah_osc", + "MiniCheetahOscIK": ".mini_cheetah.mini_cheetah_osc_IK", "MIT_Humanoid": ".mit_humanoid.mit_humanoid", "Anymal": ".anymal_c.anymal", "A1": ".a1.a1", @@ -25,6 +26,8 @@ "CartpoleCfg": ".cartpole.cartpole_config", "MiniCheetahCfg": ".mini_cheetah.mini_cheetah_config", "MiniCheetahRefCfg": ".mini_cheetah.mini_cheetah_ref_config", + "MiniCheetahOscCfg": ".mini_cheetah.mini_cheetah_osc_config", + "MiniCheetahOscIKCfg": ".mini_cheetah.mini_cheetah_osc_IK_config", "MITHumanoidCfg": ".mit_humanoid.mit_humanoid_config", "A1Cfg": ".a1.a1_config", "AnymalCFlatCfg": ".anymal_c.flat.anymal_c_flat_config", @@ -35,6 +38,8 @@ "CartpoleRunnerCfg": ".cartpole.cartpole_config", "MiniCheetahRunnerCfg": ".mini_cheetah.mini_cheetah_config", "MiniCheetahRefRunnerCfg": ".mini_cheetah.mini_cheetah_ref_config", + "MiniCheetahOscRunnerCfg": ".mini_cheetah.mini_cheetah_osc_config", + "MiniCheetahOscIKRunnerCfg": ".mini_cheetah.mini_cheetah_osc_IK_config", "MITHumanoidRunnerCfg": ".mit_humanoid.mit_humanoid_config", "A1RunnerCfg": ".a1.a1_config", "AnymalCFlatRunnerCfg": ".anymal_c.flat.anymal_c_flat_config", @@ -49,6 +54,16 @@ "MiniCheetahRefCfg", "MiniCheetahRefRunnerCfg", ], + "mini_cheetah_osc": [ + "MiniCheetahOsc", + "MiniCheetahOscCfg", + "MiniCheetahOscRunnerCfg", + ], + "mini_cheetah_osc_IK": [ + "MiniCheetahOscIK", + "MiniCheetahOscIKCfg", + "MiniCheetahOscIKRunnerCfg", + ], "humanoid": ["MIT_Humanoid", "MITHumanoidCfg", "MITHumanoidRunnerCfg"], "humanoid_running": [ "HumanoidRunning", diff --git a/gym/envs/mini_cheetah/mini_cheetah_config.py b/gym/envs/mini_cheetah/mini_cheetah_config.py index a538067..de015b2 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_config.py +++ b/gym/envs/mini_cheetah/mini_cheetah_config.py @@ -3,14 +3,14 @@ LeggedRobotRunnerCfg, ) -BASE_HEIGHT_REF = 0.33 +BASE_HEIGHT_REF = 0.3 class MiniCheetahCfg(LeggedRobotCfg): class env(LeggedRobotCfg.env): num_envs = 2**12 num_actuators = 12 - episode_length_s = 10 + episode_length_s = 4 class terrain(LeggedRobotCfg.terrain): mesh_type = "plane" @@ -25,67 +25,63 @@ class init_state(LeggedRobotCfg.init_state): # * reset setup chooses how the initial conditions are chosen. 
# * "reset_to_basic" = a single position # * "reset_to_range" = uniformly random from a range defined below - reset_mode = "reset_to_basic" + reset_mode = "reset_to_range" # * default COM for basic initialization - pos = [0.0, 0.0, 0.33] # x,y,z [m] + pos = [0.0, 0.0, 0.35] # x,y,z [m] rot = [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat] lin_vel = [0.0, 0.0, 0.0] # x,y,z [m/s] ang_vel = [0.0, 0.0, 0.0] # x,y,z [rad/s] # * initialization for random range setup dof_pos_range = { - "haa": [-0.05, 0.05], - "hfe": [-0.85, -0.6], - "kfe": [-1.45, 1.72], - } - dof_vel_range = { - "haa": [0.0, 0.0], - "hfe": [0.0, 0.0], - "kfe": [0.0, 0.0], + "haa": [-0.01, 0.01], + "hfe": [-0.785398, -0.785398], + "kfe": [1.596976, 1.596976], } + dof_vel_range = {"haa": [0.0, 0.0], "hfe": [0.0, 0.0], "kfe": [0.0, 0.0]} root_pos_range = [ [0.0, 0.0], # x [0.0, 0.0], # y - [0.37, 0.4], # z + [0.35, 0.35], # z [0.0, 0.0], # roll [0.0, 0.0], # pitch - [0.0, 0.0], - ] # yaw + [0.0, 0.0], # yaw + ] root_vel_range = [ - [-0.05, 0.05], # x + [-0.5, 2.0], # x [0.0, 0.0], # y [-0.05, 0.05], # z [0.0, 0.0], # roll [0.0, 0.0], # pitch - [0.0, 0.0], - ] # yaw + [0.0, 0.0], # yaw + ] class control(LeggedRobotCfg.control): # * PD Drive parameters: stiffness = {"haa": 20.0, "hfe": 20.0, "kfe": 20.0} damping = {"haa": 0.5, "hfe": 0.5, "kfe": 0.5} ctrl_frequency = 100 - desired_sim_frequency = 1000 + desired_sim_frequency = 500 class commands: # * time before command are changed[s] - resampling_time = 10.0 + resampling_time = 3.0 class ranges: - lin_vel_x = [-1.0, 1.0] # min max [m/s] + lin_vel_x = [-2.0, 3.0] # min max [m/s] lin_vel_y = 1.0 # max [m/s] - yaw_vel = 1 # max [rad/s] + yaw_vel = 3 # max [rad/s] class push_robots: - toggle = False - interval_s = 1 - max_push_vel_xy = 0.5 + toggle = True + interval_s = 15 + max_push_vel_xy = 0.05 push_box_dims = [0.3, 0.1, 0.1] # x,y,z [m] class domain_rand: - randomize_friction = False - friction_range = [0.5, 1.25] + randomize_friction = True + friction_range = [0.5, 1.0] randomize_base_mass = False added_mass_range = [-1.0, 1.0] @@ -95,16 +91,13 @@ class asset(LeggedRobotCfg.asset): + "mini_cheetah/urdf/mini_cheetah_simple.urdf" ) foot_name = "foot" - penalize_contacts_on = [] - terminate_after_contacts_on = ["base", "thigh"] + penalize_contacts_on = ["shank"] + terminate_after_contacts_on = ["base"] end_effector_names = ["foot"] - # * merge bodies connected by fixed joints. collapse_fixed_joints = False - # * 1 to disable, 0 to enable...bitwise filter self_collisions = 1 flip_visual_attachments = False disable_gravity = False - # * set all torques set to 0 disable_motors = False joint_damping = 0.1 rotor_inertia = [0.002268, 0.002268, 0.005484] * 4 @@ -118,34 +111,34 @@ class reward_settings(LeggedRobotCfg.reward_settings): tracking_sigma = 0.25 class scaling(LeggedRobotCfg.scaling): - base_ang_vel = 1.0 - base_lin_vel = 1.0 - commands = 1 - dof_vel = 100.0 # ought to be roughly max expected speed. 
- base_height = 1 - dof_pos = 1 + base_ang_vel = 0.3 + base_lin_vel = BASE_HEIGHT_REF + dof_vel = 4 * [2.0, 2.0, 4.0] + base_height = 0.3 + dof_pos = 4 * [0.2, 0.3, 0.3] dof_pos_obs = dof_pos - dof_pos_target = dof_pos - # tau_ff = 4*[18, 18, 28] # hip-abad, hip-pitch, knee + dof_pos_target = 4 * [0.2, 0.3, 0.3] + tau_ff = 4 * [18, 18, 28] + commands = [3, 1, 3] class MiniCheetahRunnerCfg(LeggedRobotRunnerCfg): seed = -1 class policy(LeggedRobotRunnerCfg.policy): - actor_hidden_dims = [256, 256, 256] - critic_hidden_dims = [256, 256, 256] + actor_hidden_dims = [256, 256, 128] + critic_hidden_dims = [128, 64] # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid activation = "elu" actor_obs = [ - "base_height", "base_lin_vel", "base_ang_vel", "projected_gravity", "commands", "dof_pos_obs", "dof_vel", + "dof_pos_target", ] critic_obs = [ "base_height", @@ -155,33 +148,38 @@ class policy(LeggedRobotRunnerCfg.policy): "commands", "dof_pos_obs", "dof_vel", + "dof_pos_target", ] actions = ["dof_pos_target"] - add_noise = False + add_noise = True class noise: - dof_pos_obs = 0.005 # can be made very low + scale = 1.0 + dof_pos_obs = 0.01 + base_ang_vel = 0.01 + dof_pos = 0.005 dof_vel = 0.005 - base_ang_vel = 0.05 - projected_gravity = 0.02 + lin_vel = 0.05 + ang_vel = [0.3, 0.15, 0.4] + gravity_vec = 0.1 class reward(LeggedRobotRunnerCfg.policy.reward): class weights(LeggedRobotRunnerCfg.policy.reward.weights): - tracking_lin_vel = 5.0 - tracking_ang_vel = 5.0 + tracking_lin_vel = 4.0 + tracking_ang_vel = 2.0 lin_vel_z = 0.0 - ang_vel_xy = 0.0 + ang_vel_xy = 0.01 orientation = 1.0 torques = 5.0e-7 - dof_vel = 1.0 - base_height = 1.0 - action_rate = 0.001 - action_rate2 = 0.0001 + dof_vel = 0.0 + min_base_height = 1.5 + action_rate = 0.01 + action_rate2 = 0.001 stand_still = 0.0 dof_pos_limits = 0.0 feet_contact_forces = 0.0 - dof_near_home = 1.0 + dof_near_home = 0.0 class termination_weight: termination = 0.01 @@ -191,23 +189,20 @@ class algorithm(LeggedRobotRunnerCfg.algorithm): value_loss_coef = 1.0 use_clipped_value_loss = True clip_param = 0.2 - entropy_coef = 0.01 - num_learning_epochs = 6 + entropy_coef = 0.02 + num_learning_epochs = 4 # * mini batch size = num_envs*nsteps / nminibatches - num_mini_batches = 6 - learning_rate = 1.0e-4 + num_mini_batches = 8 + learning_rate = 1.0e-5 schedule = "adaptive" # can be adaptive or fixed discount_horizon = 1.0 # [s] - GAE_bootstrap_horizon = 1.0 # [s] + GAE_bootstrap_horizon = 2.0 # [s] desired_kl = 0.01 max_grad_norm = 1.0 class runner(LeggedRobotRunnerCfg.runner): run_name = "" experiment_name = "mini_cheetah" - # * number of policy updates - max_iterations = 1000 + max_iterations = 500 algorithm_class_name = "PPO" - # * per iteration - # * (n_steps in Rudin 2021 paper - batch_size = n_steps * n_robots) - num_steps_per_env = 24 + num_steps_per_env = 32 diff --git a/gym/envs/mini_cheetah/mini_cheetah_osc.py b/gym/envs/mini_cheetah/mini_cheetah_osc.py new file mode 100644 index 0000000..2f8880c --- /dev/null +++ b/gym/envs/mini_cheetah/mini_cheetah_osc.py @@ -0,0 +1,480 @@ +import torch +import numpy as np +from isaacgym.torch_utils import torch_rand_float + +from gym.envs.mini_cheetah.mini_cheetah import MiniCheetah + +from isaacgym import gymtorch + +MINI_CHEETAH_MASS = 8.292 * 9.81 # Weight of mini cheetah in Newtons + + +class MiniCheetahOsc(MiniCheetah): + def __init__(self, gym, sim, cfg, sim_params, sim_device, headless): + super().__init__(gym, sim, cfg, sim_params, sim_device, headless) + self.process_noise_std = 
self.cfg.osc.process_noise_std + + def _init_buffers(self): + super()._init_buffers() + self.oscillators = torch.zeros(self.num_envs, 4, device=self.device) + self.oscillator_obs = torch.zeros(self.num_envs, 8, device=self.device) + + self.oscillators_vel = torch.zeros_like(self.oscillators) + self.grf = torch.zeros(self.num_envs, 4, device=self.device) + self.osc_omega = self.cfg.osc.omega * torch.ones( + self.num_envs, 1, device=self.device + ) + self.osc_coupling = self.cfg.osc.coupling * torch.ones( + self.num_envs, 1, device=self.device + ) + self.osc_offset = self.cfg.osc.offset * torch.ones( + self.num_envs, 1, device=self.device + ) + + def _reset_oscillators(self, env_ids): + if len(env_ids) == 0: + return + # * random + if self.cfg.osc.init_to == "random": + self.oscillators[env_ids] = torch_rand_float( + 0, + 2 * torch.pi, + shape=self.oscillators[env_ids].shape, + device=self.device, + ) + elif self.cfg.osc.init_to == "standing": + self.oscillators[env_ids] = 3 * torch.pi / 2 + elif self.cfg.osc.init_to == "trot": + self.oscillators[env_ids] = torch.tensor( + [0.0, torch.pi, torch.pi, 0.0], device=self.device + ) + elif self.cfg.osc.init_to == "pace": + self.oscillators[env_ids] = torch.tensor( + [0.0, torch.pi, 0.0, torch.pi], device=self.device + ) + if self.cfg.osc.init_w_offset: + self.oscillators[env_ids, :] += ( + torch.rand_like(self.oscillators[env_ids, 0]).unsqueeze(1) + * 2 + * torch.pi + ) + elif self.cfg.osc.init_to == "pronk": + self.oscillators[env_ids, :] *= 0.0 + elif self.cfg.osc.init_to == "bound": + self.oscillators[env_ids, :] = torch.tensor( + [torch.pi, torch.pi, 0.0, 0.0], device=self.device + ) + else: + raise NotImplementedError + + if self.cfg.osc.init_w_offset: + self.oscillators[env_ids, :] += ( + torch.rand_like(self.oscillators[env_ids, 0]).unsqueeze(1) + * 2 + * torch.pi + ) + self.oscillators = torch.remainder(self.oscillators, 2 * torch.pi) + + def _reset_system(self, env_ids): + if len(env_ids) == 0: + return + self._reset_oscillators(env_ids) + + self.oscillator_obs = torch.cat( + (torch.cos(self.oscillators), torch.sin(self.oscillators)), dim=1 + ) + + # * keep some robots in the same starting state as they ended + timed_out_subset = (self.timed_out & ~self.terminated) * ( + torch.rand(self.num_envs, device=self.device) + < self.cfg.init_state.timeout_reset_ratio + ) + + env_ids = (self.terminated | timed_out_subset).nonzero().flatten() + if len(env_ids) == 0: + return + super()._reset_system(env_ids) + + def _pre_decimation_step(self): + super()._pre_decimation_step() + # self.grf = self._compute_grf() + if not self.cfg.osc.randomize_osc_params: + self.compute_osc_slope() + + def compute_osc_slope(self): + cmd_x = torch.abs(self.commands[:, 0:1]) - self.cfg.osc.stop_threshold + stop = cmd_x < 0 + + self.osc_offset = stop * self.cfg.osc.offset + self.osc_omega = ( + stop * self.cfg.osc.omega_stop + + torch.randn_like(self.osc_omega) * self.cfg.osc.omega_var + ) + self.osc_coupling = ( + stop * self.cfg.osc.coupling_stop + + torch.randn_like(self.osc_coupling) * self.cfg.osc.coupling_var + ) + + self.osc_omega += (~stop) * torch.clamp( + cmd_x * self.cfg.osc.omega_slope + self.cfg.osc.omega_step, + min=0.0, + max=self.cfg.osc.omega_max, + ) + self.osc_coupling += (~stop) * torch.clamp( + cmd_x * self.cfg.osc.coupling_slope + self.cfg.osc.coupling_step, + min=0.0, + max=self.cfg.osc.coupling_max, + ) + + self.osc_omega = torch.clamp_min(self.osc_omega, 0.1) + self.osc_coupling = torch.clamp_min(self.osc_coupling, 0) + + def 
_process_rigid_body_props(self, props, env_id): + if env_id == 0: + # * init buffers for the domain rand changes + self.mass = torch.zeros(self.num_envs, 1, device=self.device) + self.com = torch.zeros(self.num_envs, 3, device=self.device) + + # * randomize mass + if self.cfg.domain_rand.randomize_base_mass: + lower = self.cfg.domain_rand.lower_mass_offset + upper = self.cfg.domain_rand.upper_mass_offset + # self.mass_ + props[0].mass += np.random.uniform(lower, upper) + self.mass[env_id] = props[0].mass + # * randomize com position + lower = self.cfg.domain_rand.lower_z_offset + upper = self.cfg.domain_rand.upper_z_offset + props[0].com.z += np.random.uniform(lower, upper) + self.com[env_id, 2] = props[0].com.z + + lower = self.cfg.domain_rand.lower_x_offset + upper = self.cfg.domain_rand.upper_x_offset + props[0].com.x += np.random.uniform(lower, upper) + self.com[env_id, 0] = props[0].com.x + return props + + def _post_decimation_step(self): + """Update all states that are not handled in PhysX""" + super()._post_decimation_step() + self.grf = self._compute_grf() + # self._step_oscillators() + + def _post_physx_step(self): + super()._post_physx_step() + self._step_oscillators(self.dt / self.cfg.control.decimation) + return None + + def _step_oscillators(self, dt=None): + if dt is None: + dt = self.dt + + local_feedback = self.osc_coupling * ( + torch.cos(self.oscillators) + self.osc_offset + ) + grf = self._compute_grf() + self.oscillators_vel = self.osc_omega - grf * local_feedback + # self.oscillators_vel *= torch_rand_float(0.9, + # 1.1, + # self.oscillators_vel.shape, + # self.device) + self.oscillators_vel += ( + torch.randn(self.oscillators_vel.shape, device=self.device) + * self.cfg.osc.process_noise_std + ) + + self.oscillators_vel *= 2 * torch.pi + self.oscillators += ( + self.oscillators_vel * dt + ) # torch.clamp(self.oscillators_vel * dt, min=0) + self.oscillators = torch.remainder(self.oscillators, 2 * torch.pi) + self.oscillator_obs = torch.cat( + (torch.cos(self.oscillators), torch.sin(self.oscillators)), dim=1 + ) + + def _resample_commands(self, env_ids): + """Randommly select commands of some environments + + Args: + env_ids (List[int]): Environments ids for which new commands are needed + """ + if len(env_ids) == 0: + return + super()._resample_commands(env_ids) + possible_commands = torch.tensor( + self.command_ranges["lin_vel_x"], device=self.device + ) + self.commands[env_ids, 0:1] = possible_commands[ + torch.randint( + 0, len(possible_commands), (len(env_ids), 1), device=self.device + ) + ] + + if 0 in self.cfg.commands.ranges.lin_vel_x: + # * with 20% chance, reset to 0 commands except for forward + self.commands[env_ids, 1:] *= ( + torch_rand_float(0, 1, (len(env_ids), 1), device=self.device).squeeze(1) + < 0.8 + ).unsqueeze(1) + # * with 20% chance, reset to 0 commands except for rotation + self.commands[env_ids, :2] *= ( + torch_rand_float(0, 1, (len(env_ids), 1), device=self.device).squeeze(1) + < 0.8 + ).unsqueeze(1) + # * with 10% chance, reset to 0 + self.commands[env_ids, :] *= ( + torch_rand_float(0, 1, (len(env_ids), 1), device=self.device).squeeze(1) + < 0.9 + ).unsqueeze(1) + + if self.cfg.osc.randomize_osc_params: + self._resample_osc_params(env_ids) + + def _resample_osc_params(self, env_ids): + if len(env_ids) > 0: + self.osc_omega[env_ids, 0] = torch_rand_float( + self.cfg.osc.omega_range[0], + self.cfg.osc.omega_range[1], + (len(env_ids), 1), + device=self.device, + ).squeeze(1) + self.osc_coupling[env_ids, 0] = torch_rand_float( + 
                self.cfg.osc.coupling_range[0],
+                self.cfg.osc.coupling_range[1],
+                (len(env_ids), 1),
+                device=self.device,
+            ).squeeze(1)
+            self.osc_offset[env_ids, 0] = torch_rand_float(
+                self.cfg.osc.offset_range[0],
+                self.cfg.osc.offset_range[1],
+                (len(env_ids), 1),
+                device=self.device,
+            ).squeeze(1)
+
+    def perturb_base_velocity(self, velocity_delta, env_ids=None):
+        if env_ids is None:
+            env_ids = [range(self.num_envs)]
+        self.root_states[env_ids, 7:10] += velocity_delta
+        self.gym.set_actor_root_state_tensor(
+            self.sim, gymtorch.unwrap_tensor(self.root_states)
+        )
+
+    def _compute_grf(self, grf_norm=True):
+        grf = torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1)
+        if grf_norm:
+            return torch.clamp_max(grf / MINI_CHEETAH_MASS, 1.0)
+        else:
+            return grf
+
+    def _switch(self):
+        c_vel = torch.linalg.norm(self.commands, dim=1)
+        return torch.exp(
+            -torch.square(torch.max(torch.zeros_like(c_vel), c_vel - 0.1))
+            / self.cfg.reward_settings.switch_scale
+        )
+
+    def _reward_cursorial(self):
+        # penalize the abad joints being away from 0
+        return -torch.mean(
+            torch.square(self.dof_pos[:, 0:12:3] / self.scales["dof_pos"][0]), dim=1
+        )
+
+    def _reward_swing_grf(self):
+        # Penalize non-zero grf during swing (0 to pi)
+        rew = self.get_swing_grf(self.cfg.osc.osc_bool, self.cfg.osc.grf_bool)
+        return -torch.sum(rew, dim=1)
+
+    def _reward_stance_grf(self):
+        # Reward non-zero grf during stance (pi to 2pi)
+        rew = self.get_stance_grf(self.cfg.osc.osc_bool, self.cfg.osc.grf_bool)
+        return torch.sum(rew, dim=1)
+
+    def get_swing_grf(self, osc_bool=False, contact_bool=False):
+        if osc_bool:
+            phase = torch.lt(self.oscillators, torch.pi).int()
+        else:
+            phase = torch.maximum(
+                torch.zeros_like(self.oscillators), torch.sin(self.oscillators)
+            )
+        if contact_bool:
+            return phase * torch.gt(self._compute_grf(), self.cfg.osc.grf_threshold)
+        else:
+            return phase * self._compute_grf()
+
+    def get_stance_grf(self, osc_bool=False, contact_bool=False):
+        if osc_bool:
+            phase = torch.gt(self.oscillators, torch.pi).int()
+        else:
+            phase = torch.maximum(
+                torch.zeros_like(self.oscillators), -torch.sin(self.oscillators)
+            )
+        if contact_bool:
+            return phase * torch.gt(self._compute_grf(), self.cfg.osc.grf_threshold)
+        else:
+            return phase * self._compute_grf()
+
+    def _reward_coupled_grf(self):
+        """
+        Multiply rewards for stance/swing grf, discount when undesirable
+        behavior (grf during swing, no grf during stance)
+        """
+        swing_rew = self.get_swing_grf()
+        stance_rew = self.get_stance_grf()
+        combined_rew = self._sqrdexp(swing_rew * 2) + stance_rew
+        prod = torch.prod(torch.clip(combined_rew, 0, 1), dim=1)
+        return prod - torch.ones_like(prod)
+
+    def _reward_swing_velocity(self):
+        """Reward non-zero end effector velocity during swing (0 to pi)"""
+        # velocity = torch.tanh(torch.norm(self.end_effector_lin_vel, dim=-1))
+        velocity = torch.zeros_like(
+            self.oscillators
+        )  # TODO: Grab velocity from AJ V2 code
+        phase_bool = torch.lt(self.oscillators, torch.pi).int()
+        phase_sin = torch.maximum(
+            torch.zeros_like(self.oscillators), torch.sin(self.oscillators)
+        )
+        if self.cfg.osc.osc_bool:
+            rew = phase_bool * velocity
+        else:
+            rew = phase_sin * velocity
+        return torch.mean(rew, dim=1)
+
+    def _reward_stance_velocity(self):
+        """Reward zero end effector velocity during stance (pi to 2pi)"""
+        # velocity = torch.tanh(torch.norm(self.end_effector_lin_vel, dim=-1))
+        velocity = torch.zeros_like(
+            self.oscillators
+        )  # TODO: Grab velocity from AJ V2 code
+        ph_bool = torch.gt(self.oscillators, torch.pi).int()
+        ph_sin = torch.maximum(
+            torch.zeros_like(self.oscillators), -torch.sin(self.oscillators)
+        )
+        if self.cfg.osc.osc_bool:
+            rew = ph_bool * velocity
+        else:
+            rew = ph_sin * velocity
+        # return torch.mean(self._sqrdexp(rew), dim=1)  # TODO: Higher reward if rew close to zero (how to decouple so that ignore if not in stance??)
+        return -torch.mean(rew, dim=1)
+
+    def _reward_dof_vel(self):
+        return super()._reward_dof_vel() * self._switch()
+
+    def _reward_dof_near_home(self):
+        return super()._reward_dof_near_home() * self._switch()
+
+    def _reward_stand_still(self):
+        """Penalize motion at zero commands"""
+        # * normalize angles so we care about being within 5 deg
+        rew_pos = torch.mean(
+            self._sqrdexp((self.dof_pos - self.default_dof_pos) / torch.pi * 36), dim=1
+        )
+        rew_vel = torch.mean(self._sqrdexp(self.dof_vel), dim=1)
+        rew_base_vel = torch.mean(torch.square(self.base_lin_vel), dim=1)
+        rew_base_vel += torch.mean(torch.square(self.base_ang_vel), dim=1)
+        return (rew_vel + rew_pos - rew_base_vel) * self._switch()
+
+    def _reward_standing_torques(self):
+        """Penalize torques at zero commands"""
+        return super()._reward_torques() * self._switch()
+
+    # * gait similarity scores
+    def angle_difference(self, theta1, theta2):
+        diff = torch.abs(theta1 - theta2) % (2 * torch.pi)
+        return torch.min(diff, 2 * torch.pi - diff)
+
+    def _reward_trot(self):
+        # ! diagonal difference, front right and hind left
+        angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 3])
+        similarity = self._sqrdexp(angle, torch.pi)
+        # ! diagonal difference, front left and hind right
+        angle = self.angle_difference(self.oscillators[:, 1], self.oscillators[:, 2])
+        similarity *= self._sqrdexp(angle, torch.pi)
+        # ! difference, front right and front left
+        angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 1])
+        similarity *= self._sqrdexp(angle - torch.pi, torch.pi)
+        # ! difference hind right and hind left
+        angle = self.angle_difference(self.oscillators[:, 2], self.oscillators[:, 3])
+        similarity *= self._sqrdexp(angle - torch.pi, torch.pi)
+        return similarity
+
+    def _reward_pronk(self):
+        # ! diagonal difference, front right and hind left
+        angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 3])
+        similarity = self._sqrdexp(angle, torch.pi)
+        # ! diagonal difference, front left and hind right
+        angle = self.angle_difference(self.oscillators[:, 1], self.oscillators[:, 2])
+        similarity *= self._sqrdexp(angle, torch.pi)
+        # ! difference, front right and front left
+        angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 1])
+        similarity *= self._sqrdexp(angle, torch.pi)
+        # ! difference front right and hind right
+        angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 2])
+        similarity *= self._sqrdexp(angle, torch.pi)
+        return similarity
+
+    def _reward_pace(self):
+        angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 2])
+        similarity = self._sqrdexp(angle, torch.pi)
+        # ! difference front left and hind left
+        angle = self.angle_difference(self.oscillators[:, 1], self.oscillators[:, 3])
+        similarity *= self._sqrdexp(angle, torch.pi)
+        # ! difference front right and front left
+        angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 1])
+        similarity *= self._sqrdexp(angle - torch.pi, torch.pi)
+        # ! difference hind right and hind left
+        angle = self.angle_difference(self.oscillators[:, 2], self.oscillators[:, 3])
+        similarity *= self._sqrdexp(angle - torch.pi, torch.pi)
+
+        return similarity
+
+    def _reward_any_symm_gait(self):
+        rew_trot = self._reward_trot()
+        rew_pace = self._reward_pace()
+        rew_bound = self._reward_bound()
+        return torch.max(torch.max(rew_trot, rew_pace), rew_bound)
+
+    def _reward_enc_pace(self):
+        return self._reward_pace()
+
+    def _reward_bound(self):
+        # ! difference, front right and front left
+        angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 1])
+        similarity = self._sqrdexp(angle, torch.pi)
+        # ! difference hind right and hind left
+        angle = self.angle_difference(self.oscillators[:, 2], self.oscillators[:, 3])
+        similarity *= self._sqrdexp(angle, torch.pi)
+        # ! difference right side
+        angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 2])
+        similarity *= self._sqrdexp(angle - torch.pi, torch.pi)
+        # ! difference left side
+        angle = self.angle_difference(self.oscillators[:, 1], self.oscillators[:, 3])
+        similarity *= self._sqrdexp(angle - torch.pi, torch.pi)
+        return similarity
+
+    def _reward_asymettric(self):
+        # ! hind legs
+        angle = self.angle_difference(self.oscillators[:, 2], self.oscillators[:, 3])
+        similarity = 1 - self._sqrdexp(angle, torch.pi)
+        similarity *= 1 - self._sqrdexp((torch.pi - angle), torch.pi)
+        # ! difference, left legs
+        angle = self.angle_difference(self.oscillators[:, 1], self.oscillators[:, 3])
+        similarity *= 1 - self._sqrdexp(angle, torch.pi)
+        similarity *= 1 - self._sqrdexp((torch.pi - angle), torch.pi)
+        # ! difference right legs
+        angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 2])
+        similarity *= 1 - self._sqrdexp(angle, torch.pi)
+        similarity *= 1 - self._sqrdexp((torch.pi - angle), torch.pi)
+        # ! front legs
+        angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 1])
+        similarity *= 1 - self._sqrdexp(angle, torch.pi)
+        similarity *= 1 - self._sqrdexp((torch.pi - angle), torch.pi)
+        # ! diagonal FR
+        angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 3])
+        similarity *= 1 - self._sqrdexp(angle, torch.pi)
+        similarity *= 1 - self._sqrdexp((torch.pi - angle), torch.pi)
+        # ! 
diagonal FL + angle = self.angle_difference(self.oscillators[:, 1], self.oscillators[:, 2]) + similarity *= 1 - self._sqrdexp(angle, torch.pi) + similarity *= 1 - self._sqrdexp((torch.pi - angle), torch.pi) + return similarity diff --git a/gym/envs/mini_cheetah/mini_cheetah_osc_config.py b/gym/envs/mini_cheetah/mini_cheetah_osc_config.py new file mode 100644 index 0000000..a08d517 --- /dev/null +++ b/gym/envs/mini_cheetah/mini_cheetah_osc_config.py @@ -0,0 +1,248 @@ +from gym.envs.mini_cheetah.mini_cheetah_config import ( + MiniCheetahCfg, + MiniCheetahRunnerCfg, +) + +BASE_HEIGHT_REF = 0.33 + + +class MiniCheetahOscCfg(MiniCheetahCfg): + class viewer: + ref_env = 0 + pos = [-2.0, 3, 2] # [m] + lookat = [0.0, 1.0, 0.5] # [m] + + class env(MiniCheetahCfg.env): + num_envs = 4096 + num_actuators = 12 + episode_length_s = 4.0 + env_spacing = 3.0 + + class terrain(MiniCheetahCfg.terrain): + mesh_type = "plane" + # mesh_type = 'trimesh' # none, plane, heightfield or trimesh + + class init_state(MiniCheetahCfg.init_state): + reset_mode = "reset_to_range" + timeout_reset_ratio = 0.75 + # * default COM for basic initialization + pos = [0.0, 0.0, 0.35] # x,y,z [m] + rot = [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat] + lin_vel = [0.0, 0.0, 0.0] # x,y,z [m/s] + ang_vel = [0.0, 0.0, 0.0] # x,y,z [rad/s] + default_joint_angles = { + "haa": 0.0, + "hfe": -0.785398, + "kfe": 1.596976, + } + + # * initialization for random range setup + dof_pos_range = { + "haa": [-0.01, 0.01], + "hfe": [-0.785398, -0.785398], + "kfe": [1.596976, 1.596976], + } + dof_vel_range = {"haa": [0.0, 0.0], "hfe": [0.0, 0.0], "kfe": [0.0, 0.0]} + root_pos_range = [ + [0.0, 0.0], # x + [0.0, 0.0], # y + [0.35, 0.35], # z + [0.0, 0.0], # roll + [0.0, 0.0], # pitch + [0.0, 0.0], # yaw + ] + root_vel_range = [ + [-0.5, 2.0], # x + [0.0, 0.0], # y + [-0.05, 0.05], # z + [0.0, 0.0], # roll + [0.0, 0.0], # pitch + [0.0, 0.0], # yaw + ] + + class control(MiniCheetahCfg.control): + # * PD Drive parameters: + stiffness = {"haa": 20.0, "hfe": 20.0, "kfe": 20.0} + damping = {"haa": 0.5, "hfe": 0.5, "kfe": 0.5} + ctrl_frequency = 100 + desired_sim_frequency = 500 + + class osc: + process_noise_std = 0.25 + # oscillator parameters + omega = 3 # 0.5 #3.5 # in Hz + coupling = 1 # 0.02 + osc_bool = False + grf_bool = False + randomize_osc_params = False + grf_threshold = 0.1 # 20. # Normalized to body weight + omega_range = [1.0, 4.0] # [0.0, 10.] + coupling_range = [ + 0.0, + 1.0, + ] # with normalized grf, can have omega/coupling on same scale + offset_range = [0.0, 0.0] + stop_threshold = 0.5 + omega_stop = 1.0 + omega_step = 2.0 + omega_slope = 1.0 + omega_max = 4.0 + omega_var = 0.25 + # coupling_step = 0. + # coupling_stop = 0. + coupling_stop = 4.0 + coupling_step = 1.0 + coupling_slope = 0.0 + coupling_max = 1.0 + offset = 1.0 + coupling_var = 0.25 + + init_to = "random" + init_w_offset = True + + class commands(MiniCheetahCfg.commands): + resampling_time = 3.0 # time before command are changed[s] + var = 1.0 + + class ranges(MiniCheetahCfg.commands.ranges): + lin_vel_x = [-3.0, -1.0, 0.0, 1.0, 3.0] # min max [m/s] + lin_vel_y = 1.0 # [-1., 0, 1.] # max [m/s] + yaw_vel = 3.0 # [-6., -3., 0., 3., 6.] 
# max [rad/s] + + class push_robots(MiniCheetahCfg.push_robots): + toggle = True + interval_s = 15 + max_push_vel_xy = 0.05 + push_box_dims = [0.2, 0.2, 0.2] # x,y,z [m] + + class domain_rand(MiniCheetahCfg.domain_rand): + randomize_friction = True + friction_range = [0.4, 1.0] + randomize_base_mass = False + lower_mass_offset = -0.5 # kg + upper_mass_offset = 2.0 + lower_z_offset = 0.0 # m + upper_z_offset = 0.2 + lower_x_offset = 0.0 + upper_x_offset = 0.0 + + class asset(MiniCheetahCfg.asset): + shank_length_diff = 0 # Units in cm + # file = "{LEGGED_GYM_ROOT_DIR}/resources/robots/" \ + # + "mini_cheetah/urdf/mini_cheetah_" \ + # + str(shank_length_diff) + ".urdf" + file = ( + "{LEGGED_GYM_ROOT_DIR}/resources/robots/" + + "mini_cheetah/urdf/mini_cheetah_simple.urdf" + ) + foot_name = "foot" + penalize_contacts_on = ["thigh", "shank"] + terminate_after_contacts_on = ["base"] + collapse_fixed_joints = False + fix_base_link = False + self_collisions = 1 # 1 to disable, 0 to enable...bitwise filter + flip_visual_attachments = False + disable_gravity = False + disable_motors = False # all torques set to 0 + + class reward_settings(MiniCheetahCfg.reward_settings): + soft_dof_pos_limit = 0.8 + soft_dof_vel_limit = 0.9 + soft_torque_limit = 0.9 + max_contact_force = 600.0 + base_height_target = BASE_HEIGHT_REF + 0.03 + tracking_sigma = 0.25 + switch_scale = 0.5 + + class scaling(MiniCheetahCfg.scaling): + pass + + +class MiniCheetahOscRunnerCfg(MiniCheetahRunnerCfg): + seed = -1 + + class policy(MiniCheetahRunnerCfg.policy): + actor_hidden_dims = [256, 256, 128] + critic_hidden_dims = [256, 256, 128] + # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid + activation = "elu" + + actor_obs = [ + "base_ang_vel", + "projected_gravity", + "commands", + "dof_pos_obs", + "dof_vel", + "oscillator_obs", + "dof_pos_target", + ] + + critic_obs = [ + "base_height", + "base_lin_vel", + "base_ang_vel", + "projected_gravity", + "commands", + "dof_pos_obs", + "dof_vel", + "oscillator_obs", + "oscillators_vel", + "dof_pos_target", + ] + + actions = ["dof_pos_target"] + + class noise: + pass + + class reward: + class weights: + tracking_lin_vel = 4.0 + tracking_ang_vel = 2.0 + lin_vel_z = 0.0 + ang_vel_xy = 0.01 + orientation = 1.0 + torques = 5.0e-7 + dof_vel = 0.0 + min_base_height = 1.5 + collision = 0 + action_rate = 0.01 # -0.01 + action_rate2 = 0.001 # -0.001 + stand_still = 0.0 + dof_pos_limits = 0.0 + feet_contact_forces = 0.0 + dof_near_home = 0.0 + swing_grf = 5.0 + stance_grf = 5.0 + swing_velocity = 0.0 + stance_velocity = 0.0 + coupled_grf = 0.0 # 8. 
+ enc_pace = 0.0 + cursorial = 0.25 + standing_torques = 0.0 # 1.e-5 + + class termination_weight: + termination = 15.0 / 100.0 + + class algorithm(MiniCheetahRunnerCfg.algorithm): + # training params + value_loss_coef = 1.0 + use_clipped_value_loss = True + clip_param = 0.2 + entropy_coef = 0.02 + num_learning_epochs = 4 + # mini batch size = num_envs*nsteps/nminibatches + num_mini_batches = 8 + learning_rate = 1.0e-5 + schedule = "adaptive" # can be adaptive, fixed + discount_horizon = 1.0 + GAE_bootstrap_horizon = 2.0 + desired_kl = 0.01 + max_grad_norm = 1.0 + + class runner(MiniCheetahRunnerCfg.runner): + run_name = "" + experiment_name = "FullSend" + max_iterations = 500 # number of policy updates + algorithm_class_name = "PPO" + num_steps_per_env = 32 diff --git a/gym/envs/mini_cheetah/mini_cheetah_ref_config.py b/gym/envs/mini_cheetah/mini_cheetah_ref_config.py index 2e2aa98..41bb634 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_ref_config.py +++ b/gym/envs/mini_cheetah/mini_cheetah_ref_config.py @@ -16,42 +16,6 @@ class terrain(MiniCheetahCfg.terrain): pass class init_state(MiniCheetahCfg.init_state): - reset_mode = "reset_to_range" - # * default COM for basic initialization - pos = [0.0, 0.0, 0.33] # x,y,z [m] - rot = [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat] - lin_vel = [0.0, 0.0, 0.0] # x,y,z [m/s] - ang_vel = [0.0, 0.0, 0.0] # x,y,z [rad/s] - default_joint_angles = { - "haa": 0.0, - "hfe": -0.785398, - "kfe": 1.596976, - } - - # * initialization for random range setup - dof_pos_range = { - "haa": [-0.00001, 0.00001], - "hfe": [-0.785398, -0.785398], - "kfe": [1.596976, 1.596976], - } - dof_vel_range = {"haa": [-0.2, 0.2], "hfe": [-0.2, 0.2], "kfe": [-0.2, 0.2]} - root_pos_range = [ - [0.0, 0.0], # x - [0.0, 0.0], # y - [0.35, 0.35], # z - [0.0, 0.0], # roll - [0.0, 0.0], # pitch - [0.0, 0.0], - ] # yaw - root_vel_range = [ - [-0.5, 2.0], # x - [0.0, 0.0], # y - [-0.05, 0.05], # z - [0.0, 0.0], # roll - [0.0, 0.0], # pitch - [0.0, 0.0], - ] # yaw - ref_traj = ( "{LEGGED_GYM_ROOT_DIR}/resources/robots/" + "mini_cheetah/trajectories/single_leg.csv" @@ -63,28 +27,16 @@ class control(MiniCheetahCfg.control): damping = {"haa": 0.5, "hfe": 0.5, "kfe": 0.5} gait_freq = 3.0 ctrl_frequency = 100 - desired_sim_frequency = 1000 - - class commands: - resampling_time = 3.0 # time before command are changed[s] + desired_sim_frequency = 500 - class ranges: - lin_vel_x = [-1.0, 3.0] # min max [m/s] - lin_vel_y = 1.0 # max [m/s] - yaw_vel = 3.14 / 2.0 # max [rad/s] + class commands(MiniCheetahCfg.commands): + pass - class push_robots: - toggle = True - interval_s = 0.1 - max_push_vel_xy = 0.1 - push_box_dims = [0.3, 0.1, 0.1] # x,y,z [m] + class push_robots(MiniCheetahCfg.push_robots): + pass class domain_rand(MiniCheetahCfg.domain_rand): - randomize_friction = True - friction_range = [0.6, 1.0] - randomize_base_mass = False - added_mass_range = [-1.0, 3.0] - friction_range = [0.0, 1.0] + pass class asset(MiniCheetahCfg.asset): file = ( @@ -96,17 +48,17 @@ class asset(MiniCheetahCfg.asset): terminate_after_contacts_on = ["base", "thigh"] collapse_fixed_joints = False fix_base_link = False - self_collisions = 1 # 1 to disable, 0 to enable...bitwise filter + self_collisions = 1 flip_visual_attachments = False disable_gravity = False - disable_motors = False # all torques set to 0 + disable_motors = False class reward_settings(MiniCheetahCfg.reward_settings): soft_dof_pos_limit = 0.9 soft_dof_vel_limit = 0.9 soft_torque_limit = 0.9 max_contact_force = 600.0 - base_height_target = BASE_HEIGHT_REF + 
base_height_target = 0.3 tracking_sigma = 0.25 class scaling(MiniCheetahCfg.scaling): @@ -148,26 +100,18 @@ class policy(MiniCheetahRunnerCfg.policy): disable_actions = False class noise: - dof_pos_obs = 0.0 # 0.005 # can be made very low - dof_vel = 0.0 # 0.005 - ang_vel = 0.0 # [0.1, 0.1, 0.1] # 0.027, 0.14, 0.37 - base_ang_vel = 0.0 # 0. - dof_pos = 0.0 # 0.005 - dof_vel = 0.0 # 0.005 - lin_vel = 0.0 # 0. - ang_vel = 0.0 # [0.3, 0.15, 0.4] - gravity_vec = 0.0 # 0.05 + pass class reward: class weights: - tracking_lin_vel = 2.0 + tracking_lin_vel = 4.0 tracking_ang_vel = 2.0 lin_vel_z = 0.0 ang_vel_xy = 0.01 - orientation = 0.0 - torques = 5.0e-6 + orientation = 1.0 + torques = 5.0e-7 dof_vel = 0.0 - min_base_height = 0.0 # 1.5 + min_base_height = 1.5 collision = 0.0 action_rate = 0.01 action_rate2 = 0.001 @@ -175,16 +119,16 @@ class weights: dof_pos_limits = 0.0 feet_contact_forces = 0.0 dof_near_home = 0.0 - reference_traj = 0.0 # 1.5 - swing_grf = 0.0 # 0.5 - stance_grf = 0.0 # 0.5 + reference_traj = 1.5 + swing_grf = 1.5 + stance_grf = 1.5 class pbrs_weights: - reference_traj = 1.0 - swing_grf = 1.0 - stance_grf = 1.0 - min_base_height = 1.0 - orientation = 1.0 + reference_traj = 0.0 + swing_grf = 0.0 + stance_grf = 0.0 + min_base_height = 0.0 + orientation = 0.0 class termination_weight: termination = 0.15 diff --git a/learning/runners/my_runner.py b/learning/runners/my_runner.py index 7b912cb..74b842d 100644 --- a/learning/runners/my_runner.py +++ b/learning/runners/my_runner.py @@ -3,6 +3,7 @@ from learning.utils import Logger from learning.utils import PotentialBasedRewardShaping +from learning.utils import remove_zero_weighted_rewards from .on_policy_runner import OnPolicyRunner @@ -21,7 +22,7 @@ def __init__(self, env: VecEnv, train_cfg, device="cpu"): def learn(self): self.set_up_logger() - + remove_zero_weighted_rewards(self.policy_cfg["reward"]["pbrs_weights"]) PBRS = PotentialBasedRewardShaping( self.policy_cfg["reward"]["pbrs_weights"], self.device )
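
For reference, the core of the new MiniCheetahOsc environment is the ground-reaction-force-coupled phase oscillator integrated in _step_oscillators once per physics step (dt = control dt / decimation). Below is a minimal standalone sketch of that update, assuming the default MiniCheetahOscCfg.osc values; the helper name step_oscillators and the zero-GRF placeholder input are illustrative only, not identifiers from this patch.

import torch

def step_oscillators(theta, grf, omega, coupling, offset, dt, noise_std=0.0):
    # theta: (num_envs, 4) per-leg phases in [0, 2*pi); grf normalized to body weight.
    # Loading a foot while cos(theta) + offset > 0 slows that leg's phase.
    theta_dot = omega - grf * coupling * (torch.cos(theta) + offset)
    theta_dot = theta_dot + torch.randn_like(theta) * noise_std  # process noise
    theta_dot = 2 * torch.pi * theta_dot  # omega and coupling are specified in Hz
    theta = torch.remainder(theta + theta_dot * dt, 2 * torch.pi)
    # The policy observes the phases through their cosine/sine embedding (oscillator_obs).
    obs = torch.cat((torch.cos(theta), torch.sin(theta)), dim=1)
    return theta, theta_dot, obs

num_envs, dt = 4096, 1.0 / 500.0                 # desired_sim_frequency = 500 Hz
theta = torch.rand(num_envs, 4) * 2 * torch.pi   # cfg.osc.init_to = "random"
grf = torch.zeros(num_envs, 4)                   # placeholder: all feet unloaded
omega = torch.full((num_envs, 1), 3.0)           # cfg.osc.omega [Hz]
coupling = torch.full((num_envs, 1), 1.0)        # cfg.osc.coupling
offset = torch.full((num_envs, 1), 1.0)          # cfg.osc.offset
theta, theta_dot, obs = step_oscillators(
    theta, grf, omega, coupling, offset, dt, noise_std=0.25  # cfg.osc.process_noise_std
)

With zero GRF the phases advance at a constant 3 Hz; once a foot is loaded, the feedback term coupling * (cos(theta) + offset) slows that leg's oscillator. The swing_grf / stance_grf rewards then shape when contact occurs relative to the phase, and the gait-similarity terms (_reward_trot, _reward_pace, _reward_bound, _reward_pronk) score the resulting phase relationships between legs.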