From 63663b73c221fabf62430c8dfbcb17ebf37ad098 Mon Sep 17 00:00:00 2001
From: Divye Pratap Jain <divye@D-10-18-171-180.dhcp4.washington.edu>
Date: Fri, 17 May 2019 15:07:27 -0700
Subject: [PATCH 1/2] added evaluate_policy_vil in gym_env

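Add evaluate_policy_vil: evaluate a policy that acts from a stack of the
three most recent camera frames plus proprioceptive state (optionally
including tactile readings). Reports the same return statistics as
evaluate_policy.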
---
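Usage sketch (hypothetical: the env id, policy pickle, and camera name are
illustrative, and the wrapped env is assumed to expose get_pixels and
get_proprioception):

    import pickle
    from mjrl.utils.gym_env import GymEnv

    e = GymEnv('relocate-v0')                          # assumed env id
    policy = pickle.load(open('policy.pickle', 'rb'))  # assumed policy file
    base_stats, percentile_stats, full_dist = e.evaluate_policy_vil(
        policy,
        use_tactile=True,
        num_episodes=10,
        mean_action=True,
        camera_name='vil_camera',                      # assumed camera name
        frame_size=(128, 128))
    mean_return, std, min_return, max_return = base_stats
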
 mjrl/utils/gym_env.py | 87 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 87 insertions(+)

diff --git a/mjrl/utils/gym_env.py b/mjrl/utils/gym_env.py
index 9061b05..de575b5 100644
--- a/mjrl/utils/gym_env.py
+++ b/mjrl/utils/gym_env.py
@@ -1,5 +1,6 @@
 import gym
 import numpy as np
+import tqdm
 
 class EnvSpec(object):
     def __init__(self, obs_dim, act_dim, horizon, num_agents):
@@ -79,6 +80,7 @@ def evaluate_policy(self, policy,
         if seed is not None:
             self.env._seed(seed)
         horizon = self._horizon if horizon is None else horizon
+
         mean_eval, std, min_eval, max_eval = 0.0, 0.0, -1e8, -1e8
         ep_returns = np.zeros(num_episodes)
 
@@ -123,3 +125,88 @@ def evaluate_policy(self, policy,
             full_dist = ep_returns
 
         return [base_stats, percentile_stats, full_dist]
+
+    def evaluate_policy_vil(self, policy,
+                            use_tactile,
+                            num_episodes=5,
+                            horizon=None,
+                            gamma=1,
+                            percentile=[],
+                            get_full_dist=False,
+                            mean_action=False,
+                            terminate_at_done=True,
+                            seed=None,
+                            camera_name=None,
+                            device_id=0,
+                            use_cuda=False,
+                            frame_size=(128, 128)):
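+        # Roll out `policy` for `num_episodes` episodes, feeding it a stack of
+        # the three most recent camera frames plus proprioceptive (optionally
+        # tactile) state; returns the same statistics as evaluate_policy.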
+
+        if seed is not None:
+            self.env.env._seed(seed)
+            np.random.seed(seed)
+
+        horizon = self._horizon if horizon is None else horizon
+        mean_eval, std, min_eval, max_eval = 0.0, 0.0, -1e8, -1e8
+        ep_returns = np.zeros(num_episodes)
+
+        for ep in tqdm(range(num_episodes)):
+
+            self.reset()
+            robot_info = self.env.env.get_proprioception(use_tactile=use_tactile)
+
+            path_image_pixels = []
+            t, done = 0, False
+            while t < horizon and not (done and terminate_at_done):
+                image_pix = self.get_pixels(frame_size=frame_size, camera_name=camera_name, device_id=device_id)
+                img = image_pix
+                prev_img = image_pix
+                prev_prev_img = image_pix
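+                # For t < 2, pad the missing history by repeating the current frame.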
+                if t > 0:
+                    prev_img = path_image_pixels[t - 1]
+
+                if t > 1:
+                    prev_prev_img = path_image_pixels[t - 2]
+                path_image_pixels.append(img)
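+                # Add a leading axis to each frame so the stack below has shape
+                # (3, H, W, C), ordered oldest to newest.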
+                prev_prev_img = np.expand_dims(prev_prev_img, axis=0)
+                prev_img = np.expand_dims(prev_img, axis=0)
+                img = np.expand_dims(img, axis=0)
+
+                o = np.concatenate((prev_prev_img, prev_img, img), axis=0)
+
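+                # get_action returns (sampled_action, info_dict); use the mean
+                # from info_dict for deterministic evaluation when requested.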
+                if mean_action:
+                    a = policy.get_action(o, robot_info=robot_info, use_cuda=use_cuda)[1]['mean']
+                else:
+                    a = policy.get_action(o, robot_info=robot_info, use_cuda=use_cuda)[0]
+
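+                # The observation returned by step is unused: the next frame
+                # stack is rebuilt from freshly rendered pixels.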
+                o, r, done, _ = self.step(a)
+
+                robot_info = self.env.env.get_proprioception(use_tactile=use_tactile)
+
+                ep_returns[ep] += (gamma ** t) * r
+                t += 1
+
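+        # Aggregate return statistics across all evaluated episodes.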
+        mean_eval, std = np.mean(ep_returns), np.std(ep_returns)
+        min_eval, max_eval = np.amin(ep_returns), np.amax(ep_returns)
+        base_stats = [mean_eval, std, min_eval, max_eval]
+
+        percentile_stats = []
+        full_dist = []
+
+        for p in percentile:
+            percentile_stats.append(np.percentile(ep_returns, p))
+
+        if get_full_dist:
+            full_dist = ep_returns
+
+        return [base_stats, percentile_stats, full_dist]

From 7b32d4d342f55cb6a9834c7b2426d26812def450 Mon Sep 17 00:00:00 2001
From: Divye Pratap Jain <divye@D-10-18-171-180.dhcp4.washington.edu>
Date: Fri, 17 May 2019 22:29:23 -0700
Subject: [PATCH 2/2] fix tqdm import and get_pixels call in gym_env

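'import tqdm' binds the module, but evaluate_policy_vil calls tqdm(...)
directly, so import the callable instead. Also route get_pixels through
self.env.env, matching the get_proprioception calls.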
---
 mjrl/utils/gym_env.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mjrl/utils/gym_env.py b/mjrl/utils/gym_env.py
index de575b5..eac628c 100644
--- a/mjrl/utils/gym_env.py
+++ b/mjrl/utils/gym_env.py
@@ -1,6 +1,6 @@
 import gym
 import numpy as np
-import tqdm
+from tqdm import tqdm
 
 class EnvSpec(object):
     def __init__(self, obs_dim, act_dim, horizon, num_agents):
@@ -160,7 +160,7 @@ def evaluate_policy_vil(self, policy,
             path_image_pixels = []
             t, done = 0, False
             while t < horizon and not (done and terminate_at_done):
-                image_pix = self.get_pixels(frame_size=frame_size, camera_name=camera_name, device_id=device_id)
+                image_pix = self.env.env.get_pixels(frame_size=frame_size, camera_name=camera_name, device_id=device_id)
                 img = image_pix
                 prev_img = image_pix
                 prev_prev_img = image_pix