modified reset output, added rllab example
jxx123 committed Jan 29, 2018
1 parent d0287da commit 6b5d74a
Showing 10 changed files with 235 additions and 88 deletions.
54 changes: 51 additions & 3 deletions README.md
@@ -71,8 +71,9 @@ class MyController(Controller):
Every controller must have this implementation!
----
Inputs:
observation - a namedtuple defined in simglucose.simulation.env. It has
CHO and CGM two entries.
observation - a namedtuple defined in simglucose.simulation.env. For
now, it only has one entry: the blood glucose level
measured by the CGM sensor.
reward - current reward returned by environment
done - True, game over. False, game continues
info - additional information as keyword arguments,
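
Since the observation handed to `policy` now carries only the CGM reading, a minimal controller written against the updated interface could look like the sketch below. It is only an illustration: the `Controller`/`Action` imports follow the README's customized-controller example, and the fixed doses are arbitrary placeholders, not recommendations.

```python
from simglucose.controller.base import Controller, Action


class ConstantBasalController(Controller):
    """Toy controller: ignore the CGM reading and deliver a constant basal."""

    def __init__(self, init_state=None):
        self.init_state = init_state
        self.state = init_state

    def policy(self, observation, reward, done, **info):
        self.state = observation.CGM          # the only field the namedtuple exposes now
        return Action(basal=0.02, bolus=0)    # arbitrary illustrative doses

    def reset(self):
        self.state = self.init_state
```

An instance can then be passed to `simulate(...)` through its `controller` argument, exactly like the `MyController` example above.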
@@ -110,7 +111,7 @@ simulate(sim_time=my_sim_time,
animate=False,
parallel=True)
```
### OpenAI gym Usage
### OpenAI Gym usage
```python
import gym

@@ -145,6 +146,53 @@ for t in range(100):
break
```

### rllab usage
```python
from rllab.algos.ddpg import DDPG
from rllab.envs.normalized_env import normalize
from rllab.exploration_strategies.ou_strategy import OUStrategy
from rllab.policies.deterministic_mlp_policy import DeterministicMLPPolicy
from rllab.q_functions.continuous_mlp_q_function import ContinuousMLPQFunction
from rllab.envs.gym_env import GymEnv
from gym.envs.registration import register

register(
id='simglucose-adolescent2-v0',
entry_point='simglucose.envs:T1DSimEnv',
kwargs={'patient_name': 'adolescent#002'}
)

env = GymEnv('simglucose-adolescent2-v0')
env = normalize(env)

policy = DeterministicMLPPolicy(
env_spec=env.spec,
# The neural network policy should have two hidden layers, each with 32 hidden units.
hidden_sizes=(32, 32)
)

es = OUStrategy(env_spec=env.spec)

qf = ContinuousMLPQFunction(env_spec=env.spec)

algo = DDPG(
env=env,
policy=policy,
es=es,
qf=qf,
batch_size=32,
max_path_length=100,
epoch_length=1000,
min_pool_size=10000,
n_epochs=1000,
discount=0.99,
scale_reward=0.01,
qf_learning_rate=1e-3,
policy_learning_rate=1e-4
)
algo.train()
```

## Advanced Usage
You can create the simulation objects, and run batch simulation. For example,
```python
5 changes: 3 additions & 2 deletions examples/apply_customized_controller.py
@@ -12,8 +12,9 @@ def policy(self, observation, reward, done, **info):
Every controller must have this implementation!
----
Inputs:
observation - a namedtuple defined in simglucose.simulation.env. It has
CHO and CGM two entries.
observation - a namedtuple defined in simglucose.simulation.env. For
now, it only has one entry: the blood glucose level
measured by the CGM sensor.
reward - current reward returned by environment
done - True, game over. False, game continues
info - additional information as keyword arguments,
31 changes: 31 additions & 0 deletions examples/run_gym.py
@@ -0,0 +1,31 @@
import gym

# Register gym environment. By specifying kwargs,
# you are able to choose which patient to simulate.
# patient_name must be 'adolescent#001' to 'adolescent#010',
# or 'adult#001' to 'adult#010', or 'child#001' to 'child#010'
from gym.envs.registration import register
register(
id='simglucose-adolescent2-v0',
entry_point='simglucose.envs:T1DSimEnv',
kwargs={'patient_name': 'adolescent#002'}
)

env = gym.make('simglucose-adolescent2-v0')

observation, reward, done, info = env.reset()
for t in range(100):
env.render()
print(observation)
# Action in the gym environment is a scalar
# representing the basal insulin, which differs from
# the regular controller action outside the gym
# environment (a tuple (basal, bolus)).
# Ideally, the agent should be able
# to control the glucose through basal alone, instead
# of asking the patient to take a bolus.
action = env.action_space.sample()
observation, reward, done, info = env.step(action)
if done:
print("Episode finished after {} timesteps".format(t + 1))
break
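
The comments above stress that the gym action is a single basal rate rather than a `(basal, bolus)` tuple. Below is a rough sketch of driving the same environment with a constant basal instead of `env.action_space.sample()`; the basal value is an arbitrary illustration, and if the wrapper expects an array-valued action, wrap the scalar in `np.array([basal])`.

```python
import gym
from gym.envs.registration import register

# Same registration as in examples/run_gym.py above.
register(
    id='simglucose-adolescent2-v0',
    entry_point='simglucose.envs:T1DSimEnv',
    kwargs={'patient_name': 'adolescent#002'}
)

env = gym.make('simglucose-adolescent2-v0')
observation, reward, done, info = env.reset()

basal = 0.02  # constant basal rate, chosen only for illustration
for t in range(100):
    # The scalar action is interpreted as the basal insulin rate.
    observation, reward, done, info = env.step(basal)
    print(t, observation, reward)
    if done:
        print("Episode finished after {} timesteps".format(t + 1))
        break
```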
43 changes: 43 additions & 0 deletions examples/run_rllab.py
@@ -0,0 +1,43 @@
from rllab.algos.ddpg import DDPG
from rllab.envs.normalized_env import normalize
from rllab.exploration_strategies.ou_strategy import OUStrategy
from rllab.policies.deterministic_mlp_policy import DeterministicMLPPolicy
from rllab.q_functions.continuous_mlp_q_function import ContinuousMLPQFunction
from rllab.envs.gym_env import GymEnv
from gym.envs.registration import register

register(
id='simglucose-adolescent2-v0',
entry_point='simglucose.envs:T1DSimEnv',
kwargs={'patient_name': 'adolescent#002'}
)

env = GymEnv('simglucose-adolescent2-v0')
env = normalize(env)

policy = DeterministicMLPPolicy(
env_spec=env.spec,
# The neural network policy should have two hidden layers, each with 32 hidden units.
hidden_sizes=(32, 32)
)

es = OUStrategy(env_spec=env.spec)

qf = ContinuousMLPQFunction(env_spec=env.spec)

algo = DDPG(
env=env,
policy=policy,
es=es,
qf=qf,
batch_size=32,
max_path_length=100,
epoch_length=1000,
min_pool_size=10000,
n_epochs=1000,
discount=0.99,
scale_reward=0.01,
qf_learning_rate=1e-3,
policy_learning_rate=1e-4
)
algo.train()
2 changes: 1 addition & 1 deletion setup.py
@@ -2,7 +2,7 @@


setup(name='simglucose',
version='0.1.6',
version='0.1.7',
description='A Type-1 Diabetes Simulator as a Reinforcement Learning Environment in OpenAI gym or rllab (python implementation of UVa/Padova Simulator)',
url='https://github.com/jxx123/simglucose',
author='Jinyu Xie',
7 changes: 5 additions & 2 deletions simglucose/analysis/risk.py
@@ -1,4 +1,5 @@
import numpy as np
import warnings


def risk_index(BG, horizon):
@@ -8,7 +9,9 @@ def risk_index(BG, horizon):
fBG = 1.509 * (np.log(BG_to_compute)**1.084 - 5.381)
rl = 10 * fBG[fBG < 0]**2
rh = 10 * fBG[fBG > 0]**2
LBGI = np.nan_to_num(np.mean(rl))
HBGI = np.nan_to_num(np.mean(rh))
with warnings.catch_warnings():
warnings.simplefilter('ignore')
LBGI = np.nan_to_num(np.mean(rl))
HBGI = np.nan_to_num(np.mean(rh))
RI = LBGI + HBGI
return (LBGI, HBGI, RI)
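
Context for the change above: when every BG sample in the window lies on one side of the target, `rl` or `rh` is empty, `np.mean` emits a RuntimeWarning and returns NaN, and `np.nan_to_num` already maps that NaN to 0. Wrapping the two means in `warnings.catch_warnings()` simply silences that harmless warning. A quick way to exercise the function is sketched below; it assumes, from the `BG_to_compute` name, that `horizon` selects the most recent samples, since the full function body is not shown in this hunk.

```python
from simglucose.analysis.risk import risk_index

# An all-hyperglycemic trace: rl ends up empty, so LBGI falls back to 0
# via nan_to_num without printing a RuntimeWarning anymore.
bg_trace = [180, 200, 220, 240]            # BG samples in mg/dL
LBGI, HBGI, RI = risk_index(bg_trace, horizon=4)
print(LBGI, HBGI, RI)                      # expect LBGI == 0.0
```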
17 changes: 0 additions & 17 deletions simglucose/requirements.txt

This file was deleted.

65 changes: 2 additions & 63 deletions simglucose/simulation/env.py
@@ -1,4 +1,3 @@
import numpy as np
from simglucose.patient.t1dpatient import Action
from simglucose.analysis.risk import risk_index
import pandas as pd
@@ -7,15 +6,9 @@
from collections import namedtuple
from simglucose.simulation.rendering import Viewer

rllab = True
try:
from rllab.envs.base import Env
from rllab.envs.base import Step
from rllab.spaces import Box
except ImportError:
rllab = False
print('You could use rllab features, if you have rllab module.')

_Step = namedtuple("Step",
["observation", "reward", "done", "info"])

@@ -27,21 +20,11 @@ def Step(observation, reward, done, **kwargs):
"""
return _Step(observation, reward, done, kwargs)

class Env(object):
def __init__():
pass

def step(self, action):
raise NotImplementedError

def reset(self):
raise NotImplementedError

Observation = namedtuple('Observation', ['CGM'])
logger = logging.getLogger(__name__)


class T1DSimEnv(Env):
class T1DSimEnv(object):
def __init__(self,
patient,
sensor,
@@ -147,28 +130,7 @@ def reset(self):
self._reset()
CGM = self.sensor.measure(self.patient)
obs = Observation(CGM=CGM)
return Step(observation=obs,
reward=0,
done=False,
sample_time=self.sample_time,
patient_name=self.patient.name,
meal=0)

@property
def action_space(self):
if rllab:
ub = np.array([self.pump._params['max_basal'],
self.pump._params['max_bolus']])
return Box(low=np.array([0, 0]), high=ub)
else:
pass

@property
def observation_space(self):
if rllab:
return Box(low=0, high=np.inf, shape=(1,))
else:
pass
return obs

def render(self, close=False):
if close:
@@ -194,26 +156,3 @@ def show_history(self):
df['Risk'] = pd.Series(self.risk_hist)
df = df.set_index('Time')
return df


def adjust_ylim(ax, ymin, ymax):
ylim = ax.get_ylim()
update = False

if ymin < ylim[0]:
y1 = ymin - 0.1 * abs(ymin)
update = True
else:
y1 = ylim[0]

if ymax > ylim[1]:
y2 = ymax + 0.1 * abs(ymax)
update = True
else:
y2 = ylim[1]

if update:
ax.set_ylim([y1, y2])
for spine in ax.spines.values():
ax.draw_artist(spine)
ax.draw_artist(ax.yaxis)
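
Net effect of the env.py changes: the bundled rllab `Env`/`Box` integration and the `adjust_ylim` plotting helper are removed, and `reset()` now returns only the initial `Observation` instead of a full `Step` tuple, which is the "modified reset output" in the commit title. Below is a rough sketch of the updated contract against the core environment; object construction follows the README's Advanced Usage example, so treat the factory calls and doses as assumptions rather than the definitive API.

```python
from datetime import datetime

from simglucose.simulation.env import T1DSimEnv
from simglucose.patient.t1dpatient import T1DPatient
from simglucose.sensor.cgm import CGMSensor
from simglucose.actuator.pump import InsulinPump
from simglucose.simulation.scenario_gen import RandomScenario
from simglucose.controller.base import Action

patient = T1DPatient.withName('adolescent#002')
sensor = CGMSensor.withName('Dexcom', seed=1)
pump = InsulinPump.withName('Insulet')
scenario = RandomScenario(start_time=datetime(2018, 1, 1), seed=1)
env = T1DSimEnv(patient, sensor, pump, scenario)

obs = env.reset()   # previously: obs, reward, done, info = env.reset()
print(obs.CGM)      # Observation now exposes only the CGM reading

step = env.step(Action(basal=0.02, bolus=0))  # arbitrary illustrative doses
print(step.observation.CGM, step.reward, step.done, step.info)
```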
1 change: 1 addition & 0 deletions tests/test_render.py
@@ -18,6 +18,7 @@ def test_rendering(self):
for i in range(len(self.df)):
df_tmp = self.df.iloc[0:(i + 1), :]
viewer.render(df_tmp)
viewer.close()


if __name__ == '__main__':