added workaround for gym.make selecting patient name

jxx123 · Jan 11, 2018 · d0287da · d0287da
1 parent 8667b80
commit d0287da
Show file tree

Hide file tree

Showing 4 changed files with 69 additions and 36 deletions.
diff --git a/README.md b/README.md
@@ -10,27 +10,8 @@ This simulator is a python implementation of the FDA-approved [UVa/Padova Simula
 | ![animation screenshot](https://github.com/jxx123/simglucose/blob/master/screenshots/animate.png) | ![CVGA](https://github.com/jxx123/simglucose/blob/master/screenshots/CVGA.png) | ![BG Trace Plot](https://github.com/jxx123/simglucose/blob/master/screenshots/BG_trace_plot.png) | ![Risk Index Stats](https://github.com/jxx123/simglucose/blob/master/screenshots/risk_index.png) |
 
   <!-- ![Zone Stats](https://github.com/jxx123/simglucose/blob/master/screenshots/zone_stats.png) -->
-## Release Notes, 1/7/2017
-- Added OpenAI gym support, use `gym.make('simglucose-v0')` to make the enviroment.
-For example, 
-```python
-import gym
-import simglucose
-env = gym.make('simglucose-v0')
 
-observation, reward, done, info = env.reset()
-for t in range(100):
-    env.render()
-    print(observation)
-    action = env.action_space.sample()
-    observation, reward, done, info = env.step(action)
-    if done:
-        print("Episode finished after {} timesteps".format(t + 1))
-        break
-```
-
-- Noticed issue: the patient name selection is not available in gym.make for now. The patient name has to be hard-coded in the constructor of `simglucose.envs.T1DSimEnv`.
-## Release Notes, 12/31/2017
+## Main Features
 - Simulation environment follows [OpenAI gym](https://github.com/openai/gym) and [rllab](https://github.com/rll/rllab) APIs. It returns observation, reward, done, info at each step, which means the simulator is "reinforcement-learning-ready".
 - The reward at each step is `risk[t-1] - risk[t]`. Customized reward is not supported for now. `risk[t]` is the risk index at time `t` defined in this [paper](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2903980/pdf/dia.2008.0138.pdf). 
 - Supports parallel computing. The simulator simulates multiple patients in parallel using the [pathos multiprocessing package](https://github.com/uqfoundation/pathos) (you are free to turn parallel off by setting `parallel=False`).
@@ -67,6 +48,7 @@ If [rllab (optional)](https://github.com/rll/rllab) is installed, the package wi
 Note: there might be some minor differences between auto install version and manual install version. Use `git clone` and manual installation to get the latest version.
 
 ## Quick Start
+### Use simglucose as a simulator and test controllers
 Run the simulator user interface
 ```python
 from simglucose.simulation.user_interface import simulate
@@ -128,6 +110,40 @@ simulate(sim_time=my_sim_time,
          animate=False,
          parallel=True)
 ```
+### OpenAI gym Usage
+```python
+import gym
+
+# Register gym environment. By specifying kwargs,
+# you are able to choose which patient to simulate.
+# patient_name must be 'adolescent#001' to 'adolescent#010',
+# or 'adult#001' to 'adult#010', or 'child#001' to 'child#010'
+from gym.envs.registration import register
+register(
+    id='simglucose-adolescent2-v0',
+    entry_point='simglucose.envs:T1DSimEnv',
+    kwargs={'patient_name': 'adolescent#002'}
+)
+
+env = gym.make('simglucose-adolescent2-v0')
+
+observation, reward, done, info = env.reset()
+for t in range(100):
+    env.render()
+    print(observation)
+    # Action in the gym environment is a scalar
+    # representing the basal insulin, which differs from
+    # the regular controller action outside the gym
+    # environment (a tuple (basal, bolus)).
+    # Ideally, the agent should be able to control the
+    # glucose through basal insulin alone, without asking
+    # the patient to take a bolus.
+    action = env.action_space.sample()
+    observation, reward, done, info = env.step(action)
+    if done:
+        print("Episode finished after {} timesteps".format(t + 1))
+        break
+```
 
 ## Advanced Usage
 You can create the simulation objects, and run batch simulation. For example,
@@ -212,3 +228,8 @@ name = [_f[:-4] for _f in filename]   # get the filename without extension
 df = pd.concat([pd.read_csv(f, index_col=0) for f in filename], keys=name)
 report(df)
 ```
+## Release Notes, 1/10/2018
+- Added a workaround for selecting the patient when making the gym environment: register the gym environment and pass `patient_name` through `kwargs`.
+## Release Notes, 1/7/2018
+- Added OpenAI gym support, use `gym.make('simglucose-v0')` to make the environment.
+- Noticed issue: the patient name selection is not available in gym.make for now. The patient name has to be hard-coded in the constructor of `simglucose.envs.T1DSimEnv`.
diff --git a/setup.py b/setup.py
@@ -2,7 +2,7 @@
 
 
 setup(name='simglucose',
-      version='0.1.5',
+      version='0.1.6',
       description='A Type-1 Diabetes Simulator as a Reinforcement Learning Environment in OpenAI gym or rllab (python implementation of UVa/Padova Simulator)',
       url='https://github.com/jxx123/simglucose',
       author='Jinyu Xie',

diff --git a/simglucose/envs/simglucose_gym_env.py b/simglucose/envs/simglucose_gym_env.py
@@ -27,20 +27,23 @@ def __init__(self, patient_name=None):
         patient_name must be 'adolescent#001' to 'adolescent#010',
         or 'adult#001' to 'adult#010', or 'child#001' to 'child#010'
         '''
+        seeds = self._seed()
+        # have to hard code the patient_name, gym has some interesting
+        # error when choosing the patient
         if patient_name is None:
-            # patient_name = self.pick_patient()
-
-            # have to hard code the patient_name, gym has some interesting
-            # error when choosing the patient
             patient_name = 'adolescent#001'
         patient = T1DPatient.withName(patient_name)
-        sensor = CGMSensor.withName('Dexcom')
-        scenario = RandomScenario(start_time=datetime(2018, 1, 1, 0, 0, 0))
+        sensor = CGMSensor.withName('Dexcom', seed=seeds[1])
+        hour = self.np_random.randint(low=0.0, high=24.0)
+        start_time = datetime(2018, 1, 1, hour, 0, 0)
+        scenario = RandomScenario(start_time=start_time, seed=seeds[2])
         pump = InsulinPump.withName('Insulet')
         self.env = _T1DSimEnv(patient, sensor, pump, scenario)
 
     @staticmethod
     def pick_patient():
+        # TODO: cannot be used to pick patient at the env constructing space
+        # for now
         patient_params = pd.read_csv(PATIENT_PARA_FILE)
         while True:
             print('Select patient:')
@@ -67,14 +70,13 @@ def _reset(self):
         return self.env.reset()
 
     def _seed(self, seed=None):
-        rng, seed1 = seeding.np_random(seed=seed)
+        self.np_random, seed1 = seeding.np_random(seed=seed)
         # Derive a random seed. This gets passed as a uint, but gets
         # checked as an int elsewhere, so we need to keep it below
         # 2**31.
         seed2 = seeding.hash_seed(seed1 + 1) % 2**31
-        self.env.sensor.seed = seed1
-        self.env.scenario.seed = seed2
-        return [seed1, seed2]
+        seed3 = seeding.hash_seed(seed2 + 1) % 2**31
+        return [seed1, seed2, seed3]
 
     def _render(self, mode='human', close=False):
         self.env.render(close=close)

diff --git a/tests/test_gym.py b/tests/test_gym.py
@@ -1,17 +1,27 @@
 import gym
-import simglucose
 import unittest
+from simglucose.controller.basal_bolus_ctrller import BBController
 
 
 class TestGym(unittest.TestCase):
     def test_gym_random_agent(self):
-        env = gym.make('simglucose-v0')
+        from gym.envs.registration import register
+        register(
+            id='simglucose-adolescent2-v0',
+            entry_point='simglucose.envs:T1DSimEnv',
+            kwargs={'patient_name': 'adolescent#002'}
+        )
 
-        observation = env.reset()
-        for t in range(100):
+        env = gym.make('simglucose-adolescent2-v0')
+        ctrller = BBController()
+
+        observation, reward, done, info = env.reset()
+        for t in range(200):
             env.render()
             print(observation)
-            action = env.action_space.sample()
+            # action = env.action_space.sample()
+            ctrl_action = ctrller.policy(observation, reward, done, **info)
+            action = ctrl_action.basal + ctrl_action.bolus
             observation, reward, done, info = env.step(action)
             if done:
                 print("Episode finished after {} timesteps".format(t + 1))