modified reset output, added rllab example
jxx123 committed Jan 29, 2018
1 parent d0287da commit 6b5d74a
Showing 10 changed files with 235 additions and 88 deletions.
54 changes: 51 additions & 3 deletions README.md
@@ -71,8 +71,9 @@ class MyController(Controller):
Every controller must have this implementation!
----
Inputs:
observation - a namedtuple defined in simglucose.simulation.env. It has
CHO and CGM two entries.
observation - a namedtuple defined in simglucose.simulation.env. For
now, it only has one entry: the blood glucose level
measured by the CGM sensor.
reward - current reward returned by environment
done - True, game over. False, game continues
info - additional information as keyword arguments,
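
Since the observation handed to `policy` now carries only the CGM reading, a minimal controller written against the updated interface could look like the sketch below. It is only an illustration: the `Controller`/`Action` imports follow the README's customized-controller example, and the fixed doses are arbitrary placeholders, not recommendations.

```python
from simglucose.controller.base import Controller, Action


class ConstantBasalController(Controller):
    """Toy controller: ignore the CGM reading and deliver a constant basal."""

    def __init__(self, init_state=None):
        self.init_state = init_state
        self.state = init_state

    def policy(self, observation, reward, done, **info):
        self.state = observation.CGM          # the only field the namedtuple exposes now
        return Action(basal=0.02, bolus=0)    # arbitrary illustrative doses

    def reset(self):
        self.state = self.init_state
```

An instance can then be passed to `simulate(...)` through its `controller` argument, exactly like the `MyController` example above.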
@@ -110,7 +111,7 @@ simulate(sim_time=my_sim_time,
animate=False,
parallel=True)
```
### OpenAI gym Usage
### OpenAI Gym usage
```python
import gym

@@ -145,6 +146,53 @@ for t in range(100):
break
```

### rllab usage
```python
from rllab.algos.ddpg import DDPG
from rllab.envs.normalized_env import normalize
from rllab.exploration_strategies.ou_strategy import OUStrategy
from rllab.policies.deterministic_mlp_policy import DeterministicMLPPolicy
from rllab.q_functions.continuous_mlp_q_function import ContinuousMLPQFunction
from rllab.envs.gym_env import GymEnv
from gym.envs.registration import register

register(
id='simglucose-adolescent2-v0',
entry_point='simglucose.envs:T1DSimEnv',
kwargs={'patient_name': 'adolescent#002'}
)

env = GymEnv('simglucose-adolescent2-v0')
env = normalize(env)

policy = DeterministicMLPPolicy(
env_spec=env.spec,
# The neural network policy should have two hidden layers, each with 32 hidden units.
hidden_sizes=(32, 32)
)

es = OUStrategy(env_spec=env.spec)

qf = ContinuousMLPQFunction(env_spec=env.spec)

algo = DDPG(
env=env,
policy=policy,
es=es,
qf=qf,
batch_size=32,
max_path_length=100,
epoch_length=1000,
min_pool_size=10000,
n_epochs=1000,
discount=0.99,
scale_reward=0.01,
qf_learning_rate=1e-3,
policy_learning_rate=1e-4
)
algo.train()
```

## Advanced Usage
You can create the simulation objects, and run batch simulation. For example,
```python
5 changes: 3 additions & 2 deletions examples/apply_customized_controller.py
@@ -12,8 +12,9 @@ def policy(self, observation, reward, done, **info):
Every controller must have this implementation!
----
Inputs:
observation - a namedtuple defined in simglucose.simulation.env. It has
CHO and CGM two entries.
observation - a namedtuple defined in simglucose.simulation.env. For
now, it only has one entry: the blood glucose level
measured by the CGM sensor.
reward - current reward returned by environment
done - True, game over. False, game continues
info - additional information as keyword arguments,
31 changes: 31 additions & 0 deletions examples/run_gym.py
@@ -0,0 +1,31 @@
import gym

# Register gym environment. By specifying kwargs,
# you are able to choose which patient to simulate.
# patient_name must be 'adolescent#001' to 'adolescent#010',
# or 'adult#001' to 'adult#010', or 'child#001' to 'child#010'
from gym.envs.registration import register
register(
id='simglucose-adolescent2-v0',
entry_point='simglucose.envs:T1DSimEnv',
kwargs={'patient_name': 'adolescent#002'}
)

env = gym.make('simglucose-adolescent2-v0')

observation, reward, done, info = env.reset()
for t in range(100):
env.render()
print(observation)
# Action in the gym environment is a scalar
# representing the basal insulin, which differs from
# the regular controller action outside the gym
# environment (a tuple (basal, bolus)).
# Ideally, the agent should be able
# to control the glucose through basal alone, instead
# of asking the patient to take a bolus.
action = env.action_space.sample()
observation, reward, done, info = env.step(action)
if done:
print("Episode finished after {} timesteps".format(t + 1))
break
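
The comments above stress that the gym action is a single basal rate rather than a `(basal, bolus)` tuple. Below is a rough sketch of driving the same environment with a constant basal instead of `env.action_space.sample()`; the basal value is an arbitrary illustration, and if the wrapper expects an array-valued action, wrap the scalar in `np.array([basal])`.

```python
import gym
from gym.envs.registration import register

# Same registration as in examples/run_gym.py above.
register(
    id='simglucose-adolescent2-v0',
    entry_point='simglucose.envs:T1DSimEnv',
    kwargs={'patient_name': 'adolescent#002'}
)

env = gym.make('simglucose-adolescent2-v0')
observation, reward, done, info = env.reset()

basal = 0.02  # constant basal rate, chosen only for illustration
for t in range(100):
    # The scalar action is interpreted as the basal insulin rate.
    observation, reward, done, info = env.step(basal)
    print(t, observation, reward)
    if done:
        print("Episode finished after {} timesteps".format(t + 1))
        break
```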
43 changes: 43 additions & 0 deletions examples/run_rllab.py
@@ -0,0 +1,43 @@
from rllab.algos.ddpg import DDPG
from rllab.envs.normalized_env import normalize
from rllab.exploration_strategies.ou_strategy import OUStrategy
from rllab.policies.deterministic_mlp_policy import DeterministicMLPPolicy
from rllab.q_functions.continuous_mlp_q_function import ContinuousMLPQFunction
from rllab.envs.gym_env import GymEnv
from gym.envs.registration import register

register(
id='simglucose-adolescent2-v0',
entry_point='simglucose.envs:T1DSimEnv',
kwargs={'patient_name': 'adolescent#002'}
)

env = GymEnv('simglucose-adolescent2-v0')
env = normalize(env)

policy = DeterministicMLPPolicy(
env_spec=env.spec,
# The neural network policy should have two hidden layers, each with 32 hidden units.
hidden_sizes=(32, 32)
)

es = OUStrategy(env_spec=env.spec)

qf = ContinuousMLPQFunction(env_spec=env.spec)

algo = DDPG(
env=env,
policy=policy,
es=es,
qf=qf,
batch_size=32,
max_path_length=100,
epoch_length=1000,
min_pool_size=10000,
n_epochs=1000,
discount=0.99,
scale_reward=0.01,
qf_learning_rate=1e-3,
policy_learning_rate=1e-4
)
algo.train()
2 changes: 1 addition & 1 deletion setup.py
@@ -2,7 +2,7 @@


setup(name='simglucose',
version='0.1.6',
version='0.1.7',
description='A Type-1 Diabetes Simulator as a Reinforcement Learning Environment in OpenAI gym or rllab (python implementation of UVa/Padova Simulator)',
url='https://github.com/jxx123/simglucose',
author='Jinyu Xie',
7 changes: 5 additions & 2 deletions simglucose/analysis/risk.py
@@ -1,4 +1,5 @@
import numpy as np
import warnings


def risk_index(BG, horizon):
@@ -8,7 +9,9 @@ def risk_index(BG, horizon):
fBG = 1.509 * (np.log(BG_to_compute)**1.084 - 5.381)
rl = 10 * fBG[fBG < 0]**2
rh = 10 * fBG[fBG > 0]**2
LBGI = np.nan_to_num(np.mean(rl))
HBGI = np.nan_to_num(np.mean(rh))
with warnings.catch_warnings():
warnings.simplefilter('ignore')
LBGI = np.nan_to_num(np.mean(rl))
HBGI = np.nan_to_num(np.mean(rh))
RI = LBGI + HBGI
return (LBGI, HBGI, RI)
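
Context for the change above: when every BG sample in the window lies on one side of the target, `rl` or `rh` is empty, `np.mean` emits a RuntimeWarning and returns NaN, and `np.nan_to_num` already maps that NaN to 0. Wrapping the two means in `warnings.catch_warnings()` simply silences that harmless warning. A quick way to exercise the function is sketched below; it assumes, from the `BG_to_compute` name, that `horizon` selects the most recent samples, since the full function body is not shown in this hunk.

```python
from simglucose.analysis.risk import risk_index

# An all-hyperglycemic trace: rl ends up empty, so LBGI falls back to 0
# via nan_to_num without printing a RuntimeWarning anymore.
bg_trace = [180, 200, 220, 240]            # BG samples in mg/dL
LBGI, HBGI, RI = risk_index(bg_trace, horizon=4)
print(LBGI, HBGI, RI)                      # expect LBGI == 0.0
```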
17 changes: 0 additions & 17 deletions simglucose/requirements.txt

This file was deleted.

65 changes: 2 additions & 63 deletions simglucose/simulation/env.py
@@ -1,4 +1,3 @@
import numpy as np
from simglucose.patient.t1dpatient import Action
from simglucose.analysis.risk import risk_index
import pandas as pd
@@ -7,15 +6,9 @@
from collections import namedtuple
from simglucose.simulation.rendering import Viewer

rllab = True
try:
from rllab.envs.base import Env
from rllab.envs.base import Step
from rllab.spaces import Box
except ImportError:
rllab = False
print('You could use rllab features, if you have rllab module.')

_Step = namedtuple("Step",
["observation", "reward", "done", "info"])

@@ -27,21 +20,11 @@ def Step(observation, reward, done, **kwargs):
"""
return _Step(observation, reward, done, kwargs)

class Env(object):
def __init__():
pass

def step(self, action):
raise NotImplementedError

def reset(self):
raise NotImplementedError

Observation = namedtuple('Observation', ['CGM'])
logger = logging.getLogger(__name__)


class T1DSimEnv(Env):
class T1DSimEnv(object):
def __init__(self,
patient,
sensor,
@@ -147,28 +130,7 @@ def reset(self):
self._reset()
CGM = self.sensor.measure(self.patient)
obs = Observation(CGM=CGM)
return Step(observation=obs,
reward=0,
done=False,
sample_time=self.sample_time,
patient_name=self.patient.name,
meal=0)

@property
def action_space(self):
if rllab:
ub = np.array([self.pump._params['max_basal'],
self.pump._params['max_bolus']])
return Box(low=np.array([0, 0]), high=ub)
else:
pass

@property
def observation_space(self):
if rllab:
return Box(low=0, high=np.inf, shape=(1,))
else:
pass
return obs

def render(self, close=False):
if close:
@@ -194,26 +156,3 @@ def show_history(self):
df['Risk'] = pd.Series(self.risk_hist)
df = df.set_index('Time')
return df


def adjust_ylim(ax, ymin, ymax):
ylim = ax.get_ylim()
update = False

if ymin < ylim[0]:
y1 = ymin - 0.1 * abs(ymin)
update = True
else:
y1 = ylim[0]

if ymax > ylim[1]:
y2 = ymax + 0.1 * abs(ymax)
update = True
else:
y2 = ylim[1]

if update:
ax.set_ylim([y1, y2])
for spine in ax.spines.values():
ax.draw_artist(spine)
ax.draw_artist(ax.yaxis)
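
Net effect of the env.py changes: the bundled rllab `Env`/`Box` integration and the `adjust_ylim` plotting helper are removed, and `reset()` now returns only the initial `Observation` instead of a full `Step` tuple, which is the "modified reset output" in the commit title. Below is a rough sketch of the updated contract against the core environment; object construction follows the README's Advanced Usage example, so treat the factory calls and doses as assumptions rather than the definitive API.

```python
from datetime import datetime

from simglucose.simulation.env import T1DSimEnv
from simglucose.patient.t1dpatient import T1DPatient
from simglucose.sensor.cgm import CGMSensor
from simglucose.actuator.pump import InsulinPump
from simglucose.simulation.scenario_gen import RandomScenario
from simglucose.controller.base import Action

patient = T1DPatient.withName('adolescent#002')
sensor = CGMSensor.withName('Dexcom', seed=1)
pump = InsulinPump.withName('Insulet')
scenario = RandomScenario(start_time=datetime(2018, 1, 1), seed=1)
env = T1DSimEnv(patient, sensor, pump, scenario)

obs = env.reset()   # previously: obs, reward, done, info = env.reset()
print(obs.CGM)      # Observation now exposes only the CGM reading

step = env.step(Action(basal=0.02, bolus=0))  # arbitrary illustrative doses
print(step.observation.CGM, step.reward, step.done, step.info)
```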
1 change: 1 addition & 0 deletions tests/test_render.py
@@ -18,6 +18,7 @@ def test_rendering(self):
for i in range(len(self.df)):
df_tmp = self.df.iloc[0:(i + 1), :]
viewer.render(df_tmp)
viewer.close()


if __name__ == '__main__':