diff --git a/docs/environments.md b/docs/environments.md
index 0ab9b158..6c726bf4 100644
--- a/docs/environments.md
+++ b/docs/environments.md
@@ -144,6 +144,43 @@ Registered configurations:
 There are multiple colored boxes of random sizes in one large room. In order
 to get a reward, the agent must put the red box next to the yellow box.
 
+# Sign
+
+Registered configurations:
+- `MiniWorld-Sign-v0`
+
+<p align="center">
+<img src="/images/sign.jpg" width=300></img>
+</p>
+
+There are 6 objects of color (red, green, blue) and shape (key, box) in a
+U-shaped maze. The agent starts on one side of the barrier in the U-shaped
+maze, and on the other side of the barrier is a sign that says "blue,"
+"green," or "red." The sign is highlighted above in yellow. Additionally, the
+state includes a goal that specifies either key or box. In order to get a
+reward, the agent must read the sign and go to the object with the shape
+specified by the goal and the color specified by the sign. Going to any other
+object yields -1 reward.
+
+Note that the state structure differs from the standard MiniWorld state. In
+particular, the state is a dict where `state["obs"]` is the standard
+observation, and `state["goal"]` is an additional int specifying key or box.
+
+This environment is from the paper [Decoupling Exploration and Exploitation
+for Meta-Reinforcement Learning without Sacrifices](https://arxiv.org/abs/2008.02790).
+If you use this environment, please cite this paper:
+
+```
+@inproceedings{liu2021decoupling,
+  title={Decoupling Exploration and Exploitation for Meta-Reinforcement Learning without Sacrifices},
+  author={Liu, Evan Z and Raghunathan, Aditi and Liang, Percy and Finn, Chelsea},
+  booktitle={International Conference on Machine Learning},
+  pages={6925--6935},
+  year={2021},
+  organization={PMLR}
+}
+```
+
 # RemoteBot
 
 Registered configurations:
diff --git a/gym_miniworld/envs/__init__.py b/gym_miniworld/envs/__init__.py
index 0e5a1c2f..dab69fa2 100644
--- a/gym_miniworld/envs/__init__.py
+++ b/gym_miniworld/envs/__init__.py
@@ -17,6 +17,7 @@ from .collecthealth import *
 from .simtorealgoto import *
 from .simtorealpush import *
+from .sign import *
 
 # Registered environment ids
 env_ids = []
diff --git a/gym_miniworld/envs/sign.py b/gym_miniworld/envs/sign.py
new file mode 100644
index 00000000..ede04c39
--- /dev/null
+++ b/gym_miniworld/envs/sign.py
@@ -0,0 +1,132 @@
+import math
+import gym
+
+from ..params import DEFAULT_PARAMS
+from ..entity import Box, COLOR_NAMES, Key, MeshEnt, TextFrame
+from ..miniworld import MiniWorldEnv
+
+
+class BigKey(Key):
+    """A key with a bigger size for better visibility."""
+
+    def __init__(self, color, size=0.6):
+        assert color in COLOR_NAMES
+        MeshEnt.__init__(
+            self,
+            mesh_name='key_{}'.format(color),
+            height=size,
+            static=False
+        )
+
+
+class Sign(MiniWorldEnv):
+    """Sign environment from https://arxiv.org/abs/2008.02790.
+
+    If you use this environment, please cite the above paper (Liu et al., 2020).
+
+    Small U-shaped maze with 6 objects: (blue, red, green) x (key, box).
+    A sign on the wall says "blue", "green", or "red."
+
+    In addition to the normal state, accessible under state["obs"], the state
+    also includes a goal under state["goal"] that specifies box or key.
+
+    The episode ends when any object is touched.
+    Touching the object whose color matches the sign and whose shape matches
+    the goal yields +1 reward.
+    Touching any other object yields -1 reward.
+
+    The sign and goal can be configured via the color_index and goal arguments
+    to the constructor, respectively.
+
+    Includes an action to end the episode.
+    """
+
+    def __init__(self, size=10, max_episode_steps=20, color_index=0, goal=0):
+        """Constructs the Sign environment.
+
+        Args:
+            size (int): base size of the U-shaped maze.
+            max_episode_steps (int): number of steps before the episode ends.
+            color_index (int): specifies whether the sign says blue (0),
+                red (1), or green (2).
+            goal (int): specifies box (0) or key (1).
+ """ + if color_index not in [0, 1, 2]: + raise ValueError("Only supported values for color_index are 0, 1, 2.") + + if goal not in [0, 1]: + raise ValueError("Only supported values for goal are 0, 1.") + + params = DEFAULT_PARAMS.no_random() + params.set('forward_step', 0.7) # larger steps + params.set('turn_step', 45) # 45 degree rotation + + self._size = size + self._goal = goal + self._color_index = color_index + + super().__init__( + params=params, max_episode_steps=max_episode_steps, domain_rand=False) + + # Allow for left / right / forward + custom end episode + self.action_space = gym.spaces.Discrete(self.actions.move_forward + 2) + + def set_color_index(self, color_index): + self._color_index = color_index + + def _gen_world(self): + gap_size = 0.25 + top_room = self.add_rect_room( + min_x=0, max_x=self._size, min_z=0, max_z=self._size * 0.65) + left_room = self.add_rect_room( + min_x=0, max_x=self._size * 3 / 5, min_z=self._size * 0.65 + gap_size, + max_z=self._size * 1.3) + right_room = self.add_rect_room( + min_x=self._size * 3 / 5, max_x=self._size, + min_z=self._size * 0.65 + gap_size, max_z=self._size * 1.3) + self.connect_rooms(top_room, left_room, min_x=0, max_x=self._size * 3 / 5) + self.connect_rooms( + left_room, right_room, min_z=self._size * 0.65 + gap_size, + max_z=self._size * 1.3) + + self._objects = [ + # Boxes + (self.place_entity(Box(color="blue"), pos=(1, 0, 1)), + self.place_entity(Box(color="red"), pos=(9, 0, 1)), + self.place_entity(Box(color="green"), pos=(9, 0, 5)), + ), + + # Keys + (self.place_entity(BigKey(color="blue"), pos=(5, 0, 1)), + self.place_entity(BigKey(color="red"), pos=(1, 0, 5)), + self.place_entity(BigKey(color="green"), pos=(1, 0, 9))), + ] + + text = ["BLUE", "RED", "GREEN"][self._color_index] + sign = TextFrame( + pos=[self._size, 1.35, self._size + gap_size], + dir=math.pi, + str=text, + height=1, + ) + self.entities.append(sign) + self.place_agent(min_x=4, max_x=5, min_z=4, max_z=6) + + def step(self, action): + obs, reward, done, info = super().step(action) + if action == self.actions.move_forward + 1: # custom end episode action + done = True + + for obj_index, object_pair in enumerate(self._objects): + for color_index, obj in enumerate(object_pair): + if self.near(obj): + done = True + reward = float(color_index == self._color_index and + obj_index == self._goal) * 2 - 1 + + state = {"obs": obs, "goal": self._goal} + return state, reward, done, info + + def reset(self): + obs = super().reset() + return {"obs": obs, "goal": self._goal} diff --git a/images/sign.jpg b/images/sign.jpg new file mode 100644 index 00000000..1b424d2b Binary files /dev/null and b/images/sign.jpg differ