From 1a29011da452069d77858afb2a2c96f0c8aee2a2 Mon Sep 17 00:00:00 2001 From: Xavier Bouthillier Date: Wed, 20 May 2020 12:02:48 -0400 Subject: [PATCH] Add hashing to environment (#94) Why: Hashing an environment makes it possible to verify whether two initial states are identical. This is necessary if we want to create groups of initial states that are mutually exclusive. How: Compute the hash of the grid encoding together with the initial agent position and direction. Note: I'm not sure whether the RNG state may eventually cause divergences in the episodes and whether that should be accounted for in the hash. However, it is difficult to measure the divergence without playing the episodes, and taking the RNG state into account will likely give different hashes for practically identical states. That is why I feel the RNG state should not be part of the hash. --- gym_minigrid/minigrid.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/gym_minigrid/minigrid.py b/gym_minigrid/minigrid.py index ab39662d9..fbff477ef 100644 --- a/gym_minigrid/minigrid.py +++ b/gym_minigrid/minigrid.py @@ -1,4 +1,5 @@ import math +import hashlib import gym from enum import IntEnum import numpy as np @@ -733,6 +734,18 @@ def seed(self, seed=1337): self.np_random, _ = seeding.np_random(seed) return [seed] + def hash(self, size=16): + """Compute a hash that uniquely identifies the current state of the environment. + :param size: Size of the hashing + """ + sample_hash = hashlib.sha256() + + to_encode = [self.grid.encode(), self.agent_pos, self.agent_dir] + for item in to_encode: + sample_hash.update(str(item).encode('utf8')) + + return sample_hash.hexdigest()[:size] + @property def steps_remaining(self): return self.max_steps - self.step_count