From 1a29011da452069d77858afb2a2c96f0c8aee2a2 Mon Sep 17 00:00:00 2001
From: Xavier Bouthillier <xavier.bouthillier@gmail.com>
Date: Wed, 20 May 2020 12:02:48 -0400
Subject: [PATCH] Add hashing to environment (#94)

Why:

Hashing an environment makes it possible to verify if two initial states
are identical. This is necessary if we want to create groups of initial
states that are mutually exclusive.

How:

Compute the hash of the grid encoding with the initial agent position and
direction.

Note:

I'm not sure whether the RNG state may eventually cause the episodes to
diverge, or whether that should be accounted for in the hash. However,
it is difficult to measure that divergence without playing the episodes,
and taking the RNG state into account would likely give different hashes
for practically identical states. That's why I feel the RNG state should
not be part of the hash.
---
 gym_minigrid/minigrid.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/gym_minigrid/minigrid.py b/gym_minigrid/minigrid.py
index ab39662d9..fbff477ef 100644
--- a/gym_minigrid/minigrid.py
+++ b/gym_minigrid/minigrid.py
@@ -1,4 +1,5 @@
 import math
+import hashlib
 import gym
 from enum import IntEnum
 import numpy as np
@@ -733,6 +734,18 @@ def seed(self, seed=1337):
         self.np_random, _ = seeding.np_random(seed)
         return [seed]
 
+    def hash(self, size=16):
+        """Return a truncated SHA-256 hex digest of the grid, agent position and direction.
+
+        :param size: Number of leading hex characters of the digest to return
+        """
+        # Use a nested list: str() of a large ndarray is elided ("..."), causing collisions.
+        grid = np.asarray(self.grid.encode()).tolist()
+        sample_hash = hashlib.sha256()
+        for item in (grid, self.agent_pos, self.agent_dir):
+            sample_hash.update(str(item).encode('utf8'))
+        return sample_hash.hexdigest()[:size]
+
     @property
     def steps_remaining(self):
         return self.max_steps - self.step_count