
Chore: anakin and sebulba folders #1090

Merged
49 commits
adc2114
feat: gym wrapper
Louay-Ben-nessir Jun 10, 2024
ce86d09
chore : pre-commit hooks
Louay-Ben-nessir Jun 10, 2024
d5edf45
fix: merged the observations and action mask
Louay-Ben-nessir Jun 14, 2024
f891be5
fix: Create the gym wrappers directly
Louay-Ben-nessir Jun 14, 2024
15f4867
chore: pre-commit
Louay-Ben-nessir Jun 14, 2024
82ea827
fix: fixed the async env creation
Louay-Ben-nessir Jun 14, 2024
4e94df5
fix: gymV26 compatability wrapper
Louay-Ben-nessir Jun 14, 2024
8a86be9
fix: various minor fixes
Louay-Ben-nessir Jun 15, 2024
1da5c15
fix: handling rware reset function
Louay-Ben-nessir Jun 15, 2024
4466044
feat: async env wrapper , changed the gym wrapper to rware wrapper
Louay-Ben-nessir Jun 16, 2024
24d8aae
fix: fixed the async env wrapper
Louay-Ben-nessir Jun 16, 2024
a6deae2
fix: info only contains the action_mask and reformated (n_agents, n_e…
Louay-Ben-nessir Jun 18, 2024
1475bd0
chore: removed async gym wrapper
Louay-Ben-nessir Jun 22, 2024
9fce9c6
feat: gym metric tracker wrapper
Louay-Ben-nessir Jun 22, 2024
c3b405d
chore: clean up & updated the code to match the sebulba-ff-ippo branch
Louay-Ben-nessir Jul 10, 2024
e40c5d4
chore : pre-commits and some comments
Louay-Ben-nessir Jul 10, 2024
4b17c15
chore: removed unused config file
Louay-Ben-nessir Jul 10, 2024
d694498
chore: created the anakin and sebulba folders
Louay-Ben-nessir Jul 16, 2024
cb8111f
fix: imports and config paths in systems
Louay-Ben-nessir Jul 16, 2024
f1adc31
chore: pre-commits
Louay-Ben-nessir Jul 16, 2024
3850591
feat: LBF and reproducibility
Louay-Ben-nessir Jul 16, 2024
133a250
fix: added missing lbf import
Louay-Ben-nessir Jul 17, 2024
a368476
fix: config and imports for anakin q_learning and sac
Louay-Ben-nessir Jul 17, 2024
32433ff
chore: arch_name for anakin
Louay-Ben-nessir Jul 17, 2024
a68c8e9
fix: sum the rewards when using a shared reward
Louay-Ben-nessir Jul 17, 2024
8cee7ac
fix: configs revamp
Louay-Ben-nessir Jul 17, 2024
e199f3a
chore: pre-commits
Louay-Ben-nessir Jul 17, 2024
2b71d3b
fix: more config changes
Louay-Ben-nessir Jul 17, 2024
e87ad28
chore: pre-commits
Louay-Ben-nessir Jul 17, 2024
2b587c0
chore: renamed arch_name to architecture_name
Louay-Ben-nessir Jul 18, 2024
5ad4d2f
chore: config files rename
Louay-Ben-nessir Jul 18, 2024
432071e
fix; moved from gym to gymnasium
Louay-Ben-nessir Jul 18, 2024
77e6e12
feat: generic gym wrapper
Louay-Ben-nessir Jul 18, 2024
43511fd
feat: using gymnasium async worker
Louay-Ben-nessir Jul 18, 2024
eaf9a1c
chore: pre-commits and annotaions
Louay-Ben-nessir Jul 18, 2024
16c0ac3
fix: config file fixes
Louay-Ben-nessir Jul 18, 2024
18b928d
fix: rware import
Louay-Ben-nessir Jul 18, 2024
19a7765
fix: better agent ids wrapper?
Louay-Ben-nessir Jul 18, 2024
c4a05d6
chore: bunch of minor changes
Louay-Ben-nessir Jul 18, 2024
5595818
chore : annotation
Louay-Ben-nessir Jul 18, 2024
29b1303
chore: comments
Louay-Ben-nessir Jul 19, 2024
669dfbd
feat: restructured the folders
Louay-Ben-nessir Jul 19, 2024
2a6452d
fix: config file fixes
Louay-Ben-nessir Jul 19, 2024
e2f36f9
fix: LBF import
Louay-Ben-nessir Jul 19, 2024
29396c9
fix: Async worker auto-resetting
Louay-Ben-nessir Jul 19, 2024
6de0b1e
chore: minor changes
Louay-Ben-nessir Jul 19, 2024
7584ce5
fixed: annotations and add agent id spaces
Louay-Ben-nessir Jul 22, 2024
e638e9f
fix: fixed the logging deadlock for sebulba
Louay-Ben-nessir Jul 22, 2024
81b0a89
Merge pull request #4 from Louay-Ben-nessir/feat-sebulba-gym-wrapper
Louay-Ben-nessir Jul 22, 2024
1 change: 1 addition & 0 deletions mava/configs/arch/anakin.yaml
@@ -1,4 +1,5 @@
# --- Anakin config ---
architecture_name: anakin

# --- Training ---
num_envs: 16 # Number of vectorised environments per device.
19 changes: 19 additions & 0 deletions mava/configs/arch/sebulba.yaml
@@ -0,0 +1,19 @@
# --- Sebulba config ---
architecture_name: sebulba

# --- Training ---
num_envs: 32 # number of environments per thread.

# --- Evaluation ---
evaluation_greedy: False # Evaluate the policy greedily. If True the policy will select
# an action which corresponds to the greatest logit. If false, the policy will sample
# from the logits.
num_eval_episodes: 32 # Number of episodes to evaluate per evaluation.
num_evaluation: 200 # Number of evenly spaced evaluations to perform during training.
absolute_metric: True # Whether the absolute metric should be computed. For more details
# on the absolute metric please see: https://arxiv.org/abs/2209.10485

# --- Sebulba devices config ---
n_threads_per_executor: 1 # num of different threads/env batches per actor
executor_device_ids: [0] # ids of actor devices
learner_device_ids: [0] # ids of learner devices
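The sebulba arch config above sizes the rollout throughput: `num_envs` environments per thread, `n_threads_per_executor` threads per actor device, across `executor_device_ids`. A minimal sketch of how these fields combine — the config is shown as a plain dict here for illustration, whereas Mava loads it via Hydra/OmegaConf:

```python
# Hypothetical loaded arch config, mirroring mava/configs/arch/sebulba.yaml above.
sebulba_cfg = {
    "architecture_name": "sebulba",
    "num_envs": 32,
    "n_threads_per_executor": 1,
    "executor_device_ids": [0],
    "learner_device_ids": [0],
}

def total_envs(cfg: dict) -> int:
    """Total vectorised environments stepped per rollout iteration:
    envs per thread x threads per executor x number of executor devices."""
    return cfg["num_envs"] * cfg["n_threads_per_executor"] * len(cfg["executor_device_ids"])

print(total_envs(sebulba_cfg))  # 32 in this single-thread, single-device setup
```

Scaling any of the three fields multiplies the batch of experience each learner step consumes, which is why they live together in the arch config rather than per environment.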
19 changes: 19 additions & 0 deletions mava/configs/env/lbf_gym.yaml
@@ -0,0 +1,19 @@
# ---Environment Configs---
defaults:
- _self_
- scenario: gym-lbf-2s-8x8-2p-2f-coop # [gym-lbf-2s-8x8-2p-2f-coop, gym-lbf-8x8-2p-2f-coop, gym-lbf-2s-10x10-3p-3f, gym-lbf-10x10-3p-3f, gym-lbf-15x15-3p-5f, gym-lbf-15x15-4p-3f, gym-lbf-15x15-4p-5f]

env_name: LevelBasedForaging # Used for logging purposes.

# Defines the metric that will be used to evaluate the performance of the agent.
# This metric is returned at the end of an experiment and can be used for hyperparameter tuning.
eval_metric: episode_return

# Whether to add agent IDs to the observations returned by the environment.
add_agent_id : False

# Whether or not to log the winrate of this environment.
log_win_rate: False

# Whether or not to sum the returned rewards over all of the agents.
use_shared_rewards: True
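When `use_shared_rewards` is enabled, the per-agent rewards are summed into a single team reward that every agent receives (per commit a68c8e9). A hypothetical sketch of that logic, not Mava's actual wrapper:

```python
from typing import List

def share_rewards(per_agent_rewards: List[float], use_shared_rewards: bool) -> List[float]:
    """If use_shared_rewards, every agent receives the summed team reward;
    otherwise each agent keeps its individual reward."""
    if use_shared_rewards:
        team_reward = sum(per_agent_rewards)
        return [team_reward] * len(per_agent_rewards)
    return list(per_agent_rewards)

print(share_rewards([0.0, 1.0, 0.5], use_shared_rewards=True))  # [1.5, 1.5, 1.5]
```

In a wrapper this would run on the reward vector of every `step` call, turning an individual-reward environment into a fully cooperative one.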
19 changes: 19 additions & 0 deletions mava/configs/env/rware_gym.yaml
@@ -0,0 +1,19 @@
# ---Environment Configs---
defaults:
- _self_
- scenario: gym-rware-tiny-2ag # [gym-rware-tiny-2ag, gym-rware-tiny-4ag, gym-rware-tiny-4ag-easy, gym-rware-small-4ag]

env_name: RobotWarehouse # Used for logging purposes.

# Defines the metric that will be used to evaluate the performance of the agent.
# This metric is returned at the end of an experiment and can be used for hyperparameter tuning.
eval_metric: episode_return

# Whether to add agent IDs to the observations returned by the environment.
add_agent_id : False

# Whether or not to log the winrate of this environment.
log_win_rate: False

# Whether or not to sum the returned rewards over all of the agents.
use_shared_rewards: True
18 changes: 18 additions & 0 deletions mava/configs/env/scenario/gym-lbf-10x10-3p-3f.yaml
@@ -0,0 +1,18 @@
# The config of the 10x10-3p-3f scenario with the VectorObserver set as default
name: LevelBasedForaging
task_name: 10x10-3p-3f

task_config:
field_size: [10,10]
sight: 10
players: 3
max_num_food: 3
max_player_level: 2
force_coop: False
max_episode_steps: 100
min_player_level : 1
min_food_level : null
max_food_level : null

env_kwargs:
{} # there are no scenario specific env_kwargs for this env
18 changes: 18 additions & 0 deletions mava/configs/env/scenario/gym-lbf-15x15-3p-5f.yaml
@@ -0,0 +1,18 @@
# The config of the 15x15-3p-5f scenario with the VectorObserver set as default
name: LevelBasedForaging
task_name: 15x15-3p-5f

task_config:
field_size: [15, 15]
sight: 15
players: 3
max_num_food: 5
max_player_level: 2
force_coop: False
max_episode_steps: 100
min_player_level : 1
min_food_level : null
max_food_level : null

env_kwargs:
{} # there are no scenario specific env_kwargs for this env
18 changes: 18 additions & 0 deletions mava/configs/env/scenario/gym-lbf-15x15-4p-3f.yaml
@@ -0,0 +1,18 @@
# The config of the 15x15-4p-3f scenario with the VectorObserver set as default
name: LevelBasedForaging
task_name: 15x15-4p-3f

task_config:
field_size: [15, 15]
sight: 15
players: 4
max_num_food: 3
max_player_level: 2
force_coop: False
max_episode_steps: 100
min_player_level : 1
min_food_level : null
max_food_level : null

env_kwargs:
{} # there are no scenario specific env_kwargs for this env
18 changes: 18 additions & 0 deletions mava/configs/env/scenario/gym-lbf-15x15-4p-5f.yaml
@@ -0,0 +1,18 @@
# The config of the 15x15-4p-5f scenario with the VectorObserver set as default
name: LevelBasedForaging
task_name: 15x15-4p-5f

task_config:
field_size: [15, 15]
sight: 15
players: 4
max_num_food: 5
max_player_level: 2
force_coop: False
max_episode_steps: 100
min_player_level : 1
min_food_level : null
max_food_level : null

env_kwargs:
{} # there are no scenario specific env_kwargs for this env
18 changes: 18 additions & 0 deletions mava/configs/env/scenario/gym-lbf-2s-10x10-3p-3f.yaml
@@ -0,0 +1,18 @@
# The config of the 2s-10x10-3p-3f scenario with the VectorObserver set as default
name: LevelBasedForaging
task_name: 2s-10x10-3p-3f

task_config:
field_size: [10, 10]
sight: 2
players: 3
max_num_food: 3
max_player_level: 2
force_coop: False
max_episode_steps: 100
min_player_level : 1
min_food_level : null
max_food_level : null

env_kwargs:
{} # there are no scenario specific env_kwargs for this env
18 changes: 18 additions & 0 deletions mava/configs/env/scenario/gym-lbf-2s-8x8-2p-2f-coop.yaml
@@ -0,0 +1,18 @@
# The config of the 2s-8x8-2p-2f-coop scenario with the VectorObserver set as default.
name: LevelBasedForaging
task_name: 2s-8x8-2p-2f-coop

task_config:
field_size: [8, 8] # size of the grid to generate.
sight: 2 # field of view of an agent.
players: 2 # number of agents on the grid.
max_num_food: 2 # number of food in the environment.
max_player_level: 2 # maximum level of the agents (inclusive).
force_coop: True # force cooperation between agents.
max_episode_steps: 100 # max number of steps per episode.
min_player_level : 1 # minimum level of the agents (inclusive).
min_food_level : null
max_food_level : null

env_kwargs:
{} # there are no scenario specific env_kwargs for this env
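The scenario `task_config` fields map onto lbforaging's registered gym IDs (e.g. `Foraging-2s-8x8-2p-2f-coop-v2`). An illustrative sketch of that mapping — the exact registration scheme belongs to lbforaging and is an assumption here:

```python
def lbf_env_id(task_config: dict, version: int = 2) -> str:
    """Illustrative mapping from a task_config like the one above to an
    lbforaging gym ID such as 'Foraging-2s-8x8-2p-2f-coop-v2'.
    The ID format is lbforaging's convention, assumed for this sketch."""
    rows, cols = task_config["field_size"]
    parts = []
    # A sight smaller than the grid means partial observability, e.g. the '2s' prefix.
    if task_config["sight"] < max(rows, cols):
        parts.append(f"{task_config['sight']}s")
    parts.append(f"{rows}x{cols}")
    parts.append(f"{task_config['players']}p")
    parts.append(f"{task_config['max_num_food']}f")
    suffix = "-coop" if task_config["force_coop"] else ""
    return "Foraging-" + "-".join(parts) + suffix + f"-v{version}"

cfg = {"field_size": [8, 8], "sight": 2, "players": 2, "max_num_food": 2, "force_coop": True}
print(lbf_env_id(cfg))  # Foraging-2s-8x8-2p-2f-coop-v2
```

The same function reproduces the non-cooperative, fully observed IDs (e.g. `Foraging-10x10-3p-3f-v2`), matching the scenario list in `lbf_gym.yaml`.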
18 changes: 18 additions & 0 deletions mava/configs/env/scenario/gym-lbf-8x8-2p-2f-coop.yaml
@@ -0,0 +1,18 @@
# The config of the 8x8-2p-2f-coop scenario with the VectorObserver set as default
name: LevelBasedForaging
task_name: 8x8-2p-2f-coop

task_config:
field_size: [8, 8]
sight: 8
players: 2
max_num_food: 2
max_player_level: 2
force_coop: True
max_episode_steps: 100
min_player_level : 1
min_food_level : null
max_food_level : null

env_kwargs:
{} # there are no scenario specific env_kwargs for this env
18 changes: 18 additions & 0 deletions mava/configs/env/scenario/gym-rware-small-4ag.yaml
@@ -0,0 +1,18 @@
# The config of the small-4ag environment
name: RobotWarehouse
task_name: small-4ag

task_config:
column_height: 8
shelf_rows: 2
shelf_columns: 3
n_agents: 4
sensor_range: 1
request_queue_size: 4
msg_bits : 0
max_inactivity_steps : null
max_steps : 500
reward_type : 0

env_kwargs:
{} # there are no scenario specific env_kwargs for this env
18 changes: 18 additions & 0 deletions mava/configs/env/scenario/gym-rware-tiny-2ag.yaml
@@ -0,0 +1,18 @@
# The config of the tiny-2ag environment
name: RobotWarehouse
task_name: tiny-2ag

task_config:
column_height: 8
shelf_rows: 1
shelf_columns: 3
n_agents: 2
sensor_range: 1
request_queue_size: 2
msg_bits : 0
max_inactivity_steps : null
max_steps : 500
reward_type : 0

env_kwargs:
{} # there are no scenario specific env_kwargs for this env
18 changes: 18 additions & 0 deletions mava/configs/env/scenario/gym-rware-tiny-4ag-easy.yaml
@@ -0,0 +1,18 @@
# The config of the tiny-4ag-easy environment
name: RobotWarehouse
task_name: tiny-4ag-easy

task_config:
column_height: 8
shelf_rows: 1
shelf_columns: 3
n_agents: 4
sensor_range: 1
request_queue_size: 8
msg_bits : 0
max_inactivity_steps : null
max_steps : 500
reward_type : 0

env_kwargs:
{} # there are no scenario specific env_kwargs for this env
18 changes: 18 additions & 0 deletions mava/configs/env/scenario/gym-rware-tiny-4ag.yaml
@@ -0,0 +1,18 @@
# The config of the tiny_4ag environment
name: RobotWarehouse
task_name: tiny-4ag

task_config:
column_height: 8
shelf_rows: 1
shelf_columns: 3
n_agents: 4
sensor_range: 1
request_queue_size: 4
msg_bits : 0
max_inactivity_steps : null
max_steps : 500
reward_type : 0

env_kwargs:
{} # there are no scenario specific env_kwargs for this env
@@ -578,7 +578,7 @@ def run_experiment(_config: DictConfig) -> float:
return eval_performance


@hydra.main(config_path="../../configs", config_name="default_ff_ippo.yaml", version_base="1.2")
@hydra.main(config_path="../../../configs", config_name="default_ff_ippo.yaml", version_base="1.2")
def hydra_entry_point(cfg: DictConfig) -> float:
"""Experiment entry point."""
# Allow dynamic attributes.
@@ -575,7 +575,7 @@ def run_experiment(_config: DictConfig) -> float:
return eval_performance


@hydra.main(config_path="../../configs", config_name="default_ff_mappo.yaml", version_base="1.2")
@hydra.main(config_path="../../../configs", config_name="default_ff_mappo.yaml", version_base="1.2")
def hydra_entry_point(cfg: DictConfig) -> float:
"""Experiment entry point."""
# Allow dynamic attributes.
@@ -735,7 +735,7 @@ def run_experiment(_config: DictConfig) -> float: # noqa: CCR001
return eval_performance


@hydra.main(config_path="../../configs", config_name="default_rec_ippo.yaml", version_base="1.2")
@hydra.main(config_path="../../../configs", config_name="default_rec_ippo.yaml", version_base="1.2")
def hydra_entry_point(cfg: DictConfig) -> float:
"""Experiment entry point."""
# Allow dynamic attributes.
@@ -726,7 +726,9 @@ def run_experiment(_config: DictConfig) -> float: # noqa: CCR001
return eval_performance


@hydra.main(config_path="../../configs", config_name="default_rec_mappo.yaml", version_base="1.2")
@hydra.main(
config_path="../../../configs", config_name="default_rec_mappo.yaml", version_base="1.2"
)
def hydra_entry_point(cfg: DictConfig) -> float:
"""Experiment entry point."""
# Allow dynamic attributes.
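The repeated `config_path` edits above all follow from the folder move: each system file now lives one directory deeper (e.g. under an `anakin/` subfolder), and `hydra.main`'s `config_path` is relative to the decorated file, so each path gains one more `..`. A quick check with the standard library, using `mava/systems/ppo/ff_ippo.py` as the illustrative pre-move location:

```python
import posixpath
from pathlib import PurePosixPath

# Before the move: a system at mava/systems/ppo/ resolved "../../configs".
old_file = PurePosixPath("mava/systems/ppo/ff_ippo.py")
print(posixpath.normpath(str(old_file.parent / "../../configs")))  # mava/configs

# After moving into the anakin subfolder, one extra ".." is needed.
new_file = PurePosixPath("mava/systems/ppo/anakin/ff_ippo.py")
print(posixpath.normpath(str(new_file.parent / "../../../configs")))  # mava/configs
```

Both resolve to the same `mava/configs` directory, which is exactly what the four decorator changes in this diff preserve.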
13 changes: 13 additions & 0 deletions mava/systems/ppo/sebulba/ff_ippo.py
@@ -0,0 +1,13 @@
# Copyright 2022 InstaDeep Ltd. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
@@ -645,7 +645,7 @@ def run_experiment(cfg: DictConfig) -> float:
return float(eval_performance)


@hydra.main(config_path="../../configs", config_name="default_rec_iql.yaml", version_base="1.2")
@hydra.main(config_path="../../../configs", config_name="default_rec_iql.yaml", version_base="1.2")
def hydra_entry_point(cfg: DictConfig) -> float:
"""Experiment entry point."""
# Allow dynamic attributes.
@@ -607,7 +607,7 @@ def run_experiment(cfg: DictConfig) -> float:
return eval_performance


@hydra.main(config_path="../../configs", config_name="default_ff_isac.yaml", version_base="1.2")
@hydra.main(config_path="../../../configs", config_name="default_ff_isac.yaml", version_base="1.2")
def hydra_entry_point(cfg: DictConfig) -> float:
"""Experiment entry point."""
# Allow dynamic attributes.
@@ -626,7 +626,7 @@ def run_experiment(cfg: DictConfig) -> float:
return eval_performance


@hydra.main(config_path="../../configs", config_name="default_ff_masac.yaml", version_base="1.2")
@hydra.main(config_path="../../../configs", config_name="default_ff_masac.yaml", version_base="1.2")
def hydra_entry_point(cfg: DictConfig) -> float:
"""Experiment entry point."""
# Allow dynamic attributes.
5 changes: 4 additions & 1 deletion mava/utils/logger.py
@@ -150,8 +150,11 @@ class NeptuneLogger(BaseLogger):
def __init__(self, cfg: DictConfig, unique_token: str) -> None:
tags = list(cfg.logger.kwargs.neptune_tag)
project = cfg.logger.kwargs.neptune_project
mode = (
"async" if cfg.arch.architecture_name == "anakin" else "sync"
) # async logging leads to deadlocks in sebulba

self.logger = neptune.init_run(project=project, tags=tags)
self.logger = neptune.init_run(project=project, tags=tags, mode=mode)

self.logger["config"] = stringify_unsupported(cfg)
self.detailed_logging = cfg.logger.kwargs.detailed_neptune_logging
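The logger change above fixes the deadlock from commit e638e9f: neptune's asynchronous logging mode deadlocks under sebulba's multithreaded actors, so the logger falls back to synchronous mode there. The selection logic, isolated from the neptune call for clarity:

```python
def neptune_mode(architecture_name: str) -> str:
    """Pick the neptune connectivity mode for an architecture.
    Async logging deadlocks with sebulba's logging threads, so only
    anakin keeps async; "async" and "sync" are real neptune modes."""
    return "async" if architecture_name == "anakin" else "sync"

print(neptune_mode("anakin"))   # async
print(neptune_mode("sebulba"))  # sync
```

The resulting string is then passed straight through as `neptune.init_run(..., mode=mode)`, as the diff shows.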