add JiDi evaluation support
huangshiyu13 authored Aug 29, 2023
2 parents f52859d + 06917dc commit 7df4dc1
Showing 18 changed files with 1,159 additions and 43 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -159,3 +159,4 @@ opponent_pool
 !/examples/selfplay/opponent_templates/tictactoe_opponent/info.json
 wandb_run
 examples/dmc/new.gif
+/examples/snake/submissions/rl/actor_2000.pth
2 changes: 1 addition & 1 deletion Makefile
@@ -12,7 +12,7 @@ test:
 lint:
 	$(call check_install, ruff)
 	ruff ${PYTHON_FILES} --select=E9,F63,F7,F82 --show-source
-	ruff ${PYTHON_FILES} --exit-zero | grep -v '501\|405\|401\|402\|403'
+	ruff ${PYTHON_FILES} --exit-zero | grep -v '501\|405\|401\|402\|403\|722'
 
 format:
 	$(call check_install, isort)
5 changes: 5 additions & 0 deletions examples/snake/README.md
@@ -7,6 +7,11 @@ This is the example for the snake game.
 python train_selfplay.py
 ```
 
+## Evaluate JiDi submissions locally
+
+```bash
+python jidi_eval.py
+```
 
 ## Submit to JiDi
54 changes: 54 additions & 0 deletions examples/snake/jidi_eval.py
@@ -0,0 +1,54 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2023 The OpenRL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Evaluate JiDi submissions locally by playing them against each other in the OpenRL arena."""

from openrl.arena import make_arena
from openrl.arena.agents.jidi_agent import JiDiAgent
from openrl.envs.wrappers.pettingzoo_wrappers import RecordWinner


def run_arena(
    render: bool = False,
    parallel: bool = True,
    seed=0,
    total_games: int = 10,
    max_game_onetime: int = 5,
):
    env_wrappers = [RecordWinner]

    player_num = 3
    arena = make_arena(
        f"snakes_{player_num}v{player_num}", env_wrappers=env_wrappers, render=render
    )

    agent1 = JiDiAgent("./submissions/rule_v1", player_num=player_num)
    agent2 = JiDiAgent("./submissions/rl", player_num=player_num)

    arena.reset(
        agents={"agent1": agent1, "agent2": agent2},
        total_games=total_games,
        max_game_onetime=max_game_onetime,
        seed=seed,
    )
    result = arena.run(parallel=parallel)
    arena.close()
    print(result)
    return result


if __name__ == "__main__":
    run_arena(render=False, parallel=True, seed=0, total_games=100, max_game_onetime=5)
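jidi_eval.py pits the rule-based submission against the RL one. For a quick sanity check against the random agent touched by this commit, a minimal sketch (assuming it runs from examples/snake like jidi_eval.py, and that the agent names are free-form dictionary keys, as with agent1/agent2 above):

```python
from openrl.arena import make_arena
from openrl.arena.agents.jidi_agent import JiDiAgent
from openrl.envs.wrappers.pettingzoo_wrappers import RecordWinner

# Sketch: evaluate the RL submission against the bundled random agent.
arena = make_arena("snakes_3v3", env_wrappers=[RecordWinner], render=False)
arena.reset(
    agents={
        "rl": JiDiAgent("./submissions/rl", player_num=3),
        "random": JiDiAgent("./submissions/random_agent", player_num=3),
    },
    total_games=10,
    max_game_onetime=5,
    seed=0,
)
print(arena.run(parallel=False))
arena.close()
```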
1 change: 1 addition & 0 deletions examples/snake/submissions/random_agent/submission.py
@@ -26,4 +26,5 @@ def my_controller(observation, action_space, is_act_continuous):
     for i in range(len(action_space)):
         player = sample_single_dim(action_space[i], is_act_continuous)
         joint_action.append(player)
+
     return joint_action
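`sample_single_dim` is defined earlier in the submission file and is not part of this hunk. A plausible sketch of its behavior for discrete spaces, assuming gym-style `Discrete` spaces and the one-hot joint-action format used by the RL submission below (names and details here are assumptions, not the file's actual code):

```python
import random

# Hypothetical sketch of sample_single_dim: for a discrete space, return a
# random action as a one-hot list. Not the actual implementation.
def sample_single_dim(action_space, is_act_continuous):
    if is_act_continuous:
        return action_space.sample()  # assumed passthrough for continuous spaces
    each = [0] * action_space.n
    each[random.randrange(action_space.n)] = 1
    return each
```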
3 changes: 3 additions & 0 deletions examples/snake/submissions/rl/README.md
@@ -0,0 +1,3 @@
# Download the actor weights

Please download [actor_2000.pth](https://github.com/CarlossShi/Competition_3v3snakes/tree/master/agent/rl) and place it in this directory before using this code.
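The link above points at a GitHub tree view. A one-liner for fetching the checkpoint directly (the raw.githubusercontent.com path is inferred from the repository layout and should be verified):

```bash
wget https://raw.githubusercontent.com/CarlossShi/Competition_3v3snakes/master/agent/rl/actor_2000.pth
```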
196 changes: 196 additions & 0 deletions examples/snake/submissions/rl/submission.py
@@ -0,0 +1,196 @@
import os
from pathlib import Path
from typing import Union

import numpy as np
import torch
from torch import nn
from torch.distributions import Categorical

HIDDEN_SIZE = 256
device = torch.device("cpu")

Activation = Union[str, nn.Module]

_str_to_activation = {
    "relu": nn.ReLU(),
    "tanh": nn.Tanh(),
    "identity": nn.Identity(),
    "softmax": nn.Softmax(dim=-1),
}


def mlp(
    sizes, activation: Activation = "relu", output_activation: Activation = "identity"
):
    # Build a fully connected network from a list of layer sizes.
    if isinstance(activation, str):
        activation = _str_to_activation[activation]
    if isinstance(output_activation, str):
        output_activation = _str_to_activation[output_activation]

    layers = []
    for i in range(len(sizes) - 1):
        act = activation if i < len(sizes) - 2 else output_activation
        layers += [nn.Linear(sizes[i], sizes[i + 1]), act]
    return nn.Sequential(*layers)


def get_surrounding(state, width, height, x, y):
    # Cells adjacent to (x, y) on a wrap-around board.
    surrounding = [
        state[(y - 1) % height][x],  # up
        state[(y + 1) % height][x],  # down
        state[y][(x - 1) % width],  # left
        state[y][(x + 1) % width],  # right
    ]

    return surrounding


def make_grid_map(
    board_width, board_height, beans_positions: list, snakes_positions: dict
):
    # Rasterize snake bodies (marked with their snake index) and beans
    # (marked with 1) onto a board_height x board_width grid.
    snakes_map = [[[0] for _ in range(board_width)] for _ in range(board_height)]
    for index, pos in snakes_positions.items():
        for p in pos:
            snakes_map[p[0]][p[1]][0] = index

    for bean in beans_positions:
        snakes_map[bean[0]][bean[1]][0] = 1

    return snakes_map


# Self position: 0:head_x; 1:head_y
# Head surroundings: 2:head_up; 3:head_down; 4:head_left; 5:head_right
# Beans positions: (6, 7) (8, 9) (10, 11) (12, 13) (14, 15)
# Other snake positions: (16, 17) (18, 19) (20, 21) (22, 23) (24, 25) -- (other_x - self_x, other_y - self_y)
def get_observations(state, agents_index, obs_dim, height, width):
    state_copy = state.copy()
    board_width = state_copy["board_width"]
    board_height = state_copy["board_height"]
    beans_positions = state_copy[1]
    snakes_positions = {
        key: state_copy[key] for key in state_copy.keys() & {2, 3, 4, 5, 6, 7}
    }
    snakes_positions_list = []
    for value in snakes_positions.values():
        snakes_positions_list.append(value)
    snake_map = make_grid_map(
        board_width, board_height, beans_positions, snakes_positions
    )
    state_ = np.array(snake_map)
    state_ = np.squeeze(state_, axis=2)

    observations = np.zeros((3, obs_dim))
    snakes_position = np.array(snakes_positions_list, dtype=object)
    beans_position = np.array(beans_positions, dtype=object).flatten()
    for i, element in enumerate(agents_index):
        # self head position
        observations[i][:2] = snakes_positions_list[element][0][:]

        # head surroundings
        head_x = snakes_positions_list[element][0][1]
        head_y = snakes_positions_list[element][0][0]

        head_surrounding = get_surrounding(state_, width, height, head_x, head_y)
        observations[i][2:6] = head_surrounding[:]

        # beans positions
        observations[i][6:16] = beans_position[:]

        # other snake positions
        snake_heads = np.array([snake[0] for snake in snakes_position])
        snake_heads = np.delete(snake_heads, i, 0)
        observations[i][16:] = snake_heads.flatten()[:]
    return observations


class Actor(nn.Module):
    def __init__(self, obs_dim, act_dim, num_agents, args, output_activation="softmax"):
        super().__init__()

        self.obs_dim = obs_dim
        self.act_dim = act_dim
        self.num_agents = num_agents

        self.args = args

        sizes_prev = [obs_dim, HIDDEN_SIZE]
        sizes_post = [HIDDEN_SIZE, HIDDEN_SIZE, act_dim]

        self.prev_dense = mlp(sizes_prev)
        self.post_dense = mlp(sizes_post, output_activation=output_activation)

    def forward(self, obs_batch):
        out = self.prev_dense(obs_batch)
        out = self.post_dense(out)
        return out


class RLAgent(object):
    def __init__(self, obs_dim, act_dim, num_agent):
        self.obs_dim = obs_dim
        self.act_dim = act_dim
        self.num_agent = num_agent
        self.device = device
        self.output_activation = "softmax"
        # Pass args=None explicitly so output_activation is not silently
        # consumed by Actor's unused args parameter.
        self.actor = Actor(
            obs_dim, act_dim, num_agent, None, self.output_activation
        ).to(self.device)

    def choose_action(self, obs):
        obs = torch.Tensor([obs]).to(self.device)
        logits = self.actor(obs).cpu().detach().numpy()[0]
        return logits

    def select_action_to_env(self, obs, ctrl_index):
        logits = self.choose_action(obs)
        actions = logits2action(logits)
        action_to_env = to_joint_action(actions, ctrl_index)
        return action_to_env

    def load_model(self, filename):
        # map_location keeps loading working when the checkpoint was saved on GPU.
        self.actor.load_state_dict(torch.load(filename, map_location=self.device))


def to_joint_action(action, ctrl_index):
    # One-hot encode the controlled snake's action, as JiDi expects.
    joint_action_ = []
    action_a = action[ctrl_index]
    each = [0] * 4
    each[action_a] = 1
    joint_action_.append(each)
    return joint_action_


def logits2action(logits):
    # Sample one discrete action per snake from the softmax outputs.
    logits = torch.Tensor(logits).to(device)
    actions = np.array([Categorical(out).sample().item() for out in logits])
    return np.array(actions)


agent = RLAgent(26, 4, 3)
actor_net = os.path.dirname(os.path.abspath(__file__)) + "/actor_2000.pth"
assert Path(actor_net).exists(), (
    "actor_2000.pth does not exist; please download it from:"
    " https://github.com/CarlossShi/Competition_3v3snakes/tree/master/agent/rl"
)
agent.load_model(actor_net)


def my_controller(observation_list, action_space_list, is_act_continuous):
    obs_dim = 26
    obs = observation_list.copy()
    board_width = obs["board_width"]
    board_height = obs["board_height"]
    # Snake ids run from 2 to 7; ids 2-4 form one team and 5-7 the other.
    o_index = obs["controlled_snake_index"]  # 2, 3, 4, 5, 6, 7 -> indices = [0, 1, 2, 3, 4, 5]
    o_indices_min = 3 if o_index > 4 else 0
    indices = [o_indices_min, o_indices_min + 1, o_indices_min + 2]
    observation = get_observations(
        obs, indices, obs_dim, height=board_height, width=board_width
    )
    actions = agent.select_action_to_env(observation, indices.index(o_index - 2))
    return actions
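The index arithmetic in `my_controller` maps JiDi snake ids (2-7) onto per-team observation slots. A quick trace of that mapping, runnable on its own:

```python
# Trace the snake-id -> (team slots, controlled slot) mapping used above.
for o_index in (2, 3, 4, 5, 6, 7):
    o_min = 3 if o_index > 4 else 0
    team = [o_min, o_min + 1, o_min + 2]
    ctrl = team.index(o_index - 2)
    print(f"snake {o_index}: team slots {team}, controlled slot {ctrl}")
# snake 2: team slots [0, 1, 2], controlled slot 0
# ...
# snake 7: team slots [3, 4, 5], controlled slot 2
```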