Hanabi #71

Merged · 6 commits · May 10, 2020
1 change: 1 addition & 0 deletions pettingzoo/classic/__init__.py
@@ -11,3 +11,4 @@
from .dou_dizhu import dou_dizhu as dou_dizhu_v0
from .gin_rummy import gin_rummy as gin_rummy_v0
from .go import go_env as go_v0
from .hanabi.hanabi import env as hanabi_v0
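
With this registration, the Hanabi environment becomes importable alongside the other classic games. A minimal usage sketch (the hanabi_v0 alias follows the import line above; the configuration value is illustrative):

    from pettingzoo.classic import hanabi_v0
    hanabi = hanabi_v0(players=2)
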
360 changes: 360 additions & 0 deletions pettingzoo/classic/hanabi/hanabi.py
@@ -0,0 +1,360 @@
from typing import Optional, Dict, List, Union
import numpy as np
from gym import spaces
from pettingzoo import AECEnv
from pettingzoo.utils import agent_selector

"""
Wrapper class around Google DeepMind's Hanabi Learning Environment.
"""


class env(AECEnv):
    """This class encapsulates the endpoints provided in deepmind/hanabi-learning-environment/rl_env.py."""

    metadata = {'render.modes': ['human']}

    # set of all required params
    required_keys: set = {
        'colors',
        'ranks',
        'players',
        'hand_size',
        'max_information_tokens',
        'max_life_tokens',
        'observation_type',
        'seed',
        'random_start_player',
    }

    def __init__(self,
                 colors: int = 5,
                 ranks: int = 5,
                 players: int = 2,
                 hand_size: int = 2,
                 max_information_tokens: int = 8,
                 max_life_tokens: int = 3,
                 observation_type: int = 1,
                 seed: int = 1,
                 random_start_player: bool = False,
                 ):

"""
Parameter descriptions :
- colors: int, Number of colors \in [2,5].
- ranks: int, Number of ranks \in [2,5].
- players: int, Number of players \in [2,5].
- hand_size: int, Hand size \in [2,5].
- max_information_tokens: int, Number of information tokens (>=0).
- max_life_tokens: int, Number of life tokens (>=1).
- observation_type: int.
0: Minimal observation.
1: First-order common knowledge observation.
- seed: int, Random seed.
- random_start_player: bool, Random start player.

Common game configurations:
Hanabi-Full (default) : {
"colors": 5,
"ranks": 5,
"players": 2,
"max_information_tokens": 8,
"max_life_tokens": 3,
"observation_type": 1,
"hand_size": 2
}

Hanabi-Small : {
"colors": 5,
"ranks": 5,
"players": 2,
"max_information_tokens":
"max_life_tokens":
"observation_type": 1}

Hanabi-Very-Small : {
"colors": 2,
"ranks": 5,
"players": 2,
"max_information_tokens":
"max_life_tokens":
"observation_type": 1}

"""

        super().__init__()

        # Import the Hanabi learning environment; fail with an informative
        # error if the PyPI package is not installed.
        try:
            from hanabi_learning_environment.rl_env import HanabiEnv

        except ModuleNotFoundError:
            raise ImportError("Hanabi is not installed. "
                              "Run `pip3 install hanabi_learning_environment` from within your project environment. "
                              "Consult hanabi/README.md for detailed information.")

        else:

            # Check that all config values are within their permitted ranges.
            self._raise_error_if_config_values_out_of_range(colors,
                                                            ranks,
                                                            players,
                                                            hand_size,
                                                            max_information_tokens,
                                                            max_life_tokens,
                                                            observation_type,
                                                            random_start_player)


            self.hanabi_env: HanabiEnv = HanabiEnv(config={'colors': colors,
                                                           'ranks': ranks,
                                                           'players': players,
                                                           'hand_size': hand_size,
                                                           'max_information_tokens': max_information_tokens,
                                                           'max_life_tokens': max_life_tokens,
                                                           'observation_type': observation_type,
                                                           'random_start_player': random_start_player,
                                                           'seed': seed})

            # List of agent names
            self.agents = ["player_{}".format(i) for i in range(self.hanabi_env.players)]

            self.agent_selection: str

            # Sets the Hanabi game to a clean state and updates all internal dictionaries
            self.reset(observe=False)

            # Set action_spaces and observation_spaces based on params in hanabi_env
            self.action_spaces = {name: spaces.Discrete(self.hanabi_env.num_moves()) for name in self.agents}
            self.observation_spaces = {player_name: spaces.Box(low=0,
                                                               high=1,
                                                               shape=(1, 1, self.hanabi_env.vectorized_observation_shape()[0]),
                                                               dtype=np.float32)
                                       for player_name in self.agents}

    @staticmethod
    def _raise_error_if_config_values_out_of_range(colors, ranks, players, hand_size, max_information_tokens,
                                                   max_life_tokens, observation_type, random_start_player):

        if not (2 <= colors <= 5):
            raise ValueError(f'Config parameter colors={colors} is out of bounds. See description in hanabi.py.')

        elif not (2 <= ranks <= 5):
            raise ValueError(f'Config parameter ranks={ranks} is out of bounds. See description in hanabi.py.')

        elif not (2 <= players <= 5):
            raise ValueError(f'Config parameter players={players} is out of bounds. See description in hanabi.py.')

        elif not (2 <= hand_size <= 5):
            raise ValueError(f'Config parameter hand_size={hand_size} is out of bounds. See description in hanabi.py.')

        elif not (0 <= max_information_tokens):
            raise ValueError(f'Config parameter max_information_tokens={max_information_tokens} is out of bounds. '
                             f'See description in hanabi.py.')

        elif not (1 <= max_life_tokens):
            raise ValueError(f'Config parameter max_life_tokens={max_life_tokens} is out of bounds. '
                             f'See description in hanabi.py.')

        elif not (0 <= observation_type <= 1):
            raise ValueError(f'Config parameter observation_type={observation_type} is out of bounds. '
                             f'See description in hanabi.py.')

    @property
    def observation_vector_dim(self):
        return self.hanabi_env.vectorized_observation_shape()

    @property
    def num_agents(self):
        return len(self.agents)

    @property
    def legal_moves(self) -> List[int]:
        return self.infos[self.agent_selection]['legal_moves']

    @property
    def all_moves(self) -> List[int]:
        return list(range(0, self.hanabi_env.num_moves()))

    # ToDo: Fix Return value
    def reset(self, observe=True) -> Optional[List[int]]:
        """Resets the environment for a new game and returns the current player's observation as a list of ints.

        Returns:
            observation: Optional list of integers of length self.observation_vector_dim, describing the
             observations of the current agent (agent_selection).
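
        Example (illustrative sketch; `hanabi` is an instance of this env class):
            >>> obs = hanabi.reset()           # observation vector of the agent now at turn
            >>> hanabi.reset(observe=False)    # reset without returning an observation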
"""

        # Reset the underlying Hanabi reinforcement learning environment
        obs = self.hanabi_env.reset()

        # Reset agents and agent_selection
        self._reset_agents(player_number=obs['current_player'])

        # Reset internal state
        self._process_latest_observations(obs=obs)

        # If specified, return the observation of the current agent
        if observe:
            return self.observe(agent_name=self.agent_selection)
        else:
            return None

    def _reset_agents(self, player_number: int):
        """Rearranges self.agents, as pyhanabi may start with a different player after each reset()."""

        # Rotate self.agents until the starting player matches player_number
        while not self.agents[0] == 'player_' + str(player_number):
            self.agents = self.agents[1:] + [self.agents[0]]

        # Agent order list, on which the agent selector operates
        self.agent_order = list(self.agents)
        self._agent_selector = agent_selector(self.agent_order)

        # Reset agent_selection
        self.agent_selection = self._agent_selector.reset()

    def _step_agents(self):
        self.agent_selection = self._agent_selector.next()

    def step(self, action: int, observe: bool = True, as_vector: bool = True) -> Optional[Union[np.ndarray,
                                                                                                List[List[dict]]]]:
        """Advances the environment by one step. The action must be within self.legal_moves, otherwise an error is raised.

        Returns:
            observation: Optional new observation of the acting agent after the action step is performed.
             By default a vector of integers, describing the logical state of the game from the view of the agent.
             Can be returned as a descriptive dictionary if as_vector=False.
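
        Example (sketch of a typical interaction loop; the chosen action is illustrative):
            >>> hanabi = env()
            >>> action = hanabi.legal_moves[0]   # any entry of self.legal_moves is valid
            >>> obs = hanabi.step(action)        # updated observation of the acting agent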
"""

        agent_on_turn = self.agent_selection

        if action not in self.legal_moves:
            raise ValueError(f'Illegal action {action}. Please choose among the legal actions, as documented in dict self.infos.')

        else:
            # Iterate agent_selection
            self._step_agents()

            # Apply action
            all_observations, reward, done, _ = self.hanabi_env.step(action=action)

            # Update internal state
            self._process_latest_observations(obs=all_observations, reward=reward, done=done)

            # Return the latest observation if specified
            if observe:
                return self.observe(agent_name=agent_on_turn, as_vector=as_vector)

    def observe(self, agent_name: str, as_vector: bool = True) -> Union[np.ndarray, List]:
        """Returns the stored observation of agent_name, vectorized by default or as a dictionary."""
        if as_vector:
            return np.array([[self.infos[agent_name]['observations_vectorized']]], np.int32)
        else:
            return self.infos[agent_name]['observations']

    def _process_latest_observations(self, obs: Dict, reward: Optional[float] = 0, done: Optional[bool] = False):
        """Updates the internal state."""

        self.latest_observations = obs
        self.rewards = {player_name: reward for player_name in self.agents}
        self.dones = {player_name: done for player_name in self.agents}

        # Each agent name carries its player index as the trailing character ('player_0', ...),
        # which indexes into the list of per-player observations.
        self.infos = {
            player_name: dict(
                legal_moves=self.latest_observations['player_observations'][int(player_name[-1])]['legal_moves_as_int'],
                legal_moves_as_dict=self.latest_observations['player_observations'][int(player_name[-1])]['legal_moves'],
                observations_vectorized=self.latest_observations['player_observations'][int(player_name[-1])]['vectorized'],
                observations=self.latest_observations['player_observations'][int(player_name[-1])])
            for player_name in self.agents}

    def render(self, mode='human'):
        """Supports console print only. Prints the whole status dictionary.

        Example:
            {'current_player': 0,
             'player_observations': [{'current_player': 0,
                                      'current_player_offset': 0,
                                      'deck_size': 40,
                                      'discard_pile': [],
                                      'fireworks': {'B': 0, 'G': 0, 'R': 0, 'W': 0, 'Y': 0},
                                      'information_tokens': 8,
                                      'legal_moves': [{'action_type': 'PLAY', 'card_index': 0},
                                                      {'action_type': 'PLAY', 'card_index': 1},
                                                      {'action_type': 'PLAY', 'card_index': 2},
                                                      {'action_type': 'PLAY', 'card_index': 3},
                                                      {'action_type': 'PLAY', 'card_index': 4},
                                                      {'action_type': 'REVEAL_COLOR', 'color': 'R', 'target_offset': 1},
                                                      {'action_type': 'REVEAL_COLOR', 'color': 'G', 'target_offset': 1},
                                                      {'action_type': 'REVEAL_COLOR', 'color': 'B', 'target_offset': 1},
                                                      {'action_type': 'REVEAL_RANK', 'rank': 0, 'target_offset': 1},
                                                      {'action_type': 'REVEAL_RANK', 'rank': 1, 'target_offset': 1},
                                                      {'action_type': 'REVEAL_RANK', 'rank': 2, 'target_offset': 1}],
                                      'life_tokens': 3,
                                      'observed_hands': [[{'color': None, 'rank': -1},
                                                          {'color': None, 'rank': -1},
                                                          {'color': None, 'rank': -1},
                                                          {'color': None, 'rank': -1},
                                                          {'color': None, 'rank': -1}],
                                                         [{'color': 'G', 'rank': 2},
                                                          {'color': 'R', 'rank': 0},
                                                          {'color': 'R', 'rank': 1},
                                                          {'color': 'B', 'rank': 0},
                                                          {'color': 'R', 'rank': 1}]],
                                      'num_players': 2,
                                      'vectorized': [0, 0, 1, ...]},
                                     {'current_player': 0,
                                      'current_player_offset': 1,
                                      'deck_size': 40,
                                      'discard_pile': [],
                                      'fireworks': {'B': 0, 'G': 0, 'R': 0, 'W': 0, 'Y': 0},
                                      'information_tokens': 8,
                                      'legal_moves': [],
                                      'life_tokens': 3,
                                      'observed_hands': [[{'color': None, 'rank': -1},
                                                          {'color': None, 'rank': -1},
                                                          {'color': None, 'rank': -1},
                                                          {'color': None, 'rank': -1},
                                                          {'color': None, 'rank': -1}],
                                                         [{'color': 'W', 'rank': 2},
                                                          {'color': 'Y', 'rank': 4},
                                                          {'color': 'Y', 'rank': 2},
                                                          {'color': 'G', 'rank': 0},
                                                          {'color': 'W', 'rank': 1}]],
                                      'num_players': 2,
                                      'vectorized': [0, 0, 1, ...]}]}
        """
        print(self.latest_observations)

    def close(self):
        pass