-
Notifications
You must be signed in to change notification settings - Fork 3
/
simple_follower.py
105 lines (82 loc) · 3.94 KB
/
simple_follower.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
"""This file defines a follower that only understands a very simple language.
This is only useful for testing purposes. Theoretically, you could have
automated self-play with a simple_follower and simple_leader, but the data would
not be very diverse.
"""
import logging
from dataclasses import dataclass
from mashumaro.mixins.json import DataClassJSONMixin
from cb2game.agents.agent import Agent, Role
from cb2game.pyclient.game_endpoint import Action, GameState
logger = logging.getLogger(__name__)
@dataclass
class SimpleFollowerConfig(DataClassJSONMixin):
    """Settings for a SimpleFollower agent.

    Attributes:
        default_action: Name of the action to take if an instruction is
            received with no parsable commands. Must be a valid value of the
            Action.ActionCode enum, defined in the pyclient.game_endpoint
            file.
    """

    default_action: str = "INSTRUCTION_DONE"
class SimpleFollower(Agent):
    """Follower agent that executes comma-separated movement commands.

    The leader's instruction text is parsed into a queue of actions, which
    are then returned one at a time from choose_action(). Intended for
    testing only.
    """

    def __init__(self, config: SimpleFollowerConfig):
        """Create a follower with an empty action queue.

        Args:
            config: Supplies the default action used when an instruction
                contains no parsable commands.
        """
        # UUIDs of every instruction whose text has been parsed so far.
        self.instructions_processed = set()
        # Pending actions, consumed front-to-back by choose_action().
        self.actions = []
        self.config = config

    # OVERRIDES role
    def role(self) -> Role:
        """Return Role.FOLLOWER; this agent never acts as leader."""
        return Role.FOLLOWER

    # OVERRIDES choose_action
    def choose_action(self, game_state: GameState, action_mask=None) -> Action:
        """Chooses an action to take, given a game state.

        Action masking is not supported for this agent; action_mask is
        ignored.

        This uses a very simple language to communicate with the leader. The
        leader specifies actions in an instruction like:

            instruction: "forward, left, left, random, right, back".

        Each command is turned into the matching follower action, and
        "random" becomes Action.RandomMovementAction(). The queued actions
        are returned one per call, followed by an InstructionDone for that
        instruction.

        If the instruction contains no parsable commands, the action named by
        config.default_action is taken instead. When that default is not
        itself INSTRUCTION_DONE, an InstructionDone is still queued for the
        next call so the instruction is always closed out.

        Args:
            game_state: Six-element state that unpacks as
                (map, cards, turn_state, instructions, actors, feedback).
            action_mask: Unused.

        Returns:
            The next action to perform, or Action.NoopAction() when there is
            no pending action and no active instruction.
        """
        # Only the instruction list is used. Discarding the rest avoids
        # shadowing the builtin `map` and binding unused locals.
        _, _, _, instructions, _, _ = game_state

        # If no pending actions, parse them from the active instruction.
        if not self.actions:
            active_instruction = _get_active_instruction(instructions)
            if active_instruction is None:
                logger.info(
                    "No active instruction available. Invalid state. Taking NoopAction."
                )
                return Action.NoopAction()

            parsed = _actions_from_instruction(active_instruction.text)
            instruction_done = Action.InstructionDone(active_instruction.uuid)
            self.instructions_processed.add(active_instruction.uuid)

            if not parsed:
                # Nothing parsable: fall back to the configured default.
                # Previously InstructionDone was queued unconditionally before
                # this check, so the fallback could never run and the config
                # value was silently ignored.
                logger.info(
                    f"Ran out of commands to follow. Choosing {self.config.default_action}."
                )
                default_action_code = Action.ActionCode.from_str(
                    self.config.default_action
                )
                if default_action_code == Action.ActionCode.INSTRUCTION_DONE:
                    return instruction_done
                # Take the default action now and finish the instruction on
                # the next call, so it is not re-parsed forever.
                self.actions.append(instruction_done)
                return Action(default_action_code)

            self.actions.extend(parsed)
            self.actions.append(instruction_done)

        # Return (and remove) the next queued action.
        return self.actions.pop(0)
def _actions_from_instruction(instruction):
actions = []
instruction_action_codes = instruction.split(",")
for action_code in instruction_action_codes:
action_code = action_code.strip().lower()
if len(action_code) == 0:
continue
if "forward".startswith(action_code):
actions.append(Action.Forwards())
elif "backward".startswith(action_code):
actions.append(Action.Backwards())
elif "left".startswith(action_code):
actions.append(Action.Left())
elif "right".startswith(action_code):
actions.append(Action.Right())
elif "random".startswith(action_code):
actions.append(Action.RandomMovementAction())
return actions
def _get_active_instruction(instructions):
for instruction in instructions:
if not instruction.completed and not instruction.cancelled:
return instruction
return None