From c3dc05674cfb0294b2c5050f9c3680053f24a0b2 Mon Sep 17 00:00:00 2001 From: Elliot Tower Date: Fri, 1 Sep 2023 18:11:38 -0400 Subject: [PATCH] Update environment creation tutorials (#1082) Co-authored-by: ggsavin --- .github/workflows/linux-tutorials-test.yml | 2 +- .pre-commit-config.yaml | 4 ++++ docs/code_examples/aec_rps_usage.py | 16 ++++++++++++++++ docs/code_examples/parallel_rps.py | 2 ++ docs/code_examples/parallel_rps_usage.py | 11 +++++++++++ docs/content/environment_creation.md | 19 +++++++++++++++++++ docs/index.md | 2 +- .../1-project-structure.md | 0 .../2-environment-logic.md | 0 .../3-action-masking.md | 0 .../4-testing-your-environment.md | 0 .../5-using-your-environment.md | 0 .../index.md | 8 +++++--- pettingzoo/__init__.py | 2 +- pettingzoo/test/parallel_test.py | 4 ++++ pettingzoo/utils/conversions.py | 14 ++++++++++++++ tutorials/CleanRL/requirements.txt | 2 +- tutorials/CustomEnvironment/requirements.txt | 1 + .../tutorial1_skeleton_creation.py | 0 .../tutorial2_adding_game_logic.py | 10 ++++++++-- .../tutorial3_action_masking.py | 8 ++++++-- .../tutorial4_testing_the_environment.py | 11 +++++++++++ .../4-TestingTheEnvironment.txt | 0 .../EnvironmentCreation/5-UsingWithAPI.txt | 0 .../EnvironmentCreation/6-UsingWithRL.txt | 0 .../EnvironmentCreation/requirements.txt | 1 - .../tutorial4_testing_the_environment.py | 7 ------- tutorials/Ray/requirements.txt | 7 ++++--- tutorials/SB3/connect_four/requirements.txt | 2 +- tutorials/SB3/kaz/requirements.txt | 2 +- tutorials/SB3/pistonball/requirements.txt | 2 +- tutorials/SB3/test/requirements.txt | 2 +- tutorials/SB3/waterworld/requirements.txt | 2 +- 33 files changed, 114 insertions(+), 27 deletions(-) create mode 100644 docs/code_examples/aec_rps_usage.py create mode 100644 docs/code_examples/parallel_rps_usage.py rename docs/tutorials/{environmentcreation => custom_environment}/1-project-structure.md (100%) rename docs/tutorials/{environmentcreation => 
custom_environment}/2-environment-logic.md (100%) rename docs/tutorials/{environmentcreation => custom_environment}/3-action-masking.md (100%) rename docs/tutorials/{environmentcreation => custom_environment}/4-testing-your-environment.md (100%) rename docs/tutorials/{environmentcreation => custom_environment}/5-using-your-environment.md (100%) rename docs/tutorials/{environmentcreation => custom_environment}/index.md (53%) create mode 100644 tutorials/CustomEnvironment/requirements.txt rename tutorials/{EnvironmentCreation => CustomEnvironment}/tutorial1_skeleton_creation.py (100%) rename tutorials/{EnvironmentCreation => CustomEnvironment}/tutorial2_adding_game_logic.py (94%) rename tutorials/{EnvironmentCreation => CustomEnvironment}/tutorial3_action_masking.py (96%) create mode 100644 tutorials/CustomEnvironment/tutorial4_testing_the_environment.py delete mode 100644 tutorials/EnvironmentCreation/4-TestingTheEnvironment.txt delete mode 100644 tutorials/EnvironmentCreation/5-UsingWithAPI.txt delete mode 100644 tutorials/EnvironmentCreation/6-UsingWithRL.txt delete mode 100644 tutorials/EnvironmentCreation/requirements.txt delete mode 100644 tutorials/EnvironmentCreation/tutorial4_testing_the_environment.py diff --git a/.github/workflows/linux-tutorials-test.yml b/.github/workflows/linux-tutorials-test.yml index 20ba3000c..dc5600a21 100644 --- a/.github/workflows/linux-tutorials-test.yml +++ b/.github/workflows/linux-tutorials-test.yml @@ -19,7 +19,7 @@ jobs: fail-fast: false matrix: python-version: ['3.8', '3.9', '3.10', '3.11'] - tutorial: ['Tianshou', 'EnvironmentCreation', 'CleanRL', 'SB3/kaz', 'SB3/waterworld', 'SB3/connect_four', 'SB3/test'] # TODO: add back Ray once next release after 2.6.2 + tutorial: ['Tianshou', 'CustomEnvironment', 'CleanRL', 'SB3/kaz', 'SB3/waterworld', 'SB3/connect_four', 'SB3/test'] # TODO: add back Ray once next release after 2.6.2 steps: - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} diff --git 
a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 735642935..08174efa7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -75,3 +75,7 @@ repos: additional_dependencies: ["pyright"] args: - --project=pyproject.toml + - repo: https://github.com/python-jsonschema/check-jsonschema + rev: 0.26.3 + hooks: + - id: check-github-workflows diff --git a/docs/code_examples/aec_rps_usage.py b/docs/code_examples/aec_rps_usage.py new file mode 100644 index 000000000..71edc4e73 --- /dev/null +++ b/docs/code_examples/aec_rps_usage.py @@ -0,0 +1,16 @@ +import aec_rps + +env = aec_rps.env(render_mode="human") +env.reset(seed=42) + +for agent in env.agent_iter(): + observation, reward, termination, truncation, info = env.last() + + if termination or truncation: + action = None + else: + # this is where you would insert your policy + action = env.action_space(agent).sample() + + env.step(action) +env.close() diff --git a/docs/code_examples/parallel_rps.py b/docs/code_examples/parallel_rps.py index 5d54c6483..383659666 100644 --- a/docs/code_examples/parallel_rps.py +++ b/docs/code_examples/parallel_rps.py @@ -130,6 +130,7 @@ def reset(self, seed=None, options=None): self.num_moves = 0 observations = {agent: NONE for agent in self.agents} infos = {agent: {} for agent in self.agents} + self.state = observations return observations, infos @@ -165,6 +166,7 @@ def step(self, actions): self.agents[i]: int(actions[self.agents[1 - i]]) for i in range(len(self.agents)) } + self.state = observations # typically there won't be any information in the infos, but there must # still be an entry for each agent diff --git a/docs/code_examples/parallel_rps_usage.py b/docs/code_examples/parallel_rps_usage.py new file mode 100644 index 000000000..38949eb78 --- /dev/null +++ b/docs/code_examples/parallel_rps_usage.py @@ -0,0 +1,11 @@ +import parallel_rps + +env = parallel_rps.parallel_env(render_mode="human") +observations, infos = env.reset() + +while env.agents: + # this is 
where you would insert your policy + actions = {agent: env.action_space(agent).sample() for agent in env.agents} + + observations, rewards, terminations, truncations, infos = env.step(actions) +env.close() diff --git a/docs/content/environment_creation.md b/docs/content/environment_creation.md index f0cb89023..717c66bc2 100644 --- a/docs/content/environment_creation.md +++ b/docs/content/environment_creation.md @@ -5,6 +5,11 @@ title: Environment Creation This documentation overviews creating new environments and relevant useful wrappers, utilities and tests included in PettingZoo designed for the creation of new environments. + +We will walk through the creation of a simple Rock-Paper-Scissors environment, with example code for both [AEC](/api/aec/) and [Parallel](/api/parallel/) environments. + +See our [Custom Environment Tutorial](/tutorials/custom_environment/index) for a full walkthrough on creating custom environments, including complex environment logic and illegal action masking. + ## Example Custom Environment This is a carefully commented version of the PettingZoo rock paper scissors environment. @@ -14,6 +19,13 @@ This is a carefully commented version of the PettingZoo rock paper scissors envi :language: python ``` +To interact with your custom AEC environment, use the following code: + +```{eval-rst} +.. literalinclude:: ../code_examples/aec_rps_usage.py + :language: python +``` + ## Example Custom Parallel Environment ```{eval-rst} @@ -21,6 +33,13 @@ This is a carefully commented version of the PettingZoo rock paper scissors envi :language: python ``` +To interact with your custom parallel environment, use the following code: + +```{eval-rst} +.. literalinclude:: ../code_examples/parallel_rps_usage.py + :language: python +``` + ## Using Wrappers A wrapper is an environment transformation that takes in an environment as input, and outputs a new environment that is similar to the input environment, but with some transformation or validation applied.
PettingZoo provides [wrappers to convert environments](/api/pz_wrappers) back and forth between the AEC API and the Parallel API and a set of simple [utility wrappers](/api/pz_wrappers) which provide input validation and other convenient reusable logic. PettingZoo also includes [wrappers](/api/supersuit_wrappers) via the SuperSuit companion package (`pip install supersuit`). diff --git a/docs/index.md b/docs/index.md index 69ff05962..cf9613924 100644 --- a/docs/index.md +++ b/docs/index.md @@ -39,7 +39,7 @@ environments/third_party_envs :hidden: :caption: Tutorials -tutorials/environmentcreation/index +tutorials/custom_environment/index tutorials/cleanrl/index tutorials/tianshou/index tutorials/rllib/index diff --git a/docs/tutorials/environmentcreation/1-project-structure.md b/docs/tutorials/custom_environment/1-project-structure.md similarity index 100% rename from docs/tutorials/environmentcreation/1-project-structure.md rename to docs/tutorials/custom_environment/1-project-structure.md diff --git a/docs/tutorials/environmentcreation/2-environment-logic.md b/docs/tutorials/custom_environment/2-environment-logic.md similarity index 100% rename from docs/tutorials/environmentcreation/2-environment-logic.md rename to docs/tutorials/custom_environment/2-environment-logic.md diff --git a/docs/tutorials/environmentcreation/3-action-masking.md b/docs/tutorials/custom_environment/3-action-masking.md similarity index 100% rename from docs/tutorials/environmentcreation/3-action-masking.md rename to docs/tutorials/custom_environment/3-action-masking.md diff --git a/docs/tutorials/environmentcreation/4-testing-your-environment.md b/docs/tutorials/custom_environment/4-testing-your-environment.md similarity index 100% rename from docs/tutorials/environmentcreation/4-testing-your-environment.md rename to docs/tutorials/custom_environment/4-testing-your-environment.md diff --git a/docs/tutorials/environmentcreation/5-using-your-environment.md 
b/docs/tutorials/custom_environment/5-using-your-environment.md similarity index 100% rename from docs/tutorials/environmentcreation/5-using-your-environment.md rename to docs/tutorials/custom_environment/5-using-your-environment.md diff --git a/docs/tutorials/environmentcreation/index.md b/docs/tutorials/custom_environment/index.md similarity index 53% rename from docs/tutorials/environmentcreation/index.md rename to docs/tutorials/custom_environment/index.md index 36d0a1799..c82348759 100644 --- a/docs/tutorials/environmentcreation/index.md +++ b/docs/tutorials/custom_environment/index.md @@ -1,10 +1,10 @@ --- -title: "Environment Creation" +title: "Custom Environment Tutorial" --- -# Environment Creation Tutorial +# Custom Environment Tutorial -These tutorials walk you though creating a custom environment from scratch, and are recommended as a starting point for anyone new to PettingZoo. +These tutorials walk you through the full process of creating a custom environment from scratch, and are recommended as a starting point for anyone new to PettingZoo. 1. [Project Structure](/tutorials/environmentcreation/1-project-structure.md) @@ -14,6 +14,8 @@ These tutorials walk you though creating a custom environment from scratch, and 4. [Testing Your Environment](/tutorials/environmentcreation/4-testing-your-environment.md) +For a simpler example environment, including both [AEC](/api/aec/) and [Parallel](/api/parallel/) implementations, see our [Environment Creation](/content/environment_creation/) documentation.
+ ```{toctree} :hidden: diff --git a/pettingzoo/__init__.py b/pettingzoo/__init__.py index 9f58764c9..2f20788c1 100644 --- a/pettingzoo/__init__.py +++ b/pettingzoo/__init__.py @@ -12,7 +12,7 @@ os.environ["PYGAME_HIDE_SUPPORT_PROMPT"] = "hide" -__version__ = "1.24.0" +__version__ = "1.24.1" try: import sys diff --git a/pettingzoo/test/parallel_test.py b/pettingzoo/test/parallel_test.py index 2ba450a1b..4209ba296 100644 --- a/pettingzoo/test/parallel_test.py +++ b/pettingzoo/test/parallel_test.py @@ -46,8 +46,11 @@ def parallel_api_test(par_env: ParallelEnv, num_cycles=1000): MAX_RESETS = 2 for _ in range(MAX_RESETS): obs, infos = par_env.reset() + assert isinstance(obs, dict) + assert isinstance(infos, dict) assert set(obs.keys()) == (set(par_env.agents)) + assert set(infos.keys()) == (set(par_env.agents)) terminated = {agent: False for agent in par_env.agents} truncated = {agent: False for agent in par_env.agents} live_agents = set(par_env.agents[:]) @@ -127,3 +130,4 @@ def parallel_api_test(par_env: ParallelEnv, num_cycles=1000): if len(live_agents) == 0: break + print("Passed Parallel API test") diff --git a/pettingzoo/utils/conversions.py b/pettingzoo/utils/conversions.py index 48cf1fadb..589754cb8 100644 --- a/pettingzoo/utils/conversions.py +++ b/pettingzoo/utils/conversions.py @@ -1,3 +1,4 @@ +# pyright: reportGeneralTypeIssues=false import copy import warnings from collections import defaultdict @@ -304,6 +305,19 @@ def reset(self, seed=None, options=None): self.terminations = {agent: False for agent in self.agents} self.truncations = {agent: False for agent in self.agents} self.rewards = {agent: 0 for agent in self.agents} + + # Every environment needs to return infos that contain self.agents as their keys + if not self.infos: + warnings.warn( + "The `infos` dictionary returned by `env.reset` was empty. 
OverwritingAgent IDs will be used as keys" + ) + self.infos = {agent: {} for agent in self.agents} + elif set(self.infos.keys()) != set(self.agents): + self.infos = {agent: {self.infos.copy()} for agent in self.agents} + warnings.warn( + f"The `infos` dictionary returned by `env.reset()` is not valid: must contain keys for each agent defined in self.agents: {self.agents}. Overwriting with current info duplicated for each agent: {self.infos}" + ) + self._cumulative_rewards = {agent: 0 for agent in self.agents} self.new_agents = [] self.new_values = {} diff --git a/tutorials/CleanRL/requirements.txt b/tutorials/CleanRL/requirements.txt index 07894712e..87c5e0a80 100644 --- a/tutorials/CleanRL/requirements.txt +++ b/tutorials/CleanRL/requirements.txt @@ -1,4 +1,4 @@ -pettingzoo[butterfly,atari,testing]>=1.23.1 +pettingzoo[butterfly,atari,testing]>=1.24.0 SuperSuit>=3.9.0 tensorboard>=2.11.2 torch>=1.13.1 diff --git a/tutorials/CustomEnvironment/requirements.txt b/tutorials/CustomEnvironment/requirements.txt new file mode 100644 index 000000000..691bc14a8 --- /dev/null +++ b/tutorials/CustomEnvironment/requirements.txt @@ -0,0 +1 @@ +pettingzoo==1.24.0 diff --git a/tutorials/EnvironmentCreation/tutorial1_skeleton_creation.py b/tutorials/CustomEnvironment/tutorial1_skeleton_creation.py similarity index 100% rename from tutorials/EnvironmentCreation/tutorial1_skeleton_creation.py rename to tutorials/CustomEnvironment/tutorial1_skeleton_creation.py diff --git a/tutorials/EnvironmentCreation/tutorial2_adding_game_logic.py b/tutorials/CustomEnvironment/tutorial2_adding_game_logic.py similarity index 94% rename from tutorials/EnvironmentCreation/tutorial2_adding_game_logic.py rename to tutorials/CustomEnvironment/tutorial2_adding_game_logic.py index 5c06a14a9..a52222815 100644 --- a/tutorials/EnvironmentCreation/tutorial2_adding_game_logic.py +++ b/tutorials/CustomEnvironment/tutorial2_adding_game_logic.py @@ -44,7 +44,11 @@ def reset(self, seed=None, options=None): ) for a 
in self.agents } - return observations, {} + + # Get dummy infos. Necessary for proper parallel_to_aec conversion + infos = {a: {} for a in self.agents} + + return observations, infos def step(self, actions): # Execute actions @@ -85,7 +89,6 @@ def step(self, actions): if self.timestep > 100: rewards = {"prisoner": 0, "guard": 0} truncations = {"prisoner": True, "guard": True} - self.agents = [] self.timestep += 1 # Get observations @@ -101,6 +104,9 @@ def step(self, actions): # Get dummy infos (not used in this example) infos = {a: {} for a in self.agents} + if any(terminations.values()) or all(truncations.values()): + self.agents = [] + return observations, rewards, terminations, truncations, infos def render(self): diff --git a/tutorials/EnvironmentCreation/tutorial3_action_masking.py b/tutorials/CustomEnvironment/tutorial3_action_masking.py similarity index 96% rename from tutorials/EnvironmentCreation/tutorial3_action_masking.py rename to tutorials/CustomEnvironment/tutorial3_action_masking.py index b01639f3e..3d70d4893 100644 --- a/tutorials/EnvironmentCreation/tutorial3_action_masking.py +++ b/tutorials/CustomEnvironment/tutorial3_action_masking.py @@ -8,7 +8,7 @@ from pettingzoo import ParallelEnv -class CustomEnvironment(ParallelEnv): +class CustomActionMaskedEnvironment(ParallelEnv): metadata = { "name": "custom_environment_v0", } @@ -45,7 +45,11 @@ def reset(self, seed=None, options=None): "prisoner": {"observation": observation, "action_mask": [0, 1, 1, 0]}, "guard": {"observation": observation, "action_mask": [1, 0, 0, 1]}, } - return observations, {} + + # Get dummy infos. 
Necessary for proper parallel_to_aec conversion + infos = {a: {} for a in self.agents} + + return observations, infos def step(self, actions): # Execute actions diff --git a/tutorials/CustomEnvironment/tutorial4_testing_the_environment.py b/tutorials/CustomEnvironment/tutorial4_testing_the_environment.py new file mode 100644 index 000000000..ac6a867e1 --- /dev/null +++ b/tutorials/CustomEnvironment/tutorial4_testing_the_environment.py @@ -0,0 +1,11 @@ +from tutorial2_adding_game_logic import CustomEnvironment +from tutorial3_action_masking import CustomActionMaskedEnvironment + +from pettingzoo.test import parallel_api_test + +if __name__ == "__main__": + env = CustomEnvironment() + parallel_api_test(env, num_cycles=1_000_000) + + env = CustomActionMaskedEnvironment() + parallel_api_test(env, num_cycles=1_000_000) diff --git a/tutorials/EnvironmentCreation/4-TestingTheEnvironment.txt b/tutorials/EnvironmentCreation/4-TestingTheEnvironment.txt deleted file mode 100644 index e69de29bb..000000000 diff --git a/tutorials/EnvironmentCreation/5-UsingWithAPI.txt b/tutorials/EnvironmentCreation/5-UsingWithAPI.txt deleted file mode 100644 index e69de29bb..000000000 diff --git a/tutorials/EnvironmentCreation/6-UsingWithRL.txt b/tutorials/EnvironmentCreation/6-UsingWithRL.txt deleted file mode 100644 index e69de29bb..000000000 diff --git a/tutorials/EnvironmentCreation/requirements.txt b/tutorials/EnvironmentCreation/requirements.txt deleted file mode 100644 index ae5c33767..000000000 --- a/tutorials/EnvironmentCreation/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -pettingzoo==1.23.0 diff --git a/tutorials/EnvironmentCreation/tutorial4_testing_the_environment.py b/tutorials/EnvironmentCreation/tutorial4_testing_the_environment.py deleted file mode 100644 index cbfbf41b2..000000000 --- a/tutorials/EnvironmentCreation/tutorial4_testing_the_environment.py +++ /dev/null @@ -1,7 +0,0 @@ -from tutorial3_action_masking import CustomEnvironment - -from pettingzoo.test import 
parallel_api_test - -if __name__ == "__main__": - env = CustomEnvironment() - parallel_api_test(env, num_cycles=1_000_000) diff --git a/tutorials/Ray/requirements.txt b/tutorials/Ray/requirements.txt index 62909ce6b..3cd41dae8 100644 --- a/tutorials/Ray/requirements.txt +++ b/tutorials/Ray/requirements.txt @@ -1,6 +1,7 @@ -PettingZoo[classic, butterfly]==1.23.1 +PettingZoo[classic,butterfly]>=1.24.0 Pillow>=9.4.0 -ray[rllib]>2.6.2 -SuperSuit==3.8.0 +# note: currently requires nightly release, see https://docs.ray.io/en/latest/ray-overview/installation.html#daily-releases-nightlies +ray[rllib]>2.6.3 +SuperSuit>=3.9.0 torch>=1.13.1 tensorflow-probability>=0.19.0 diff --git a/tutorials/SB3/connect_four/requirements.txt b/tutorials/SB3/connect_four/requirements.txt index 30917f7b2..bf7c59673 100644 --- a/tutorials/SB3/connect_four/requirements.txt +++ b/tutorials/SB3/connect_four/requirements.txt @@ -1,3 +1,3 @@ -pettingzoo[classic]>=1.23.1 +pettingzoo[classic]>=1.24.0 stable-baselines3>=2.0.0 sb3-contrib>=2.0.0 diff --git a/tutorials/SB3/kaz/requirements.txt b/tutorials/SB3/kaz/requirements.txt index c0e9ef734..01a14c748 100644 --- a/tutorials/SB3/kaz/requirements.txt +++ b/tutorials/SB3/kaz/requirements.txt @@ -1,3 +1,3 @@ -pettingzoo[butterfly]>=1.23.1 +pettingzoo[butterfly]>=1.24.0 stable-baselines3>=2.0.0 supersuit>=3.9.0 diff --git a/tutorials/SB3/pistonball/requirements.txt b/tutorials/SB3/pistonball/requirements.txt index c0e9ef734..01a14c748 100644 --- a/tutorials/SB3/pistonball/requirements.txt +++ b/tutorials/SB3/pistonball/requirements.txt @@ -1,3 +1,3 @@ -pettingzoo[butterfly]>=1.23.1 +pettingzoo[butterfly]>=1.24.0 stable-baselines3>=2.0.0 supersuit>=3.9.0 diff --git a/tutorials/SB3/test/requirements.txt b/tutorials/SB3/test/requirements.txt index 838ea192b..95e118fdd 100644 --- a/tutorials/SB3/test/requirements.txt +++ b/tutorials/SB3/test/requirements.txt @@ -1,4 +1,4 @@ -pettingzoo[classic]>=1.23.1 +pettingzoo[classic]>=1.24.0 stable-baselines3>=2.0.0 
sb3-contrib>=2.0.0 pytest diff --git a/tutorials/SB3/waterworld/requirements.txt b/tutorials/SB3/waterworld/requirements.txt index 0b91a8538..87d3b18d7 100644 --- a/tutorials/SB3/waterworld/requirements.txt +++ b/tutorials/SB3/waterworld/requirements.txt @@ -1,4 +1,4 @@ -pettingzoo[sisl]>=1.23.1 +pettingzoo[sisl]>=1.24.0 stable-baselines3>=2.0.0 supersuit>=3.9.0 pymunk