From b7a890a985659360cf4f5dfa6da9abf39e8e9dba Mon Sep 17 00:00:00 2001 From: Talendar <51515680+Talendar@users.noreply.github.com> Date: Sun, 7 Feb 2021 23:23:52 -0300 Subject: [PATCH] Refactored render and added RGB env --- README.md | 27 +-- flappy_bird_gym/__init__.py | 23 ++- flappy_bird_gym/envs/__init__.py | 3 +- flappy_bird_gym/envs/flappy_bird_env_rgb.py | 156 ++++++++++++++++++ ..._bird_env.py => flappy_bird_env_simple.py} | 45 ++--- flappy_bird_gym/envs/renderer.py | 104 +++++++++--- flappy_bird_gym/envs/utils.py | 49 +++--- setup.py | 2 +- tests/test_rgb_env_human.py | 137 +++++++++++++++ ..._env_human.py => test_simple_env_human.py} | 11 +- ...nv_random.py => test_simple_env_random.py} | 3 +- 11 files changed, 475 insertions(+), 85 deletions(-) create mode 100644 flappy_bird_gym/envs/flappy_bird_env_rgb.py rename flappy_bird_gym/envs/{flappy_bird_env.py => flappy_bird_env_simple.py} (81%) create mode 100644 tests/test_rgb_env_human.py rename tests/{test_env_human.py => test_simple_env_human.py} (92%) rename tests/{test_env_random.py => test_simple_env_random.py} (95%) diff --git a/README.md b/README.md index ac3acc4..124fb02 100644 --- a/README.md +++ b/README.md @@ -4,16 +4,21 @@ [![PyPI](https://img.shields.io/pypi/v/flappy-bird-gym)](https://pypi.org/project/flappy-bird-gym/) [![License](https://img.shields.io/github/license/Talendar/flappy-bird-gym)](https://github.com/Talendar/flappy-bird-gym/blob/master/LICENSE) -This repository contains an implementation of an OpenAI Gym environment for the Flappy Bird -game. It's based on [FlapPyBird](https://github.com/sourabhv/FlapPyBird), by -[@sourabhv](https://github.com/sourabhv). Currently, the environment provides the following -observation parameters to the agents: +This repository contains the implementation of two OpenAI Gym environments for +the Flappy Bird game. 
The implementation of the game's logic and graphics was +based on the [FlapPyBird](https://github.com/sourabhv/FlapPyBird) project, by +[@sourabhv](https://github.com/sourabhv). -* Horizontal distance to the next pipe; -* Difference between the player's y position and the next hole's y position. +The two environments differ only in the type of observations they yield for the +agents. The "FlappyBird-rgb-v0" environment yields RGB-arrays (images) +representing the game's screen. The "FlappyBird-v0" environment, on the other +hand, yields simple numerical information about the game's state as +observations. The yielded attributes are the: -In the future, I also intend to implement a version of the environment that provides an -image representing the game's screen as observation. +* horizontal distance to the next pipe; +* difference between the player's y position and the next hole's y position. + +

None: + self.action_space = gym.spaces.Discrete(2) + self.observation_space = gym.spaces.Box(0, 255, [*screen_size, 3]) + + self._screen_size = screen_size + self._pipe_gap = pipe_gap + + self._game = None + self._renderer = FlappyBirdRenderer(screen_size=self._screen_size, + bird_color=bird_color, + pipe_color=pipe_color, + background=background) + + def _get_observation(self): + self._renderer.draw_surface(show_score=False) + return pygame.surfarray.array3d(self._renderer.surface) + + def reset(self): + """ Resets the environment (starts a new game). + """ + self._game = FlappyBirdLogic(screen_size=self._screen_size, + pipe_gap_size=self._pipe_gap) + + self._renderer.game = self._game + return self._get_observation() + + def step(self, + action: Union[FlappyBirdLogic.Actions, int], + ) -> Tuple[np.ndarray, float, bool, Dict]: + """ Given an action, updates the game state. + + Args: + action (Union[FlappyBirdLogic.Actions, int]): The action taken by + the agent. Zero (0) means "do nothing" and one (1) means "flap". + + Returns: + A tuple containing, respectively: + + * an observation (RGB-array representing the game's screen); + * a reward (always 1); + * a status report (`True` if the game is over and `False` + otherwise); + * an info dictionary. + """ + alive = self._game.update_state(action) + obs = self._get_observation() + + reward = 1 + + done = not alive + info = {"score": self._game.score} + + return obs, reward, done, info + + def render(self, mode="human") -> Optional[np.ndarray]: + """ Renders the environment. + + If ``mode`` is: + + - human: render to the current display. Usually for human + consumption. + - rgb_array: Return an numpy.ndarray with shape (x, y, 3), + representing RGB values for an x-by-y pixel image, suitable + for turning into a video. + + Args: + mode (str): the mode to render with. 
+ + Returns: + `None` if ``mode`` is "human" and a numpy.ndarray with RGB values if + it's "rgb_array" + """ + if mode not in FlappyBirdEnvRGB.metadata["render.modes"]: + raise ValueError("Invalid render mode!") + + self._renderer.draw_surface(show_score=True) + if mode == "rgb_array": + return pygame.surfarray.array3d(self._renderer.surface) + else: + if self._renderer.display is None: + self._renderer.make_display() + + self._renderer.update_display() + + def close(self): + """ Closes the environment. """ + if self._renderer is not None: + pygame.display.quit() + self._renderer = None + + super().close() diff --git a/flappy_bird_gym/envs/flappy_bird_env.py b/flappy_bird_gym/envs/flappy_bird_env_simple.py similarity index 81% rename from flappy_bird_gym/envs/flappy_bird_env.py rename to flappy_bird_gym/envs/flappy_bird_env_simple.py index ea23a49..bdb22b6 100644 --- a/flappy_bird_gym/envs/flappy_bird_env.py +++ b/flappy_bird_gym/envs/flappy_bird_env_simple.py @@ -20,15 +20,15 @@ # SOFTWARE. # ============================================================================== -""" Implementation of the flappy bird gym environment. +""" Implementation of a Flappy Bird OpenAI Gym environment that yields simple +numerical information about the game's state as observations. """ -from typing import Dict, Tuple, Union +from typing import Dict, Tuple, Optional, Union import gym import numpy as np import pygame -from gym import spaces from flappy_bird_gym.envs.game_logic import FlappyBirdLogic from flappy_bird_gym.envs.game_logic import PIPE_WIDTH, PIPE_HEIGHT @@ -36,18 +36,20 @@ from flappy_bird_gym.envs.renderer import FlappyBirdRenderer -class FlappyBirdEnv(gym.Env): - """ Flappy bird gym environment. +class FlappyBirdEnvSimple(gym.Env): + """ Flappy Bird Gym environment that yields simple observations. + + The observations yielded by this environment are simple numerical + information about the game's state. 
Specifically, the observations are: + + * Horizontal distance to the next pipe; + * Difference between the player's y position and the next hole's y + position. The reward received by the agent in each step is equal to the score obtained by the agent in that step. A score point is obtained every time the bird passes a pipe. - About the observation space: - [0] Horizontal distance to the next pipe; - [1] Difference between the player's y position and the next hole's y - position. - Args: screen_size (Tuple[int, int]): The screen's width and height. normalize_obs (bool): If `True`, the observations will be normalized @@ -57,8 +59,9 @@ class FlappyBirdEnv(gym.Env): colors are "yellow", "blue" and "red". pipe_color (str): Color of the pipes. The currently available colors are "green" and "red". - background (str): Type of background image. The currently available - types are "day" and "night". + background (Optional[str]): Type of background image. The currently + available types are "day" and "night". If `None`, no background will + be drawn. """ metadata = {'render.modes': ['human']} @@ -69,18 +72,17 @@ def __init__(self, pipe_gap: int = 100, bird_color: str = "yellow", pipe_color: str = "green", - background: str = "day") -> None: - self.action_space = spaces.Discrete(2) - self.observation_space = spaces.Box(-np.inf, np.inf, - shape=(2,), - dtype=np.float32) + background: Optional[str] = "day") -> None: + self.action_space = gym.spaces.Discrete(2) + self.observation_space = gym.spaces.Box(-np.inf, np.inf, + shape=(2,), + dtype=np.float32) self._screen_size = screen_size self._normalize_obs = normalize_obs self._pipe_gap = pipe_gap self._game = None self._renderer = None - self._last_score = 0 self._bird_color = bird_color self._pipe_color = pipe_color @@ -152,7 +154,7 @@ def reset(self): return self._get_observation() - def render(self, mode='human'): + def render(self, mode='human') -> None: """ Renders the next frame. 
""" if self._renderer is None: self._renderer = FlappyBirdRenderer(screen_size=self._screen_size, @@ -160,10 +162,13 @@ def render(self, mode='human'): pipe_color=self._pipe_color, background=self._bg_type) self._renderer.game = self._game + self._renderer.make_display() - self._renderer.render() + self._renderer.draw_surface(show_score=True) + self._renderer.update_display() def close(self): + """ Closes the environment. """ if self._renderer is not None: pygame.display.quit() self._renderer = None diff --git a/flappy_bird_gym/envs/renderer.py b/flappy_bird_gym/envs/renderer.py index dcca107..ea07fb6 100644 --- a/flappy_bird_gym/envs/renderer.py +++ b/flappy_bird_gym/envs/renderer.py @@ -29,13 +29,17 @@ released under the MIT license. """ -from typing import Tuple +from typing import Optional, Tuple import pygame from flappy_bird_gym.envs import utils -PLAYER_ROT_THR = 20 # rotation threshold +#: Player's rotation threshold. +PLAYER_ROT_THR = 20 + +#: Color to fill the surface's background when no background image was loaded. +FILL_BACKGROUND_COLOR = (200, 200, 200) class FlappyBirdRenderer: @@ -57,23 +61,44 @@ def __init__(self, audio_on: bool = True, bird_color: str = "yellow", pipe_color: str = "green", - background: str = "day") -> None: + background: Optional[str] = "day") -> None: self._screen_width = screen_size[0] self._screen_height = screen_size[1] - self.display = pygame.display.set_mode(screen_size) - self.images = utils.load_images(bird_color=bird_color, + self.display = None + self.surface = pygame.Surface(screen_size) + self.images = utils.load_images(convert=False, + bird_color=bird_color, pipe_color=pipe_color, bg_type=background) self.audio_on = audio_on + self._audio_queue = [] if audio_on: self.sounds = utils.load_sounds() self.game = None self._clock = pygame.time.Clock() - def _show_score(self) -> None: - """ Displays score in center of screen. """ + def make_display(self) -> None: + """ Initializes the pygame's display. 
+ + Required for drawing images on the screen. + """ + self.display = pygame.display.set_mode((self._screen_width, + self._screen_height)) + for name, value in self.images.items(): + if value is None: + continue + + if type(value) in (tuple, list): + self.images[name] = tuple([img.convert_alpha() + for img in value]) + else: + self.images[name] = (value.convert() if name == "background" + else value.convert_alpha()) + + def _draw_score(self) -> None: + """ Draws the score in the center of the surface. """ score_digits = [int(x) for x in list(str(self.game.score))] total_width = 0 # total width of all numbers to be printed @@ -83,45 +108,78 @@ def _show_score(self) -> None: x_offset = (self._screen_width - total_width) / 2 for digit in score_digits: - self.display.blit(self.images['numbers'][digit], + self.surface.blit(self.images['numbers'][digit], (x_offset, self._screen_height * 0.1)) x_offset += self.images['numbers'][digit].get_width() - def render(self) -> None: - """ Renders the next frame. """ + def draw_surface(self, show_score: bool = True) -> None: + """ Re-draws the renderer's surface. + + This method updates the renderer's surface by re-drawing it according to + the current state of the game. + + Args: + show_score (bool): Whether to draw the player's score or not. 
+ """ if self.game is None: raise ValueError("A game logic must be assigned to the renderer!") - # Sounds: - if self.audio_on and self.game.sound_cache is not None: - self.sounds[self.game.sound_cache].play() - - # Images: - self.display.blit(self.images['background'], (0, 0)) + # Background + if self.images['background'] is not None: + self.surface.blit(self.images['background'], (0, 0)) + else: + self.surface.fill(FILL_BACKGROUND_COLOR) + # Pipes for up_pipe, low_pipe in zip(self.game.upper_pipes, self.game.lower_pipes): - self.display.blit(self.images['pipe'][0], + self.surface.blit(self.images['pipe'][0], (up_pipe['x'], up_pipe['y'])) - self.display.blit(self.images['pipe'][1], + self.surface.blit(self.images['pipe'][1], (low_pipe['x'], low_pipe['y'])) - self.display.blit(self.images['base'], (self.game.base_x, + # Base (ground) + self.surface.blit(self.images['base'], (self.game.base_x, self.game.base_y)) - # print score so player overlaps the score - self._show_score() - # Player rotation has a threshold + # Score + # (must be drawn before the player, so the player overlaps it) + if show_score: + self._draw_score() + + # Getting player's rotation visible_rot = PLAYER_ROT_THR if self.game.player_rot <= PLAYER_ROT_THR: visible_rot = self.game.player_rot + # Player player_surface = pygame.transform.rotate( self.images['player'][self.game.player_idx], visible_rot, ) - self.display.blit(player_surface, (self.game.player_x, + self.surface.blit(player_surface, (self.game.player_x, self.game.player_y)) + def update_display(self) -> None: + """ Updates the display with the current surface of the renderer. + + A call to this method is usually preceded by a call to + :meth:`.draw_surface()`. This method simply updates the display by + showing the current state of the renderer's surface on it, it doesn't + make any change to the surface. + """ + if self.display is None: + raise RuntimeError( + "Tried to update the display, but a display hasn't been " + "created yet! 
To create a display for the renderer, you must " + "call the `make_display()` method." + ) + + self.display.blit(self.surface, [0, 0]) pygame.display.update() + + # Sounds: + if self.audio_on and self.game.sound_cache is not None: + sound_name = self.game.sound_cache + self.sounds[sound_name].play() diff --git a/flappy_bird_gym/envs/utils.py b/flappy_bird_gym/envs/utils.py index 6b12ed1..ca63967 100644 --- a/flappy_bird_gym/envs/utils.py +++ b/flappy_bird_gym/envs/utils.py @@ -31,7 +31,7 @@ import os from pathlib import Path import sys -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional from pygame import image as pyg_image from pygame import mixer as pyg_mixer @@ -75,7 +75,15 @@ def get_hitmask(image) -> List[List[bool]]: return mask -def load_images(bg_type: str = "day", +def _load_sprite(filename, convert, alpha=True): + img = pyg_image.load(f"{SPRITES_PATH}/{filename}") + return (img.convert_alpha() if convert and alpha + else img.convert() if convert + else img) + + +def load_images(convert: bool = True, + bg_type: Optional[str] = "day", bird_color: str = "yellow", pipe_color: str = "green") -> Dict[str, Any]: """ Loads and returns the image assets of the game. 
""" @@ -84,39 +92,42 @@ def load_images(bg_type: str = "day", try: # Sprites with the number for the score display: images["numbers"] = tuple([ - pyg_image.load(f"{SPRITES_PATH}/{n}.png").convert_alpha() + _load_sprite(f"{n}.png", convert=convert, alpha=True) for n in range(10) ]) # Game over sprite: - images["gameover"] = pyg_image.load( - f"{SPRITES_PATH}/gameover.png").convert_alpha() + images["gameover"] = _load_sprite("gameover.png", + convert=convert, alpha=True) # Welcome screen message sprite: - images["message"] = pyg_image.load( - f"{SPRITES_PATH}/message.png").convert_alpha() + images["message"] = _load_sprite("message.png", + convert=convert, alpha=True) # Sprite for the base (ground): - images["base"] = pyg_image.load( - f"{SPRITES_PATH}/base.png").convert_alpha() + images["base"] = _load_sprite("base.png", + convert=convert, alpha=True) # Background sprite: - images["background"] = pyg_image.load( - f"{SPRITES_PATH}/background-{bg_type}.png").convert() + if bg_type is None: + images["background"] = None + else: + images["background"] = _load_sprite(f"background-{bg_type}.png", + convert=convert, alpha=False) # Bird sprites: images["player"] = ( - pyg_image.load( - f"{SPRITES_PATH}/{bird_color}bird-upflap.png").convert_alpha(), - pyg_image.load( - f"{SPRITES_PATH}/{bird_color}bird-midflap.png").convert_alpha(), - pyg_image.load( - f"{SPRITES_PATH}/{bird_color}bird-downflap.png").convert_alpha(), + _load_sprite(f"{bird_color}bird-upflap.png", + convert=convert, alpha=True), + _load_sprite(f"{bird_color}bird-midflap.png", + convert=convert, alpha=True), + _load_sprite(f"{bird_color}bird-downflap.png", + convert=convert, alpha=True), ) # Pipe sprites: - pipe_sprite = pyg_image.load( - f"{SPRITES_PATH}/pipe-{pipe_color}.png").convert_alpha() + pipe_sprite = _load_sprite(f"pipe-{pipe_color}.png", + convert=convert, alpha=True) images["pipe"] = (img_flip(pipe_sprite, False, True), pipe_sprite) except FileNotFoundError as ex: diff --git a/setup.py b/setup.py 
index 8fe5af7..3bc418b 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ from typing import List import setuptools -_VERSION = "0.2.0" +_VERSION = "0.3.0" # Short description. short_description = "An OpenAI gym environment for the Flappy Bird game." diff --git a/tests/test_rgb_env_human.py b/tests/test_rgb_env_human.py new file mode 100644 index 0000000..4ac8476 --- /dev/null +++ b/tests/test_rgb_env_human.py @@ -0,0 +1,137 @@ +# MIT License +# +# Copyright (c) 2020 Gabriel Nogueira (Talendar) +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ============================================================================== + +""" Tests the RGB-observations version of the Flappy Bird environment with a +human player. 
+""" + +import time + +import flappy_bird_gym +import numpy as np +import pygame +from PIL import Image + + +def play_with_render(env): + clock = pygame.time.Clock() + score = 0 + + obs = env.reset() + while True: + env.render() + + # Getting action: + action = 0 + for event in pygame.event.get(): + if event.type == pygame.QUIT: + pygame.quit() + if (event.type == pygame.KEYDOWN and + (event.key == pygame.K_SPACE or event.key == pygame.K_UP)): + action = 1 + + # Processing: + obs, reward, done, info = env.step(action) + + score += reward + print(f"Obs shape: {obs.shape}") + print(f"Score: {score}\n") + + clock.tick(30) + + if done: + env.render() + time.sleep(0.6) + break + + +def play_with_obs(env, greyscale: bool): + obs = env.reset() + + # noinspection PyProtectedMember + display = pygame.display.set_mode((env._renderer._screen_width, + env._renderer._screen_height)) + clock = pygame.time.Clock() + score = 0 + + while True: + if greyscale: + obs = obs.mean(axis=-1) + print(f"Grayscale obs shape: {obs.shape}") + obs = np.repeat(obs[:, :, np.newaxis], 3, axis=2) + + pygame.surfarray.blit_array(display, obs) + pygame.display.update() + + # Getting action: + action = 0 + for event in pygame.event.get(): + if event.type == pygame.QUIT: + pygame.quit() + if (event.type == pygame.KEYDOWN and + (event.key == pygame.K_SPACE or event.key == pygame.K_UP)): + action = 1 + + # Processing: + obs, reward, done, info = env.step(action) + + score += reward + print(f"Obs shape: {obs.shape}") + print(f"Score: {score}\n") + + clock.tick(30) + + if done: + time.sleep(0.6) + break + + +def visualize_obs(env, greyscale: bool): + obs = env.reset() + obs = np.moveaxis(obs, source=1, destination=0) # width <-> height + if greyscale: + obs = obs.mean(axis=-1) + + print(f"Obs shape: {obs.shape}") + img = Image.fromarray(obs) + + img.show() + time.sleep(3) + img.close() + + +if __name__ == "__main__": + flappy_env = flappy_bird_gym.make("FlappyBird-rgb-v0") + + print(f"Action space: 
{flappy_env.action_space}") + print(f"Observation space: {flappy_env.observation_space}") + + visualize_obs(env=flappy_env, greyscale=False) + visualize_obs(env=flappy_env, greyscale=True) + + play_with_render(env=flappy_env) + + play_with_obs(env=flappy_env, greyscale=False) + play_with_obs(env=flappy_env, greyscale=True) + + flappy_env.close() diff --git a/tests/test_env_human.py b/tests/test_simple_env_human.py similarity index 92% rename from tests/test_env_human.py rename to tests/test_simple_env_human.py index cb184a5..726e4fa 100644 --- a/tests/test_env_human.py +++ b/tests/test_simple_env_human.py @@ -21,7 +21,8 @@ # SOFTWARE. # ============================================================================== -""" Tests the flappy bird environment with a human player. +""" Tests the simple-observations version of the Flappy Bird environment with a +human player. """ import time @@ -30,7 +31,7 @@ import flappy_bird_gym -def main(): +def play(): # env = gym.make("flappy_bird_gym:FlappyBird-v0") env = flappy_bird_gym.make("FlappyBird-v0") @@ -55,9 +56,9 @@ def main(): score += reward print(f"Obs: {obs}") - print(f"Score: {score}") + print(f"Score: {score}\n") - clock.tick(12) + clock.tick(15) if done: env.render() @@ -68,4 +69,4 @@ def main(): if __name__ == "__main__": - main() + play() diff --git a/tests/test_env_random.py b/tests/test_simple_env_random.py similarity index 95% rename from tests/test_env_random.py rename to tests/test_simple_env_random.py index e745804..6924325 100644 --- a/tests/test_env_random.py +++ b/tests/test_simple_env_random.py @@ -21,7 +21,8 @@ # SOFTWARE. # ============================================================================== -""" Tests the flappy bird environment with a human player. +""" Tests the simple-observations version of the Flappy Bird environment with a +random agent. """ import time