Close #1 and refactor renderer

Talendar · Feb 8, 2021 · 98e250f · 98e250f
1 parent 77a765b
commit 98e250f
Show file tree

Hide file tree

Showing 11 changed files with 475 additions and 85 deletions.
diff --git a/README.md b/README.md
@@ -4,16 +4,21 @@
 [![PyPI](https://img.shields.io/pypi/v/flappy-bird-gym)](https://pypi.org/project/flappy-bird-gym/)
 [![License](https://img.shields.io/github/license/Talendar/flappy-bird-gym)](https://github.com/Talendar/flappy-bird-gym/blob/master/LICENSE)
 
-This repository contains an implementation of an OpenAI Gym environment for the Flappy Bird
-game. It's based on [FlapPyBird](https://github.com/sourabhv/FlapPyBird), by
-[@sourabhv](https://github.com/sourabhv). Currently, the environment provides the following
-observation parameters to the agents: 
+This repository contains the implementation of two OpenAI Gym environments for
+the Flappy Bird game. The implementation of the game's logic and graphics was
+based on the [FlapPyBird](https://github.com/sourabhv/FlapPyBird) project, by
+[@sourabhv](https://github.com/sourabhv). 
 
-* Horizontal distance to the next pipe;
-* Difference between the player's y position and the next hole's y position.
+The two environments differ only in the type of observations they yield for the
+agents. The "FlappyBird-rgb-v0" environment yields RGB-arrays (images)
+representing the game's screen. The "FlappyBird-v0" environment, on the other
+hand, yields simple numerical information about the game's state as
+observations. The yielded attributes are the:
 
-In the future, I also intend to implement a version of the environment that provides an
-image representing the game's screen as observation.
+* horizontal distance to the next pipe;
+* difference between the player's y position and the next hole's y position.
+
+<br>
 
 <p align="center">
   <img align="center" 
@@ -37,9 +42,9 @@ To install `flappy-bird-gym`, simply run the following command:
 
 ## Usage
 
-Like with other `gym` environments, it's very easy to use `flappy-bird-gym`. Simply import the
-package and create the environment with the `make` function. Take a look at the sample code
-below:
+Like with other `gym` environments, it's very easy to use `flappy-bird-gym`.
+Simply import the package and create the environment with the `make` function.
+Take a look at the sample code below:
 
 ```
 import time

diff --git a/flappy_bird_gym/__init__.py b/flappy_bird_gym/__init__.py
@@ -21,24 +21,39 @@
 # SOFTWARE.
 # ==============================================================================
 
-""" Registers the gym environment and exports the `gym.make` function.
+""" Registers the gym environments and exports the `gym.make` function.
 """
 
+# Silencing pygame:
 import os
 os.environ['PYGAME_HIDE_SUPPORT_PROMPT'] = "hide"
 
-from flappy_bird_gym.envs.flappy_bird_env import FlappyBirdEnv
+# Exporting envs:
+from flappy_bird_gym.envs.flappy_bird_env_rgb import FlappyBirdEnvRGB
+from flappy_bird_gym.envs.flappy_bird_env_simple import FlappyBirdEnvSimple
+
+# Exporting original game:
 from flappy_bird_gym import original_game
 
+# Exporting gym.make:
 from gym import make
+
+# Registering environments:
 from gym.envs.registration import register
 
 register(
     id="FlappyBird-v0",
-    entry_point="flappy_bird_gym:FlappyBirdEnv",
+    entry_point="flappy_bird_gym:FlappyBirdEnvSimple",
+)
+
+register(
+    id="FlappyBird-rgb-v0",
+    entry_point="flappy_bird_gym:FlappyBirdEnvRGB",
 )
 
+# Main names:
 __all__ = [
     make.__name__,
-    FlappyBirdEnv.__name__,
+    FlappyBirdEnvRGB.__name__,
+    FlappyBirdEnvSimple.__name__,
 ]
diff --git a/flappy_bird_gym/envs/__init__.py b/flappy_bird_gym/envs/__init__.py
@@ -23,4 +23,5 @@
 """ Exposes the environment class.
 """
 
-from flappy_bird_gym.envs.flappy_bird_env import FlappyBirdEnv
+from flappy_bird_gym.envs.flappy_bird_env_rgb import FlappyBirdEnvRGB
+from flappy_bird_gym.envs.flappy_bird_env_simple import FlappyBirdEnvSimple
diff --git a/flappy_bird_gym/envs/flappy_bird_env_rgb.py b/flappy_bird_gym/envs/flappy_bird_env_rgb.py
@@ -0,0 +1,156 @@
+#
+# Copyright (c) 2020 Gabriel Nogueira (Talendar)
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+# ==============================================================================
+
+""" Implementation of a Flappy Bird OpenAI Gym environment that yields
+RGB-arrays representing the game's screen as observations.
+"""
+
+from typing import Dict, Tuple, Optional, Union
+
+import gym
+import numpy as np
+import pygame
+
+from flappy_bird_gym.envs.game_logic import FlappyBirdLogic
+from flappy_bird_gym.envs.renderer import FlappyBirdRenderer
+
+
+class FlappyBirdEnvRGB(gym.Env):
+    """  Flappy Bird Gym environment that yields images as observations.
+
+    The observations yielded by this environment are RGB-arrays (images)
+    representing the game's screen.
+
+    The reward received by the agent in each step is always 1, regardless of
+    the action taken. The game's score, which increases every time the bird
+    passes a pipe, is reported in the "score" entry of the info dictionary.
+
+    Args:
+        screen_size (Tuple[int, int]): The screen's width and height.
+        pipe_gap (int): Space between a lower and an upper pipe.
+        bird_color (str): Color of the flappy bird. The currently available
+            colors are "yellow", "blue" and "red".
+        pipe_color (str): Color of the pipes. The currently available colors are
+            "green" and "red".
+        background (Optional[str]): Type of background image. The currently
+            available types are "day" and "night". If `None`, no background will
+            be drawn.
+    """
+
+    metadata = {"render.modes": ["human", "rgb_array"]}
+
+    def __init__(self,
+                 screen_size: Tuple[int, int] = (288, 512),
+                 pipe_gap: int = 100,
+                 bird_color: str = "yellow",
+                 pipe_color: str = "green",
+                 background: Optional[str] = None) -> None:
+        self.action_space = gym.spaces.Discrete(2)
+        self.observation_space = gym.spaces.Box(0, 255, [*screen_size, 3])
+
+        self._screen_size = screen_size
+        self._pipe_gap = pipe_gap
+
+        self._game = None
+        self._renderer = FlappyBirdRenderer(screen_size=self._screen_size,
+                                            bird_color=bird_color,
+                                            pipe_color=pipe_color,
+                                            background=background)
+
+    def _get_observation(self):
+        self._renderer.draw_surface(show_score=False)
+        return pygame.surfarray.array3d(self._renderer.surface)
+
+    def reset(self):
+        """ Resets the environment (starts a new game).
+        """
+        self._game = FlappyBirdLogic(screen_size=self._screen_size,
+                                     pipe_gap_size=self._pipe_gap)
+
+        self._renderer.game = self._game
+        return self._get_observation()
+
+    def step(self,
+             action: Union[FlappyBirdLogic.Actions, int],
+    ) -> Tuple[np.ndarray, float, bool, Dict]:
+        """ Given an action, updates the game state.
+
+        Args:
+            action (Union[FlappyBirdLogic.Actions, int]): The action taken by
+                the agent. Zero (0) means "do nothing" and one (1) means "flap".
+
+        Returns:
+            A tuple containing, respectively:
+
+                * an observation (RGB-array representing the game's screen);
+                * a reward (always 1);
+                * a status report (`True` if the game is over and `False`
+                  otherwise);
+                * an info dictionary.
+        """
+        alive = self._game.update_state(action)
+        obs = self._get_observation()
+
+        reward = 1
+
+        done = not alive
+        info = {"score": self._game.score}
+
+        return obs, reward, done, info
+
+    def render(self, mode="human") -> Optional[np.ndarray]:
+        """ Renders the environment.
+
+        If ``mode`` is:
+
+            - human: render to the current display. Usually for human
+              consumption.
+            - rgb_array: Return a numpy.ndarray with shape (x, y, 3),
+              representing RGB values for an x-by-y pixel image, suitable
+              for turning into a video.
+
+        Args:
+            mode (str): the mode to render with.
+
+        Returns:
+            `None` if ``mode`` is "human" and a numpy.ndarray with RGB values if
+            it's "rgb_array"
+        """
+        if mode not in FlappyBirdEnvRGB.metadata["render.modes"]:
+            raise ValueError("Invalid render mode!")
+
+        self._renderer.draw_surface(show_score=True)
+        if mode == "rgb_array":
+            return pygame.surfarray.array3d(self._renderer.surface)
+        else:
+            if self._renderer.display is None:
+                self._renderer.make_display()
+
+            self._renderer.update_display()
+
+    def close(self):
+        """ Closes the environment. """
+        if self._renderer is not None:
+            pygame.display.quit()
+            self._renderer = None
+
+        super().close()
diff --git a/flappy_bird_gym/envs/flappy_bird_env.py → ...y_bird_gym/envs/flappy_bird_env_simple.py b/flappy_bird_gym/envs/flappy_bird_env.py → ...y_bird_gym/envs/flappy_bird_env_simple.py
@@ -20,34 +20,36 @@
 # SOFTWARE.
 # ==============================================================================
 
-""" Implementation of the flappy bird gym environment.
+""" Implementation of a Flappy Bird OpenAI Gym environment that yields simple
+numerical information about the game's state as observations.
 """
 
-from typing import Dict, Tuple, Union
+from typing import Dict, Tuple, Optional, Union
 
 import gym
 import numpy as np
 import pygame
-from gym import spaces
 
 from flappy_bird_gym.envs.game_logic import FlappyBirdLogic
 from flappy_bird_gym.envs.game_logic import PIPE_WIDTH, PIPE_HEIGHT
 from flappy_bird_gym.envs.game_logic import PLAYER_WIDTH, PLAYER_HEIGHT
 from flappy_bird_gym.envs.renderer import FlappyBirdRenderer
 
 
-class FlappyBirdEnv(gym.Env):
-    """ Flappy bird gym environment.
+class FlappyBirdEnvSimple(gym.Env):
+    """ Flappy Bird Gym environment that yields simple observations.
+
+    The observations yielded by this environment are simple numerical
+    information about the game's state. Specifically, the observations are:
+
+        * Horizontal distance to the next pipe;
+        * Difference between the player's y position and the next hole's y
+          position.
 
     The reward received by the agent in each step is equal to the score obtained
     by the agent in that step. A score point is obtained every time the bird
     passes a pipe.
 
-    About the observation space:
-        [0] Horizontal distance to the next pipe;
-        [1] Difference between the player's y position and the next hole's y
-            position.
-
     Args:
         screen_size (Tuple[int, int]): The screen's width and height.
         normalize_obs (bool): If `True`, the observations will be normalized
@@ -57,8 +59,9 @@ class FlappyBirdEnv(gym.Env):
             colors are "yellow", "blue" and "red".
         pipe_color (str): Color of the pipes. The currently available colors are
             "green" and "red".
-        background (str): Type of background image. The currently available
-            types are "day" and "night".
+        background (Optional[str]): Type of background image. The currently
+            available types are "day" and "night". If `None`, no background will
+            be drawn.
     """
 
     metadata = {'render.modes': ['human']}
@@ -69,18 +72,17 @@ def __init__(self,
                  pipe_gap: int = 100,
                  bird_color: str = "yellow",
                  pipe_color: str = "green",
-                 background: str = "day") -> None:
-        self.action_space = spaces.Discrete(2)
-        self.observation_space = spaces.Box(-np.inf, np.inf,
-                                            shape=(2,),
-                                            dtype=np.float32)
+                 background: Optional[str] = "day") -> None:
+        self.action_space = gym.spaces.Discrete(2)
+        self.observation_space = gym.spaces.Box(-np.inf, np.inf,
+                                                shape=(2,),
+                                                dtype=np.float32)
         self._screen_size = screen_size
         self._normalize_obs = normalize_obs
         self._pipe_gap = pipe_gap
 
         self._game = None
         self._renderer = None
-        self._last_score = 0
 
         self._bird_color = bird_color
         self._pipe_color = pipe_color
@@ -152,18 +154,21 @@ def reset(self):
 
         return self._get_observation()
 
-    def render(self, mode='human'):
+    def render(self, mode='human') -> None:
         """ Renders the next frame. """
         if self._renderer is None:
             self._renderer = FlappyBirdRenderer(screen_size=self._screen_size,
                                                 bird_color=self._bird_color,
                                                 pipe_color=self._pipe_color,
                                                 background=self._bg_type)
             self._renderer.game = self._game
+            self._renderer.make_display()
 
-        self._renderer.render()
+        self._renderer.draw_surface(show_score=True)
+        self._renderer.update_display()
 
     def close(self):
+        """ Closes the environment. """
         if self._renderer is not None:
             pygame.display.quit()
             self._renderer = None