Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Testing auto-gen docs for DataFrame #230

Merged
merged 23 commits into from
Apr 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ with gzip.open(os.path.join('output.gzip'), 'wb') as fo:
# return the proto object in python
proto_object = analysis_manager.get_protobuf_data()

# return the proto object as a json object
json_object = analysis_manager.get_json_data()

# return the pandas data frame in python
dataframe = analysis_manager.get_data_frame()
```
Expand Down
215 changes: 135 additions & 80 deletions carball/analysis/analysis_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import os

from google.protobuf.json_format import _Printer
from typing.io import IO

from .utils.json_encoder import CarballJsonEncoder

script_path = os.path.abspath(__file__)
Expand Down Expand Up @@ -43,45 +45,141 @@ def __init__(self, game: Game):
self.game = game
self.protobuf_game = game_pb2.Game()
self.protobuf_game.version = PROTOBUF_VERSION
self.id_creator = self.create_player_id_function(game)
self.id_creator = self._create_player_id_function(game)
self.stats_manager = StatsManager()
self.events_creator = EventsCreator(self.id_creator)
self.should_store_frames = False
self.df_bytes = None

def create_analysis(self, calculate_intensive_events: bool = False):
def create_analysis(self, calculate_intensive_events: bool = False, clean: bool = True):
"""
Sets basic metadata, and decides whether analysis can be performed and then passes required parameters
to perform_full_analysis(...); After, stores the DataFrame.
:param calculate_intensive_events: Indicates if expensive calculations should run to include additional stats.
:param clean: Indicates if useless/invalid data should be found and removed.
"""

self.start_time()
player_map = self.get_game_metadata(self.game, self.protobuf_game)
self.log_time("Getting in-game frame-by-frame data...")
data_frame = self.get_data_frames(self.game)
self.log_time("Getting important frames (kickoff, first-touch)...")
kickoff_frames, first_touch_frames = self.get_kickoff_frames(self.game, self.protobuf_game, data_frame)
self.log_time("Setting game kickoff frames...")
self._start_time()
player_map = self._get_game_metadata(self.game, self.protobuf_game)
self._log_time("Getting in-game frame-by-frame data...")
data_frame = self._initialize_data_frame(self.game)
self._log_time("Getting important frames (kickoff, first-touch)...")
kickoff_frames, first_touch_frames = self._get_kickoff_frames(self.game, self.protobuf_game, data_frame)
self._log_time("Setting game kickoff frames...")
self.game.kickoff_frames = kickoff_frames

if self.can_do_full_analysis(first_touch_frames):
self.perform_full_analysis(self.game, self.protobuf_game, player_map,
data_frame, kickoff_frames, first_touch_frames,
calculate_intensive_events=calculate_intensive_events)
if self._can_do_full_analysis(first_touch_frames):
self._perform_full_analysis(self.game, self.protobuf_game, player_map,
data_frame, kickoff_frames, first_touch_frames,
calculate_intensive_events=calculate_intensive_events,
clean=clean)
else:
self.log_time("Cannot perform analysis: invalid analysis.")
self._log_time("Cannot perform analysis: invalid analysis.")
self.protobuf_game.game_metadata.is_invalid_analysis = True

# log before we add the dataframes
# logger.debug(self.protobuf_game)

self.store_frames(data_frame)
self._store_frames(data_frame)

def write_json_out_to_file(self, file: IO):
    """
    Writes the json data to the specified file, as text.

    NOTES:
        The data is written as text (i.e. string), so the target must be a text stream.
        E.g. open(file_name, 'w')
        In-memory text buffers without a ``mode`` attribute (e.g. io.StringIO) are accepted.

    :param file: The file object (or a buffer).
    :raises IOError: If the target is a binary stream or reports a binary mode.
    """
    import io

    # Not every file-like object has a string ``mode``: in-memory buffers have none at all and
    # some wrappers expose a non-string value, so the stream type is checked first and ``mode``
    # is only inspected when it really is a string.
    mode = getattr(file, 'mode', None)
    is_binary_stream = isinstance(file, (io.RawIOBase, io.BufferedIOBase))
    if is_binary_stream or (isinstance(mode, str) and 'b' in mode):
        raise IOError("Json files can not be binary use open(path,\"w\")")
    printer = _Printer()
    js = printer._MessageToJsonObject(self.protobuf_game)
    json.dump(js, file, indent=2, cls=CarballJsonEncoder)

def write_proto_out_to_file(self, file: IO):
    """
    Writes the proto buffer data to the specified file, as bytes.

    NOTES:
        The data is written as bytes (i.e. in binary), so the target must be a binary stream.
        E.g. open(file_name, 'wb')
        In-memory binary buffers without a ``mode`` attribute (e.g. io.BytesIO) are accepted.
        The file will NOT be human-readable.

    :param file: The file object (or a buffer).
    :raises IOError: If the target is a text stream or reports a non-binary mode.
    """
    import io

    # ``mode`` is optional on file-like objects (io.BytesIO has none) and is not always a string,
    # so text streams are detected by type and ``mode`` is only inspected when it is a string.
    mode = getattr(file, 'mode', None)
    if isinstance(file, io.TextIOBase) or (isinstance(mode, str) and 'b' not in mode):
        raise IOError("Proto files must be binary use open(path,\"wb\")")
    # The actual serialisation is delegated to ProtobufManager.
    ProtobufManager.write_proto_out_to_file(file, self.protobuf_game)

def write_pandas_out_to_file(self, file: IO):
    """
    Writes the pandas data to the specified file, as bytes.

    NOTES:
        The data is written as bytes (i.e. in binary), so the target must be a binary stream.
        E.g. open(file_name, 'wb')
        In-memory binary buffers without a ``mode`` attribute (e.g. io.BytesIO) are accepted.
        The file will NOT be human-readable.
        Nothing is written when no serialised DataFrame is available (``self.df_bytes`` is None);
        in that case a warning is logged unless ``self.should_store_frames`` is set.

    :param file: The file object (or a buffer).
    :raises IOError: If the target is a text stream or reports a non-binary mode.
    """
    import io

    # ``mode`` is optional on file-like objects (io.BytesIO has none) and is not always a string,
    # so text streams are detected by type and ``mode`` is only inspected when it is a string.
    mode = getattr(file, 'mode', None)
    if isinstance(file, io.TextIOBase) or (isinstance(mode, str) and 'b' not in mode):
        # The message names pandas: this writer handles the DataFrame bytes, not the protobuf.
        raise IOError("Pandas files must be binary use open(path,\"wb\")")
    if self.df_bytes is not None:
        file.write(self.df_bytes)
    elif not self.should_store_frames:
        logger.warning("pd DataFrames are not being stored anywhere")

def get_protobuf_data(self) -> game_pb2.Game:
    """
    Gives access to the Protocol Buffer message held by this manager.

    USAGE: The Protocol Buffer carries in-game metadata (e.g. events, stats) and can be used like
        an ordinary Python object whose fields match the API.
    INFO: A Protocol Buffer organises data in a format similar to json. The relevant .proto files
        live at https://github.com/SaltieRL/carball/tree/master/api.
        Google's developer guide to protocol buffers is at
        https://developers.google.com/protocol-buffers/docs/overview

    :return: The protobuf data created by the analysis.
    """
    proto_game = self.protobuf_game
    return proto_game

def get_json_data(self):
    """
    Converts the protobuf data created by the analysis into a json object.

    See get_protobuf_data for details about the underlying data.
    The json fields are defined by https://github.com/SaltieRL/carball/tree/master/api

    :return: The protobuf data created by the analysis as a json object.
    """
    return _Printer()._MessageToJsonObject(self.protobuf_game)

def get_data_frame(self) -> pd.DataFrame:
    """
    Gives access to the DataFrame stored on this manager.

    USAGE: The DataFrame holds in-game frame-by-frame data.
    INFO: The DataFrame organises data in a format similar to csv. Its 'index' column is the
        consecutive in-game frames; every other column heading (150+) is a tuple of the form
        (Object, Data), where the Object is either a player, the ball or the game.
        Call data_frame.info(verbose=True) to list all column information (and keys).
        Further documentation about DataFrames:
        https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html

    :return: The pandas.DataFrame object.
    """
    frames = self.data_frame
    return frames

def perform_full_analysis(self, game: Game, proto_game: game_pb2.Game, player_map: Dict[str, Player],
data_frame: pd.DataFrame, kickoff_frames: pd.DataFrame, first_touch_frames: pd.Series,
calculate_intensive_events: bool = False):
def _perform_full_analysis(self, game: Game, proto_game: game_pb2.Game, player_map: Dict[str, Player],
data_frame: pd.DataFrame, kickoff_frames: pd.DataFrame, first_touch_frames: pd.Series,
calculate_intensive_events: bool = False, clean: bool = True):

"""
Sets some further data and cleans the replay;
Expand All @@ -94,17 +192,19 @@ def perform_full_analysis(self, game: Game, proto_game: game_pb2.Game, player_ma
:param kickoff_frames: Contains data about the kickoffs.
:param first_touch_frames: Contains data for frames where touches can actually occur.
:param calculate_intensive_events: Indicates if expensive calculations should run to include additional stats.
:param clean: Indicates if useless/invalid data should be found and removed.
"""

self.get_game_time(proto_game, data_frame)
clean_replay(game, data_frame, proto_game, player_map)
self.log_time("Creating events...")
self._get_game_time(proto_game, data_frame)
if clean:
clean_replay(game, data_frame, proto_game, player_map)
self._log_time("Creating events...")
self.events_creator.create_events(game, proto_game, player_map, data_frame, kickoff_frames, first_touch_frames,
calculate_intensive_events=calculate_intensive_events)
self.log_time("Getting stats...")
self.get_stats(game, proto_game, player_map, data_frame)
self._log_time("Getting stats...")
self._get_stats(game, proto_game, player_map, data_frame)

def get_game_metadata(self, game: Game, proto_game: game_pb2.Game) -> Dict[str, Player]:
def _get_game_metadata(self, game: Game, proto_game: game_pb2.Game) -> Dict[str, Player]:
"""
Processes protobuf data and sets the respective object fields to correct values.
Maps the player's specific online ID (steam unique ID) to the player object.
Expand Down Expand Up @@ -133,7 +233,7 @@ def get_game_metadata(self, game: Game, proto_game: game_pb2.Game) -> Dict[str,

return player_map

def get_game_time(self, protobuf_game: game_pb2.Game, data_frame: pd.DataFrame):
def _get_game_time(self, protobuf_game: game_pb2.Game, data_frame: pd.DataFrame):
"""
Calculates the game length (total time the game lasted) and sets it to the relevant metadata length field.
Calculates the total time a player has spent in the game and sets it to the relevant player field.
Expand All @@ -153,7 +253,7 @@ def get_game_time(self, protobuf_game: game_pb2.Game, data_frame: pd.DataFrame):

logger.info("Set each player's in-game times.")

def get_kickoff_frames(self, game: Game, proto_game: game_pb2.Game, data_frame: pd.DataFrame):
def _get_kickoff_frames(self, game: Game, proto_game: game_pb2.Game, data_frame: pd.DataFrame):
"""
Firstly, fetches kickoff-related data from SaltieGame.
Secondly, checks for edge-cases and corrects errors.
Expand Down Expand Up @@ -188,10 +288,10 @@ def get_kickoff_frames(self, game: Game, proto_game: game_pb2.Game, data_frame:

return kickoff_frames, first_touch_frames

def get_stats(self, game: Game, proto_game: game_pb2.Game, player_map: Dict[str, Player],
data_frame: pd.DataFrame):
def _get_stats(self, game: Game, proto_game: game_pb2.Game, player_map: Dict[str, Player],
data_frame: pd.DataFrame):
"""
For each in-game frame after a goal has happened, calculate in-game stats
For each in-game frame after a goal has happened, calculate in-game stats.
(i.e. player, team, general-game and hit stats)
:param game: The game object (instance of Game). It contains the replay metadata and processed json data.
Expand All @@ -203,70 +303,25 @@ def get_stats(self, game: Game, proto_game: game_pb2.Game, player_map: Dict[str,
goal_frames = data_frame.game.goal_number.notnull()
self.stats_manager.get_stats(game, proto_game, player_map, data_frame[goal_frames])

def store_frames(self, data_frame: pd.DataFrame):
def _store_frames(self, data_frame: pd.DataFrame):
self.data_frame = data_frame
self.df_bytes = PandasManager.safe_write_pandas_to_memory(data_frame)

def write_json_out_to_file(self, file):
printer = _Printer()
js = printer._MessageToJsonObject(self.protobuf_game)
json.dump(js, file, indent=2, cls=CarballJsonEncoder)

def write_proto_out_to_file(self, file):
ProtobufManager.write_proto_out_to_file(file, self.protobuf_game)

def write_pandas_out_to_file(self, file):
if self.df_bytes is not None:
file.write(self.df_bytes)
elif not self.should_store_frames:
logger.warning("pd DataFrames are not being stored anywhere")

def get_protobuf_data(self) -> game_pb2.Game:
"""
:return: The protobuf data created by the analysis
USAGE: A Protocol Buffer contains in-game metadata (e.g. events, stats). Treat it as a usual Python object with
fields that match the API.
INFO: The Protocol Buffer is a collection of data organized in a format similar to json. All relevant .proto
files found at https://github.com/SaltieRL/carball/tree/master/api.
Google's developer guide to protocol buffers may be found at https://developers.google.com/protocol-buffers/docs/overview
"""
return self.protobuf_game

def get_data_frame(self) -> pd.DataFrame:
"""
:return: The pandas.DataFrame object.
USAGE: A DataFrame contains in-game frame-by-frame data.
INFO: The DataFrame is a collection of data organized in a format similar to csv. The 'index' column of the
DataFrame is the consecutive in-game frames, and all other column headings (150+) are tuples in the following
format:
(Object, Data), where the Object is either a player, the ball or the game.
All column information (and keys) may be seen by calling data_frame.info(verbose=True)
All further documentation about the DataFrame can be found at https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
"""
return self.data_frame

def get_data_frames(self, game: Game):
def _initialize_data_frame(self, game: Game):
data_frame = SaltieGame.create_data_df(game)

logger.info("Assigned goal_number in .data_frame")
return data_frame

def create_player_id_function(self, game: Game) -> Callable:
def _create_player_id_function(self, game: Game) -> Callable:
name_map = {player.name: player.online_id for player in game.players}

def create_name(proto_player_id, name):
proto_player_id.id = str(name_map[name])

return create_name

def can_do_full_analysis(self, first_touch_frames) -> bool:
def _can_do_full_analysis(self, first_touch_frames) -> bool:
"""
Check whether or not the replay satisfies the requirements for a full analysis.
This includes checking:
Expand Down Expand Up @@ -299,11 +354,11 @@ def can_do_full_analysis(self, first_touch_frames) -> bool:

return True

def start_time(self):
def _start_time(self):
self.timer = time.time()
logger.info("starting timer")

def log_time(self, message=""):
def _log_time(self, message=""):
end = time.time()
logger.info("Time taken for %s is %s milliseconds", message, (end - self.timer) * 1000)
self.timer = end
6 changes: 4 additions & 2 deletions carball/decompile_replays.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ def decompile_replay(replay_path, output_path: str = None, overwrite: bool = Tru
def analyze_replay_file(replay_path: str, output_path: str = None, overwrite=True, controls: ControlsCreator = None,
sanity_check: SanityChecker = None, analysis_per_goal=False, rattletrap_path: str = None,
logging_level=logging.NOTSET,
calculate_intensive_events: bool = False):
calculate_intensive_events: bool = False,
clean: bool = True):
"""
Decompile and analyze a replay file.
Expand All @@ -41,6 +42,7 @@ def analyze_replay_file(replay_path: str, output_path: str = None, overwrite=Tru
:param force_full_analysis: If True full analysis will be performed even if checks say it should not.
:param logging_level: Sets the logging level globally across carball
:param calculate_intensive_events: Indicates if expensive calculations should run to include additional stats.
:param clean: Indicates if useless/invalid data should be found and removed.
:return: AnalysisManager of game with analysis.
"""

Expand All @@ -57,7 +59,7 @@ def analyze_replay_file(replay_path: str, output_path: str = None, overwrite=Tru
analysis = PerGoalAnalysis(game)
else:
analysis = AnalysisManager(game)
analysis.create_analysis(calculate_intensive_events=calculate_intensive_events)
analysis.create_analysis(calculate_intensive_events=calculate_intensive_events, clean=clean)

if controls is not None:
controls.get_controls(game)
Expand Down
6 changes: 4 additions & 2 deletions carball/extras/per_goal_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ def __init__(self, game: Game):
super().__init__(game)
self.protobuf_games = []

def perform_full_analysis(self, game: Game, proto_game: game_pb2.Game, player_map, data_frame, kickoff_frames):
def _perform_full_analysis(self, game: Game, proto_game: game_pb2.Game, player_map,
data_frame, kickoff_frames, first_touch_frames,
calculate_intensive_events: bool = False, clean: bool = True):
self.protobuf_games = []
# split up frames
total_score = proto_game.game_metadata.score.team_0_score + proto_game.game_metadata.score.team_1_score
Expand All @@ -32,7 +34,7 @@ def perform_full_analysis(self, game: Game, proto_game: game_pb2.Game, player_ma
new_game.players = game.players
new_game.teams = game.teams
new_game.frames = split_pandas
super().perform_full_analysis(new_game, new_proto, player_map, split_pandas, kickoff_frames)
# The base implementation requires first_touch_frames (it has no default there), so it must be
# forwarded; the caller's calculate_intensive_events / clean choices are passed on as well
# instead of silently falling back to the base defaults.
super()._perform_full_analysis(new_game, new_proto, player_map, split_pandas, kickoff_frames,
                               first_touch_frames,
                               calculate_intensive_events=calculate_intensive_events,
                               clean=clean)
self.protobuf_games.append(new_proto)

def get_protobuf_data(self) -> List[game_pb2.Game]:
Expand Down
Loading