From 888a55e230fdea5305dc33423ca033397d255ec4 Mon Sep 17 00:00:00 2001
From: Vinicius Reis
Date: Mon, 10 Feb 2020 08:42:48 -0800
Subject: [PATCH] Performance logging

Differential Revision: D19739656

fbshipit-source-id: 3183e4825385e08e3f2137cde8cca0f928285f06
---
 classy_vision/hooks/tensorboard_plot_hook.py | 17 +++++++++++++++++
 classy_vision/tasks/classification_task.py   | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)

Reviewer notes (ignored by `git am`):
 * The hook now clears `task.perf_log` instead of setting an unrelated
   `self.perf_log` on the hook, and no longer plots "phase_idx" as a metric.
 * `import time` is grouped with the other standard-library imports.
 * Hunks were re-flowed from a whitespace-collapsed copy, so context-line
   indentation is reconstructed; use `git apply --ignore-whitespace` if a
   hunk fails to match. The post-image blob ids on the `index` lines predate
   these review changes.

diff --git a/classy_vision/hooks/tensorboard_plot_hook.py b/classy_vision/hooks/tensorboard_plot_hook.py
index 284a52568c..419e9e63af 100644
--- a/classy_vision/hooks/tensorboard_plot_hook.py
+++ b/classy_vision/hooks/tensorboard_plot_hook.py
@@ -140,6 +140,23 @@ def on_phase_end(
                     )
                     continue
 
+        if hasattr(task, "perf_log"):
+            # Each entry is a dict of measurements for one phase; its
+            # "phase_idx" value is the x-axis position, not a metric to plot.
+            for log in task.perf_log:
+                phase_idx = log["phase_idx"]
+                for metric_name, metric_value in log.items():
+                    if metric_name == "phase_idx":
+                        continue
+                    self.tb_writer.add_scalar(
+                        f"Performance/{metric_name}",
+                        metric_value,
+                        global_step=phase_idx,
+                    )
+            # Reset the task's log (not an attribute of this hook) so these
+            # entries are not plotted again at the end of the next phase.
+            task.perf_log = []
+
         # flush so that the plots aren't lost if training crashes soon after
         self.tb_writer.flush()
         logging.info(f"Done plotting to Tensorboard")
diff --git a/classy_vision/tasks/classification_task.py b/classy_vision/tasks/classification_task.py
index 252e5c233a..a2224817dc 100644
--- a/classy_vision/tasks/classification_task.py
+++ b/classy_vision/tasks/classification_task.py
@@ -9,6 +9,7 @@
 import logging
+import time
 from typing import Any, Dict, List, Optional, Union
 
 import torch
 from classy_vision.dataset import ClassyDataset, build_dataset
 from classy_vision.generic.distributed_util import (
@@ -807,18 +808,54 @@ def get_global_batchsize(self):
     def on_start(self, local_variables):
         self.run_hooks(local_variables, ClassyHookFunctions.on_start.name)
 
+        self.perf_log = []
+
     def on_phase_start(self, local_variables):
         self.advance_phase()
 
         self.run_hooks(local_variables, ClassyHookFunctions.on_phase_start.name)
 
+        # We do this manually instead of writing a hook for it because we want
+        # to make sure this runs after every other hook.
+        self.phase_start_time = time.perf_counter()
+
     def on_phase_end(self, local_variables):
+        self.log_phase_end()
+
         logging.info("Syncing meters on phase end...")
         for meter in self.meters:
             meter.sync_state()
         logging.info("...meters synced")
         barrier()
+
         self.run_hooks(local_variables, ClassyHookFunctions.on_phase_end.name)
 
     def on_end(self, local_variables):
         self.run_hooks(local_variables, ClassyHookFunctions.on_end.name)
+
+    def log_phase_end(self):
+        """Record the duration and throughput of the train phase that ended.
+
+        Does nothing unless ``self.train`` is set. Otherwise appends a dict
+        with the keys ``phase_idx``, ``time`` (seconds elapsed since
+        ``on_phase_start``) and ``im_per_sec`` to ``self.perf_log``.
+        """
+        if not self.train:
+            return
+
+        assert self.phase_type == "train"
+
+        phase_duration = time.perf_counter() - self.phase_start_time
+        # Estimate: global batch size times len(dataloader); presumably the
+        # latter is the number of batches in the phase -- TODO confirm.
+        num_images = self.get_global_batchsize() * len(
+            self.dataloaders[self.phase_type]
+        )
+        im_per_sec = num_images / phase_duration
+        self.perf_log.append(
+            dict(
+                phase_idx=self.train_phase_idx,
+                time=phase_duration,
+                im_per_sec=im_per_sec,
+            )
+        )