diff --git a/deepspeed/inference/engine.py b/deepspeed/inference/engine.py index c029aaa707e5..d2f9bf9cf0b2 100755 --- a/deepspeed/inference/engine.py +++ b/deepspeed/inference/engine.py @@ -253,7 +253,7 @@ def _post_forward_hook(self, module, input, output): else: get_accelerator().synchronize() self._end = time.time() - elapsed_time = self._end - self._start + elapsed_time = (self._end - self._start) * 1e3 # convert seconds to ms self._model_times.append(elapsed_time) def _create_model_parallel_group(self, config): @@ -612,7 +612,7 @@ def forward(self, *inputs, **kwargs): if self.model_profile_enabled and self._config.enable_cuda_graph: get_accelerator().synchronize() - duration = time.time() - start + duration = (time.time() - start) * 1e3 # convert seconds to ms self._model_times.append(duration) return outputs