diff --git a/src/neptune/common/hardware/gauges/gpu.py b/src/neptune/common/hardware/gauges/gpu.py
index 373051f90..7eecf6cc9 100644
--- a/src/neptune/common/hardware/gauges/gpu.py
+++ b/src/neptune/common/hardware/gauges/gpu.py
@@ -66,7 +66,12 @@ def name(self):
         return str(self.card_index)
 
     def value(self):
-        return self.__gpu_monitor.get_card_power_usage(self.card_index) // MILLIWATTS_IN_ONE_WATT
+        """Return the card's power usage in watts, or None when no reading is available."""
+        # Query the monitor once and reuse the result: a second call could return None (or a different reading).
+        power_usage = self.__gpu_monitor.get_card_power_usage(self.card_index)
+        if power_usage is None:
+            return None
+        return power_usage / MILLIWATTS_IN_ONE_WATT
 
     def __eq__(self, other):
         return self.__class__ == other.__class__ and self.card_index == other.card_index
diff --git a/src/neptune/common/hardware/gpu/gpu_monitor.py b/src/neptune/common/hardware/gpu/gpu_monitor.py
index e67dd4eee..801dfdb72 100644
--- a/src/neptune/common/hardware/gpu/gpu_monitor.py
+++ b/src/neptune/common/hardware/gpu/gpu_monitor.py
@@ -78,10 +78,10 @@ def __nvml_get_or_else(self, getter, default=None):
         try:
             nvmlInit()
             return getter()
-        except NVMLError:
+        except NVMLError as e:
             if not GPUMonitor.nvml_error_printed:
                 warning = (
-                    "Info (NVML): %s. GPU usage metrics may not be reported. For more information, "
+                    f"Info (NVML): {e}. GPU usage metrics may not be reported. For more information, "
                     "see https://docs.neptune.ai/help/nvml_error/"
                 )
                 warn_once(message=warning, exception=NeptuneWarning)