From 8f63a70daf8bea561c533e2184d4ddaf2a8a2c6e Mon Sep 17 00:00:00 2001 From: Leon Oostrum Date: Fri, 16 Feb 2024 14:30:08 +0100 Subject: [PATCH 01/15] Add TegraObserver which can monitor/control graphics clock on a tegra device --- kernel_tuner/core.py | 13 +- kernel_tuner/observers/tegra.py | 202 ++++++++++++++++++++++++++++++++ 2 files changed, 212 insertions(+), 3 deletions(-) create mode 100644 kernel_tuner/observers/tegra.py diff --git a/kernel_tuner/core.py b/kernel_tuner/core.py index 174cd3af5..e5e3e3059 100644 --- a/kernel_tuner/core.py +++ b/kernel_tuner/core.py @@ -20,9 +20,8 @@ from kernel_tuner.backends.nvcuda import CudaFunctions from kernel_tuner.backends.opencl import OpenCLFunctions from kernel_tuner.backends.compiler import CompilerFunctions -from kernel_tuner.backends.opencl import OpenCLFunctions -from kernel_tuner.backends.hip import HipFunctions from kernel_tuner.observers.nvml import NVMLObserver +from kernel_tuner.observers.tegra import TegraObserver from kernel_tuner.observers.observer import ContinuousObserver, OutputObserver try: @@ -315,8 +314,9 @@ def __init__( else: raise ValueError("Sorry, support for languages other than CUDA, OpenCL, HIP, C, and Fortran is not implemented yet") - # look for NVMLObserver in observers, if present, enable special tunable parameters through nvml + # look for NVMLObserver and TegraObserver in observers, if present, enable special tunable parameters through nvml/tegra self.use_nvml = False + self.use_tegra = False self.continuous_observers = [] self.output_observers = [] if observers: @@ -324,6 +324,9 @@ def __init__( if isinstance(obs, NVMLObserver): self.nvml = obs.nvml self.use_nvml = True + if isinstance(obs, TegraObserver): + self.tegra = obs.tegra + self.use_tegra = True if hasattr(obs, "continuous_observer"): self.continuous_observers.append(obs.continuous_observer) if isinstance(obs, OutputObserver): @@ -409,6 +412,10 @@ def benchmark(self, func, gpu_args, instance, verbose, objective): if "nvml_mem_clock" in instance.params: self.nvml.mem_clock = instance.params["nvml_mem_clock"] + if self.use_tegra: + if "tegra_gr_clock" in instance.params: + self.tegra.gr_clock = instance.params["tegra_gr_clock"] + # Call the observers to register the configuration to be benchmarked for obs in self.dev.observers: obs.register_configuration(instance.params) diff --git a/kernel_tuner/observers/tegra.py b/kernel_tuner/observers/tegra.py new file mode 100644 index 000000000..cc37ec902 --- /dev/null +++ b/kernel_tuner/observers/tegra.py @@ -0,0 +1,202 @@ +import subprocess +import time +from pathlib import Path + +import numpy as np + +from kernel_tuner.observers.observer import BenchmarkObserver + + +class tegra: + """Class that gathers the Tegra functionality for one device.""" + + def __init__(self): + """Create object to control GPU core clock on a Tegra device.""" + + self.dev_path = self.get_dev_path() + self.default_min_gr_clock = self._read_clock_file("min_freq") + self.default_max_gr_clock = self._read_clock_file("max_freq") + self.supported_gr_clocks = self._read_clock_file("available_frequencies") + + self.default_railgate_status = self._read_railgate_file() + + @staticmethod + def get_dev_path(device_id): + """Get the path to device core clock control in /sys""" + root_path = Path("/sys/devices/gpu.0") + gpu_id = root_path.readlink() + return root_path / Path("devfreq") / gpu_id + + def _read_railgate_file(self): + """Read railgate status""" + with open(self.dev_path / Path("device/railgate_enable")) as fp: + data = int(fp.read().strip()) + return data + + def _write_railgate_file(self, value): + """Set railgate status""" + if value not in (0, 1): + raise ValueError(f"Illegal governor value {value}, must be 0 or 1") + print(f"Writing {value} to railgate file") + full_path = self.dev_path / Path("device/railgate_enable") + args = [ + "sudo", + "sh", + "-c", + f"echo {value} > {str(full_path)}" + ] + subprocess.run(args, check=True) + + def _read_clock_file(self, fname): + """Read current or available frequency value(s) from a frequency control file""" + with open(self.dev_path / Path(fname)) as fp: + raw_data = np.array(fp.read().strip().split()) + if len(raw_data) > 1: + data = raw_data.astype(int) + else: + data = int(raw_data) + return data + + def _write_clock_file(self, fname, value): + """Write a frequency value to a core clock control file""" + available_files = ("min_freq", "max_freq") + if fname not in available_files: + raise ValueError(f"Illegal filename value: {fname}, must be one of {available_files}") + + if value not in self.supported_gr_clocks: + raise ValueError(f"Illegal frequency value {value}, must be one of {self.supported_gr_clocks}") + + full_path = self.dev_path / Path(fname) + args = [ + "sudo", + "sh", + "-c", + f"echo {value} > {str(full_path)}" + ] + subprocess.run(args, check=True) + + @property + def gr_clock(self): + """Control the core clock frequency""" + return self._read_clock_file("cur_freq") + + @gr_clock.setter + def gr_clock(self, new_clock): + self._write_railgate_file(0) + cur_clock = self._read_clock_file("cur_freq") + if new_clock > cur_clock: + self._write_clock_file("max_freq", new_clock) + self._write_clock_file("min_freq", new_clock) + elif new_clock < cur_clock: + self._write_clock_file("min_freq", new_clock) + self._write_clock_file("max_freq", new_clock) + # wait for the new clock to be applied + while (self._read_clock_file("cur_freq") != new_clock): + time.sleep(.001) + + def reset_clock(self): + """Reset the core clock frequency to the original values""" + self._write_clock_file("min_freq", self.default_min_gr_clock) + self._write_clock_file("max_freq", self.default_max_gr_clock) + self._write_railgate_file(self.default_railgate_status) + + def __del__(self): + # restore original core clocks + self.reset_clock() + + +class TegraObserver(BenchmarkObserver): + """Observer that uses /sys/ to monitor and control graphics clock frequencies on a Tegra device. + + :param observables: List of quantities should be observed during tuning, supported is: "core_freq" + :type observables: list of strings + + :param device: Device ordinal used to identify your device, typically 0 + :type device: integer + + :param save_all: If set to True, all data collected by the TegraObserver for every iteration during benchmarking will be returned. + If set to False, data will be aggregated over multiple iterations during benchmarking. False by default. + :type save_all: boolean + + """ + + def __init__( + self, + observables, + device=0, + save_all=False + ): + """Create a TegraObserver""" + self.tegra = tegra(device) + self.save_all = save_all + + supported = ["core_freq"] + for obs in observables: + if obs not in supported: + raise ValueError(f"Observable {obs} not in supported: {supported}") + self.observables = observables + + self.results = {} + for obs in self.observables: + self.results[obs + "s"] = [] + + self.during_obs = [ + obs + for obs in observables + if obs in ["core_freq"] + ] + + self.iteration = {obs: [] for obs in self.during_obs} + + def before_start(self): + # clear results of the observables for next measurement + self.iteration = {obs: [] for obs in self.during_obs} + + def after_start(self): + # ensure during is called at least once + self.during() + + def during(self): + if "core_freq" in self.observables: + self.iteration["core_freq"].append(self.tegra.gr_clock) + + def after_finish(self): + if "core_freq" in self.observables: + self.results["core_freqs"].append(np.average(self.iteration["core_freq"])) + + def get_results(self): + averaged_results = {} + + # return averaged results, except when save_all is True + for obs in self.observables: + # save all information, if the user requested + if self.save_all: + averaged_results[obs + "s"] = self.results[obs + "s"] + # save averaged results, default + averaged_results[obs] = np.average(self.results[obs + "s"]) + + # clear results for next round + for obs in self.observables: + self.results[obs + "s"] = [] + + return averaged_results + + +# High-level Helper functions + + +def get_tegra_gr_clocks(device=0, n=None, quiet=False): + """Get tunable parameter for Tegra graphics clock, n is desired number of values.""" + d = tegra(device) + gr_clocks = d.supported_gr_clocks + + if n and (len(gr_clocks) > n): + indices = np.array(np.ceil(np.linspace(0, len(gr_clocks) - 1, n)), dtype=int) + gr_clocks = np.array(gr_clocks)[indices] + + tune_params = dict() + tune_params["tegra_gr_clock"] = list(gr_clocks) + + if not quiet: + print("Using gr frequencies:", tune_params["tegra_gr_clock"]) + return tune_params From e2e604dffab989875248944a725ad667bf46e689 Mon Sep 17 00:00:00 2001 From: Leon Oostrum Date: Fri, 16 Feb 2024 14:31:56 +0100 Subject: [PATCH 02/15] Fix calls to tegra.__init__ --- kernel_tuner/observers/tegra.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel_tuner/observers/tegra.py b/kernel_tuner/observers/tegra.py index cc37ec902..c6cb1965c 100644 --- a/kernel_tuner/observers/tegra.py +++ b/kernel_tuner/observers/tegra.py @@ -127,7 +127,7 @@ def __init__( save_all=False ): """Create a TegraObserver""" - self.tegra = tegra(device) + self.tegra = tegra() self.save_all = save_all supported = ["core_freq"] @@ -187,7 +187,7 @@ def get_results(self): def get_tegra_gr_clocks(device=0, n=None, quiet=False): """Get tunable parameter for Tegra graphics clock, n is desired number of values.""" - d = tegra(device) + d = tegra() gr_clocks = d.supported_gr_clocks if n and (len(gr_clocks) > n): From aa2dd311370ee3b27214d56df5f16130b274f64e Mon Sep 17 00:00:00 2001 From: Leon Oostrum Date: Fri, 16 Feb 2024 14:35:49 +0100 Subject: [PATCH 03/15] Fix arguments of tegra get_dev_path --- kernel_tuner/observers/tegra.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel_tuner/observers/tegra.py b/kernel_tuner/observers/tegra.py index c6cb1965c..26faf4dee 100644 --- a/kernel_tuner/observers/tegra.py +++ b/kernel_tuner/observers/tegra.py @@ -21,7 +21,7 @@ def __init__(self): self.default_railgate_status = self._read_railgate_file() @staticmethod - def get_dev_path(device_id): + def get_dev_path(): """Get the path to device core clock control in /sys""" root_path = Path("/sys/devices/gpu.0") gpu_id = root_path.readlink() From 565e79184af2ac0c974b1205c7178cb84923dba0 Mon Sep 17 00:00:00 2001 From: Leon Oostrum Date: Fri, 16 Feb 2024 14:36:16 +0100 Subject: [PATCH 04/15] remove debug print statement --- kernel_tuner/observers/tegra.py | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel_tuner/observers/tegra.py b/kernel_tuner/observers/tegra.py index 26faf4dee..1ff5f79d4 100644 --- a/kernel_tuner/observers/tegra.py +++ b/kernel_tuner/observers/tegra.py @@ -37,7 +37,6 @@ def _write_railgate_file(self, value): """Set railgate status""" if value not in (0, 1): raise ValueError(f"Illegal governor value {value}, must be 0 or 1") - print(f"Writing {value} to railgate file") full_path = self.dev_path / Path("device/railgate_enable") args = [ "sudo", From 2de0c8210915b568ae7adf7ff7fb89cca29fb4a5 Mon Sep 17 00:00:00 2001 From: Leon Oostrum Date: Fri, 16 Feb 2024 14:51:58 +0100 Subject: [PATCH 05/15] Change way of finding path to GPU device to support both Jetson Nano and Orin --- kernel_tuner/observers/tegra.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/kernel_tuner/observers/tegra.py b/kernel_tuner/observers/tegra.py index 1ff5f79d4..88f006131 100644 --- a/kernel_tuner/observers/tegra.py +++ b/kernel_tuner/observers/tegra.py @@ -23,9 +23,16 @@ def __init__(self): @staticmethod def get_dev_path(): """Get the path to device core clock control in /sys""" - root_path = Path("/sys/devices/gpu.0") - gpu_id = root_path.readlink() - return root_path / Path("devfreq") / gpu_id + # loop to find GPU device name based on jetson_clocks + for dev in Path("/sys/class/devfreq").iterdir(): + with open(dev / Path("device/of_node/name")) as fp: + name = fp.read().strip().rstrip("\x00") + if name in ("gv11b", "gp10b", "ga10b", "gpu"): + root_path = dev + break + else: + raise FileNotFoundError("No internal tegra GPU found") + return root_path def _read_railgate_file(self): """Read railgate status""" From 178e09c254664cb6ac86f69ebf7e611933669f09 Mon Sep 17 00:00:00 2001 From: Leon Oostrum Date: Fri, 16 Feb 2024 15:07:25 +0100 Subject: [PATCH 06/15] Remove unused device parameter --- kernel_tuner/observers/tegra.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/kernel_tuner/observers/tegra.py b/kernel_tuner/observers/tegra.py index 88f006131..834edd00c 100644 --- a/kernel_tuner/observers/tegra.py +++ b/kernel_tuner/observers/tegra.py @@ -117,9 +117,6 @@ class TegraObserver(BenchmarkObserver): :param observables: List of quantities should be observed during tuning, supported is: "core_freq" :type observables: list of strings - :param device: Device ordinal used to identify your device, typically 0 - :type device: integer - :param save_all: If set to True, all data collected by the TegraObserver for every iteration during benchmarking will be returned. If set to False, data will be aggregated over multiple iterations during benchmarking. False by default. :type save_all: boolean @@ -129,7 +126,6 @@ class TegraObserver(BenchmarkObserver): def __init__( self, observables, - device=0, save_all=False ): """Create a TegraObserver""" From debb03849251570651927c6d2bf527351cfe2913 Mon Sep 17 00:00:00 2001 From: Leon Oostrum Date: Fri, 16 Feb 2024 15:10:14 +0100 Subject: [PATCH 07/15] Remove unused device parameter --- kernel_tuner/observers/tegra.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel_tuner/observers/tegra.py b/kernel_tuner/observers/tegra.py index 834edd00c..1cab26201 100644 --- a/kernel_tuner/observers/tegra.py +++ b/kernel_tuner/observers/tegra.py @@ -187,7 +187,7 @@ def get_results(self): # High-level Helper functions -def get_tegra_gr_clocks(device=0, n=None, quiet=False): +def get_tegra_gr_clocks(n=None, quiet=False): """Get tunable parameter for Tegra graphics clock, n is desired number of values.""" d = tegra() gr_clocks = d.supported_gr_clocks From 818e65a7883e47f50c41a9d42a84abff92989548 Mon Sep 17 00:00:00 2001 From: Ben van Werkhoven Date: Mon, 4 Mar 2024 16:28:52 +0100 Subject: [PATCH 08/15] only reset clocks if they have been set --- kernel_tuner/observers/tegra.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel_tuner/observers/tegra.py b/kernel_tuner/observers/tegra.py index 1cab26201..0453f9efd 100644 --- a/kernel_tuner/observers/tegra.py +++ b/kernel_tuner/observers/tegra.py @@ -20,6 +20,8 @@ def __init__(self): self.default_railgate_status = self._read_railgate_file() + self.has_changed_clocks = False + @staticmethod def get_dev_path(): """Get the path to device core clock control in /sys""" @@ -65,6 +67,7 @@ def _read_clock_file(self, fname): def _write_clock_file(self, fname, value): """Write a frequency value to a core clock control file""" + self.has_changed_clocks = True available_files = ("min_freq", "max_freq") if fname not in available_files: raise ValueError(f"Illegal filename value: {fname}, must be one of {available_files}") @@ -107,8 +110,9 @@ def reset_clock(self): self._write_railgate_file(self.default_railgate_status) def __del__(self): - # restore original core clocks - self.reset_clock() + # restore original core clocks, if changed + if self.has_changed_clocks: + self.reset_clock() class TegraObserver(BenchmarkObserver): From 7f7e01ed5e78a2606e995d04a8b8bca5380d32d2 Mon Sep 17 00:00:00 2001 From: Ben van Werkhoven Date: Mon, 18 Mar 2024 13:36:09 +0100 Subject: [PATCH 09/15] example use of tegra observer --- examples/cuda/vector_add_tegra_observer.py | 46 ++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100755 examples/cuda/vector_add_tegra_observer.py diff --git a/examples/cuda/vector_add_tegra_observer.py b/examples/cuda/vector_add_tegra_observer.py new file mode 100755 index 000000000..43b7e4166 --- /dev/null +++ b/examples/cuda/vector_add_tegra_observer.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python +"""This is the minimal example from the README""" + +import json + +import numpy +from kernel_tuner import tune_kernel +from kernel_tuner.observers.tegra import TegraObserver + +def tune(): + + kernel_string = """ + __global__ void vector_add(float *c, float *a, float *b, int n) { + int i = blockIdx.x * block_size_x + threadIdx.x; + if (i Date: Mon, 18 Mar 2024 15:05:45 +0100 Subject: [PATCH 10/15] add comment to document min-max setting behavior --- kernel_tuner/observers/tegra.py | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel_tuner/observers/tegra.py b/kernel_tuner/observers/tegra.py index 0453f9efd..486da5c8a 100644 --- a/kernel_tuner/observers/tegra.py +++ b/kernel_tuner/observers/tegra.py @@ -93,6 +93,7 @@ def gr_clock(self): def gr_clock(self, new_clock): self._write_railgate_file(0) cur_clock = self._read_clock_file("cur_freq") + # system will ignore if we set new min higher than current max, or vice versa if new_clock > cur_clock: self._write_clock_file("max_freq", new_clock) self._write_clock_file("min_freq", new_clock) From 24dd06dcc51a8c41697e0d35b622a41b0628d69f Mon Sep 17 00:00:00 2001 From: Martijn Date: Thu, 4 Jul 2024 20:50:11 +0200 Subject: [PATCH 11/15] Added temperature and energy readings + continuous observer --- kernel_tuner/observers/tegra.py | 191 +++++++++++++++++++++++++++++--- 1 file changed, 177 insertions(+), 14 deletions(-) diff --git a/kernel_tuner/observers/tegra.py b/kernel_tuner/observers/tegra.py index 486da5c8a..68823d5c6 100644 --- a/kernel_tuner/observers/tegra.py +++ b/kernel_tuner/observers/tegra.py @@ -1,27 +1,40 @@ import subprocess import time from pathlib import Path +import os import numpy as np -from kernel_tuner.observers.observer import BenchmarkObserver +from kernel_tuner.observers.observer import BenchmarkObserver, ContinuousObserver +from kernel_tuner.observers.pmt import PMTObserver +from kernel_tuner.observers.powersensor import PowerSensorObserver class tegra: """Class that gathers the Tegra functionality for one device.""" - def __init__(self): + def __init__(self, powerPath, tempPath): + self.has_changed_clocks = False """Create object to control GPU core clock on a Tegra device.""" - + # Get paths self.dev_path = self.get_dev_path() + if tempPath == "": + self.gpu_temp_path = self.get_temp_path() + else: + self.gpu_temp_path = tempPath + if powerPath == "": + self.gpu_power_path = self.get_power_path() + else: + self.gpu_power_path = powerPath + self.gpu_channel = self.get_gpu_channel() + + # Read default clock values self.default_min_gr_clock = self._read_clock_file("min_freq") self.default_max_gr_clock = self._read_clock_file("max_freq") self.supported_gr_clocks = self._read_clock_file("available_frequencies") self.default_railgate_status = self._read_railgate_file() - - self.has_changed_clocks = False - + @staticmethod def get_dev_path(): """Get the path to device core clock control in /sys""" @@ -36,6 +49,49 @@ def get_dev_path(): raise FileNotFoundError("No internal tegra GPU found") return root_path + def get_temp_path(self): + """Find the file which holds the GPU temperature""" + for zone in Path("/sys/class/thermal").iterdir(): + with open(zone / Path("type")) as fp: + name = fp.read().strip() + if name == "GPU-therm": + gpu_temp_path = zone + "/" + break + else: + raise FileNotFoundError("No GPU sensor for temperature found") + + return gpu_temp_path + + def get_power_path(self, start_path="/sys/bus/i2c/drivers/ina3221"): + """Recursively search for a file which holds power readings + starting from start_path.""" + for entry in os.listdir(start_path): + path = os.path.join(start_path, entry) + if os.path.isfile(path) and entry == "curr1_input": + return start_path + "/" + elif entry in start_path: + continue + elif os.path.isdir(path): + result = self.get_power_path(path) + if result: + return result + return None + + def get_gpu_channel(self): + """Get the channel number of the sensor which measures the GPU power""" + + # Iterate over all channels in the of_node dir of the power path to + # find the channel which holds GPU power information + for channel_dir in Path(self.gpu_power_path + "of_node/").iterdir(): + if("channel@" in channel_dir.name): + with open(channel_dir / Path("label")) as fp: + channel_label = fp.read().strip() + if "GPU" in channel_label: + return str(int(channel_dir.name[-1])+1) + + # If this statement is reached, no channel for the GPU was found + raise FileNotFoundError("No channel found with GPU power readings") + def _read_railgate_file(self): """Read railgate status""" with open(self.dev_path / Path("device/railgate_enable")) as fp: @@ -115,7 +171,22 @@ def __del__(self): if self.has_changed_clocks: self.reset_clock() - + def read_gpu_temp(self): + """Read GPU temperature""" + with open(self.gpu_temp_path + "temp") as fp: + temp = int(fp.read()) + return temp / 1000 + + def read_gpu_power(self): + """Read the current and voltage to calculate and return the power int watt""" + + result_cur = subprocess.run(["sudo", "cat", f"{self.gpu_power_path}curr{self.gpu_channel}_input"], capture_output=True, text=True) + current = int(result_cur.stdout.strip()) / 1000 + result_vol = subprocess.run(["sudo", "cat", f"{self.gpu_power_path}in{self.gpu_channel}_input"], capture_output=True, text=True) + voltage = int(result_vol.stdout.strip()) / 1000 + + return current * voltage + class TegraObserver(BenchmarkObserver): """Observer that uses /sys/ to monitor and control graphics clock frequencies on a Tegra device. @@ -131,18 +202,33 @@ class TegraObserver(BenchmarkObserver): def __init__( self, observables, - save_all=False + save_all=False, + powerPath="", + tempPath="" ): """Create a TegraObserver""" - self.tegra = tegra() + self.tegra = tegra(powerPath=powerPath, tempPath=tempPath) self.save_all = save_all - - supported = ["core_freq"] + self._set_units = False + + supported = ["core_freq", "gpu_temp", "gpu_power", "gpu_energy"] for obs in observables: if obs not in supported: raise ValueError(f"Observable {obs} not in supported: {supported}") self.observables = observables - + + # Observe power measurements with the continuous observer + self.measure_power = False + self.needs_power = ["gpu_power", "gpu_energy"] + if any([obs in self.needs_power for obs in observables]): + self.measure_power = True + power_observables = [obs for obs in observables if obs in self.needs_power] + self.continuous_observer = tegraPowerObserver( + power_observables, self, continous_duration=3 + ) + # remove power observables + self.observables = [obs for obs in observables if obs not in self.needs_power] + self.results = {} for obs in self.observables: self.results[obs + "s"] = [] @@ -150,27 +236,37 @@ def __init__( self.during_obs = [ obs for obs in observables - if obs in ["core_freq"] + if obs in ["core_freq", "gpu_temp"] ] self.iteration = {obs: [] for obs in self.during_obs} + def before_start(self): # clear results of the observables for next measurement self.iteration = {obs: [] for obs in self.during_obs} + # Set the power unit to Watts + if self._set_units == False: + self.dev.units["power"] = "W" + self._set_units = True def after_start(self): + self.t0 = time.perf_counter() # ensure during is called at least once self.during() def during(self): if "core_freq" in self.observables: self.iteration["core_freq"].append(self.tegra.gr_clock) + if "gpu_temp" in self.observables: + self.iteration["gpu_temp"].append(self.tegra.read_gpu_temp()) def after_finish(self): if "core_freq" in self.observables: self.results["core_freqs"].append(np.average(self.iteration["core_freq"])) - + if "gpu_temp" in self.observables: + self.results["gpu_temps"].append(np.average(self.iteration["gpu_temp"])) + def get_results(self): averaged_results = {} @@ -207,3 +303,70 @@ def get_tegra_gr_clocks(n=None, quiet=False): if not quiet: print("Using gr frequencies:", tune_params["tegra_gr_clock"]) return tune_params + + +class tegraPowerObserver(ContinuousObserver): + """Observer that measures power using tegra and continuous benchmarking.""" + def __init__(self, observables, parent, continous_duration=1): + self.parent = parent + + supported = ["gpu_power", "gpu_energy"] + for obs in observables: + if obs not in supported: + raise ValueError(f"Observable {obs} not in supported: {supported}") + self.observables = observables + + # duration in seconds + self.continuous_duration = continous_duration + + self.power = 0 + self.energy = 0 + self.power_readings = [] + self.t0 = 0 + + # results from the last iteration-based benchmark + self.results = None + + def before_start(self): + self.parent.before_start() + self.power = 0 + self.energy = 0 + self.power_readings = [] + + def after_start(self): + self.parent.after_start() + self.t0 = time.perf_counter() + + def during(self): + self.parent.during() + power_usage = self.parent.tegra.read_gpu_power() + timestamp = time.perf_counter() - self.t0 + # only store the result if we get a new measurement from tegra + if len(self.power_readings) == 0 or ( + self.power_readings[-1][1] != power_usage + or timestamp - self.power_readings[-1][0] > 0.01 + ): + self.power_readings.append([timestamp, power_usage]) + + def after_finish(self): + self.parent.after_finish() + # safeguard in case we have no measurements, perhaps the kernel was too short to measure anything + if not self.power_readings: + return + + # convert to seconds from milliseconds + execution_time = self.results["time"] / 1e3 + self.power = np.median([d[1] for d in self.power_readings]) + self.energy = self.power * execution_time + + def get_results(self): + results = self.parent.get_results() + keys = list(results.keys()) + for key in keys: + results["pwr_" + key] = results.pop(key) + if "gpu_power" in self.observables: + results["gpu_power"] = self.power + if "gpu_energy" in self.observables: + results["gpu_energy"] = self.energy + + return results \ No newline at end of file From f937c3e7d920f947e8b8e792514a60a70db1269f Mon Sep 17 00:00:00 2001 From: Martijn Date: Sun, 7 Jul 2024 23:19:15 +0200 Subject: [PATCH 12/15] fix: mistake in get_temp_path() solved --- kernel_tuner/observers/tegra.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel_tuner/observers/tegra.py b/kernel_tuner/observers/tegra.py index 68823d5c6..9d77be82b 100644 --- a/kernel_tuner/observers/tegra.py +++ b/kernel_tuner/observers/tegra.py @@ -55,7 +55,7 @@ def get_temp_path(self): with open(zone / Path("type")) as fp: name = fp.read().strip() if name == "GPU-therm": - gpu_temp_path = zone + "/" + gpu_temp_path = str(zone) + "/" break else: raise FileNotFoundError("No GPU sensor for temperature found") From 7faff0c5fa820de73a4192569bd989a48b6f7bdd Mon Sep 17 00:00:00 2001 From: Martijn Date: Tue, 23 Jul 2024 16:26:31 +0200 Subject: [PATCH 13/15] refactor: tidy up code --- kernel_tuner/observers/tegra.py | 76 ++++++++++++++++----------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/kernel_tuner/observers/tegra.py b/kernel_tuner/observers/tegra.py index 9d77be82b..595bbb47c 100644 --- a/kernel_tuner/observers/tegra.py +++ b/kernel_tuner/observers/tegra.py @@ -13,28 +13,29 @@ class tegra: """Class that gathers the Tegra functionality for one device.""" - def __init__(self, powerPath, tempPath): - self.has_changed_clocks = False + def __init__(self, power_path, temp_path): """Create object to control GPU core clock on a Tegra device.""" + self.has_changed_clocks = False + # Get paths self.dev_path = self.get_dev_path() - if tempPath == "": + if temp_path == "": self.gpu_temp_path = self.get_temp_path() else: - self.gpu_temp_path = tempPath - if powerPath == "": + self.gpu_temp_path = temp_path + if power_path == "": self.gpu_power_path = self.get_power_path() else: - self.gpu_power_path = powerPath + self.gpu_power_path = power_path self.gpu_channel = self.get_gpu_channel() - + # Read default clock values self.default_min_gr_clock = self._read_clock_file("min_freq") self.default_max_gr_clock = self._read_clock_file("max_freq") self.supported_gr_clocks = self._read_clock_file("available_frequencies") self.default_railgate_status = self._read_railgate_file() - + @staticmethod def get_dev_path(): """Get the path to device core clock control in /sys""" @@ -55,20 +56,20 @@ def get_temp_path(self): with open(zone / Path("type")) as fp: name = fp.read().strip() if name == "GPU-therm": - gpu_temp_path = str(zone) + "/" + gpu_temp_path = str(zone) break - else: + + if gpu_temp_path is None: raise FileNotFoundError("No GPU sensor for temperature found") - + return gpu_temp_path def get_power_path(self, start_path="/sys/bus/i2c/drivers/ina3221"): - """Recursively search for a file which holds power readings - starting from start_path.""" + """Search for a file which holds power readings""" for entry in os.listdir(start_path): path = os.path.join(start_path, entry) if os.path.isfile(path) and entry == "curr1_input": - return start_path + "/" + return start_path elif entry in start_path: continue elif os.path.isdir(path): @@ -79,10 +80,9 @@ def get_power_path(self, start_path="/sys/bus/i2c/drivers/ina3221"): def get_gpu_channel(self): """Get the channel number of the sensor which measures the GPU power""" - - # Iterate over all channels in the of_node dir of the power path to - # find the channel which holds GPU power information - for channel_dir in Path(self.gpu_power_path + "of_node/").iterdir(): + # Iterate over all channels in the of_node dir of the power path to + # find the channel which holds GPU power information + for channel_dir in Path(self.gpu_power_path + "/of_node/").iterdir(): if("channel@" in channel_dir.name): with open(channel_dir / Path("label")) as fp: channel_label = fp.read().strip() @@ -173,18 +173,18 @@ def __del__(self): def read_gpu_temp(self): """Read GPU temperature""" - with open(self.gpu_temp_path + "temp") as fp: + with open(self.gpu_temp_path + "/temp") as fp: temp = int(fp.read()) return temp / 1000 - + def read_gpu_power(self): """Read the current and voltage to calculate and return the power int watt""" - - result_cur = subprocess.run(["sudo", "cat", f"{self.gpu_power_path}curr{self.gpu_channel}_input"], capture_output=True, text=True) + + result_cur = subprocess.run(["sudo", "cat", f"{self.gpu_power_path}/curr{self.gpu_channel}_input"], capture_output=True, text=True) current = int(result_cur.stdout.strip()) / 1000 - result_vol = subprocess.run(["sudo", "cat", f"{self.gpu_power_path}in{self.gpu_channel}_input"], capture_output=True, text=True) + result_vol = subprocess.run(["sudo", "cat", f"{self.gpu_power_path}/in{self.gpu_channel}_input"], capture_output=True, text=True) voltage = int(result_vol.stdout.strip()) / 1000 - + return current * voltage class TegraObserver(BenchmarkObserver): @@ -203,20 +203,20 @@ def __init__( self, observables, save_all=False, - powerPath="", - tempPath="" + power_path="", + temp_path="" ): """Create a TegraObserver""" - self.tegra = tegra(powerPath=powerPath, tempPath=tempPath) + self.tegra = tegra(power_path=power_path, temp_path=temp_path) self.save_all = save_all self._set_units = False - + supported = ["core_freq", "gpu_temp", "gpu_power", "gpu_energy"] for obs in observables: if obs not in supported: raise ValueError(f"Observable {obs} not in supported: {supported}") self.observables = observables - + # Observe power measurements with the continuous observer self.measure_power = False self.needs_power = ["gpu_power", "gpu_energy"] @@ -228,7 +228,7 @@ def __init__( ) # remove power observables self.observables = [obs for obs in observables if obs not in self.needs_power] - + self.results = {} for obs in self.observables: self.results[obs + "s"] = [] @@ -309,13 +309,13 @@ class tegraPowerObserver(ContinuousObserver): """Observer that measures power using tegra and continuous benchmarking.""" def __init__(self, observables, parent, continous_duration=1): self.parent = parent - + supported = ["gpu_power", "gpu_energy"] for obs in observables: if obs not in supported: raise ValueError(f"Observable {obs} not in supported: {supported}") self.observables = observables - + # duration in seconds self.continuous_duration = continous_duration @@ -326,17 +326,17 @@ def __init__(self, observables, parent, continous_duration=1): # results from the last iteration-based benchmark self.results = None - + def before_start(self): self.parent.before_start() self.power = 0 self.energy = 0 self.power_readings = [] - + def after_start(self): self.parent.after_start() self.t0 = time.perf_counter() - + def during(self): self.parent.during() power_usage = self.parent.tegra.read_gpu_power() @@ -347,7 +347,7 @@ def during(self): or timestamp - self.power_readings[-1][0] > 0.01 ): self.power_readings.append([timestamp, power_usage]) - + def after_finish(self): self.parent.after_finish() # safeguard in case we have no measurements, perhaps the kernel was too short to measure anything @@ -358,7 +358,7 @@ def after_finish(self): execution_time = self.results["time"] / 1e3 self.power = np.median([d[1] for d in self.power_readings]) self.energy = self.power * execution_time - + def get_results(self): results = self.parent.get_results() keys = list(results.keys()) @@ -368,5 +368,5 @@ def get_results(self): results["gpu_power"] = self.power if "gpu_energy" in self.observables: results["gpu_energy"] = self.energy - + return results \ No newline at end of file From f090f8deb65c5ed823219c7a7f2ed14b437b18b3 Mon Sep 17 00:00:00 2001 From: Ben van Werkhoven Date: Thu, 3 Oct 2024 16:21:12 +0200 Subject: [PATCH 14/15] reduced code duplication between tegra and nvml continuous observers --- kernel_tuner/observers/nvml.py | 77 ++------------------------ kernel_tuner/observers/observer.py | 72 +++++++++++++++++++++++- kernel_tuner/observers/tegra.py | 89 +++++------------------------- 3 files changed, 88 insertions(+), 150 deletions(-) diff --git a/kernel_tuner/observers/nvml.py b/kernel_tuner/observers/nvml.py index d33327a3c..7a41fd517 100644 --- a/kernel_tuner/observers/nvml.py +++ b/kernel_tuner/observers/nvml.py @@ -384,9 +384,7 @@ def __init__( if any([obs in self.needs_power for obs in observables]): self.measure_power = True power_observables = [obs for obs in observables if obs in self.needs_power] - self.continuous_observer = NVMLPowerObserver( - power_observables, self, self.nvml, continous_duration - ) + self.continuous_observer = ContinuousObserver("nvml", self, power_observables, continous_duration=continuous_duration) # remove power observables self.observables = [obs for obs in observables if obs not in self.needs_power] @@ -408,6 +406,9 @@ def __init__( ] self.iteration = {obs: [] for obs in self.during_obs} + def read_power(self): + return self.nvml.pwr_usage() + def before_start(self): # clear results of the observables for next measurement self.iteration = {obs: [] for obs in self.during_obs} @@ -471,76 +472,6 @@ def get_results(self): return averaged_results -class NVMLPowerObserver(ContinuousObserver): - """Observer that measures power using NVML and continuous benchmarking.""" - - def __init__(self, observables, parent, nvml_instance, continous_duration=1): - self.parent = parent - self.nvml = nvml_instance - - supported = ["power_readings", "nvml_power", "nvml_energy"] - for obs in observables: - if obs not in supported: - raise ValueError(f"Observable {obs} not in supported: {supported}") - self.observables = observables - - # duration in seconds - self.continuous_duration = continous_duration - - self.power = 0 - self.energy = 0 - self.power_readings = [] - self.t0 = 0 - - # results from the last iteration-based benchmark - self.results = None - - def before_start(self): - self.parent.before_start() - self.power = 0 - self.energy = 0 - self.power_readings = [] - - def after_start(self): - self.parent.after_start() - self.t0 = time.perf_counter() - - def during(self): - self.parent.during() - power_usage = self.nvml.pwr_usage() - timestamp = time.perf_counter() - self.t0 - # only store the result if we get a new measurement from NVML - if len(self.power_readings) == 0 or ( - self.power_readings[-1][1] != power_usage - or timestamp - self.power_readings[-1][0] > 0.01 - ): - self.power_readings.append([timestamp, power_usage]) - - def after_finish(self): - self.parent.after_finish() - # safeguard in case we have no measurements, perhaps the kernel was too short to measure anything - if not self.power_readings: - return - - # convert to seconds from milliseconds - execution_time = self.results["time"] / 1e3 - self.power = np.median([d[1] / 1e3 for d in self.power_readings]) - self.energy = self.power * execution_time - - def get_results(self): - results = self.parent.get_results() - keys = list(results.keys()) - for key in keys: - results["pwr_" + key] = results.pop(key) - if "nvml_energy" in self.observables: - results["nvml_energy"] = self.energy - if "nvml_power" in self.observables: - results["nvml_power"] = self.power - if "power_readings" in self.observables: - results["power_readings"] = self.power_readings - return results - - # High-level Helper functions diff --git a/kernel_tuner/observers/observer.py b/kernel_tuner/observers/observer.py index 493de94f8..32ea6eb82 100644 --- a/kernel_tuner/observers/observer.py +++ b/kernel_tuner/observers/observer.py @@ -44,8 +44,78 @@ class IterationObserver(BenchmarkObserver): class ContinuousObserver(BenchmarkObserver): - pass + """Generic observer that measures power while and continuous benchmarking. + + To support continuous benchmarking an Observer should support: + a .read_power() method, which the ContinuousObserver can call to read power + """ + def __init__(self, name, observables, parent, continous_duration=1): + self.parent = parent + self.name = name + + supported = [self.name + "_power", self.name + "_energy", "power_readings"] + for obs in observables: + if obs not in supported: + raise ValueError(f"Observable {obs} not in supported: {supported}") + self.observables = observables + + # duration in seconds + self.continuous_duration = continous_duration + + self.power = 0 + self.energy = 0 + self.power_readings = [] + self.t0 = 0 + + # results from the last iteration-based benchmark + # these are set by the benchmarking function of Kernel Tuner before + # the continuous observer is called. + self.results = None + + def before_start(self): + self.parent.before_start() + self.power = 0 + self.energy = 0 + self.power_readings = [] + def after_start(self): + self.parent.after_start() + self.t0 = time.perf_counter() + + def during(self): + self.parent.during() + power_usage = self.parent.read_power() + timestamp = time.perf_counter() - self.t0 + # only store the result if we get a new measurement from the GPU + if len(self.power_readings) == 0 or ( + self.power_readings[-1][1] != power_usage + or timestamp - self.power_readings[-1][0] > 0.01 + ): + self.power_readings.append([timestamp, power_usage]) + + def after_finish(self): + self.parent.after_finish() + # safeguard in case we have no measurements, perhaps the kernel was too short to measure anything + if not self.power_readings: + return + + # convert to seconds from milliseconds + execution_time = self.results["time"] / 1e3 + self.power = np.median([d[1] for d in self.power_readings]) + self.energy = self.power * execution_time + + def get_results(self): + results = self.parent.get_results() + keys = list(results.keys()) + for key in keys: + results["pwr_" + key] = results.pop(key) + if self.name + "_power" in self.observables: + results[self.name + "_power"] = self.power + if self.name + "_energy" in self.observables: + results[self.name + "_energy"] = self.energy + if "power_readings" in self.observables: + results["power_readings"] = self.power_readings + return results class OutputObserver(BenchmarkObserver): """Observer that can verify or measure something about the output produced by a kernel.""" diff --git a/kernel_tuner/observers/tegra.py b/kernel_tuner/observers/tegra.py index 595bbb47c..fc3e54de2 100644 --- a/kernel_tuner/observers/tegra.py +++ b/kernel_tuner/observers/tegra.py @@ -186,7 +186,8 @@ def read_gpu_power(self): voltage = int(result_vol.stdout.strip()) / 1000 return current * voltage - + + class TegraObserver(BenchmarkObserver): """Observer that uses /sys/ to monitor and control graphics clock frequencies on a Tegra device. @@ -211,7 +212,7 @@ def __init__( self.save_all = save_all self._set_units = False - supported = ["core_freq", "gpu_temp", "gpu_power", "gpu_energy"] + supported = ["core_freq", "tegra_temp", "tegra_power", "tegra_energy"] for obs in observables: if obs not in supported: raise ValueError(f"Observable {obs} not in supported: {supported}") @@ -219,13 +220,12 @@ def __init__( # Observe power measurements with the continuous observer self.measure_power = False - self.needs_power = ["gpu_power", "gpu_energy"] + self.needs_power = ["tegra_power", "tegra_energy"] if any([obs in self.needs_power for obs in observables]): self.measure_power = True power_observables = [obs for obs in observables if obs in self.needs_power] - self.continuous_observer = tegraPowerObserver( - power_observables, self, continous_duration=3 - ) + self.continuous_observer = ContinuousObserver("tegra", power_observables, self, continous_duration=3) + # remove power observables self.observables = [obs for obs in observables if obs not in self.needs_power] @@ -236,11 +236,15 @@ def __init__( self.during_obs = [ obs for obs in observables - if obs in ["core_freq", "gpu_temp"] + if obs in ["core_freq", "tegra_temp"] ] self.iteration = {obs: [] for obs in self.during_obs} - + + + def read_power(self): + return self.tegra.read_gpu_power() + def before_start(self): # clear results of the observables for next measurement @@ -266,7 +270,7 @@ def after_finish(self): self.results["core_freqs"].append(np.average(self.iteration["core_freq"])) if "gpu_temp" in self.observables: self.results["gpu_temps"].append(np.average(self.iteration["gpu_temp"])) - + def get_results(self): averaged_results = {} @@ -303,70 +307,3 @@ def get_tegra_gr_clocks(n=None, quiet=False): if not quiet: print("Using gr frequencies:", tune_params["tegra_gr_clock"]) return tune_params - - -class tegraPowerObserver(ContinuousObserver): - """Observer that measures power using tegra and continuous benchmarking.""" - def __init__(self, observables, parent, continous_duration=1): - self.parent = parent - - supported = ["gpu_power", "gpu_energy"] - for obs in observables: - if obs not in supported: - raise ValueError(f"Observable {obs} not in supported: {supported}") - self.observables = observables - - # duration in seconds - self.continuous_duration = continous_duration - - self.power = 0 - self.energy = 0 - self.power_readings = [] - self.t0 = 0 - - # results from the last iteration-based benchmark - self.results = None - - def before_start(self): - self.parent.before_start() - self.power = 0 - self.energy = 0 - self.power_readings = [] - - def after_start(self): - self.parent.after_start() - self.t0 = time.perf_counter() - - def during(self): - self.parent.during() - power_usage = self.parent.tegra.read_gpu_power() - timestamp = time.perf_counter() - self.t0 - # only store the result if we get a new measurement from tegra - if len(self.power_readings) == 0 or ( - self.power_readings[-1][1] != power_usage - or timestamp - self.power_readings[-1][0] > 0.01 - ): - self.power_readings.append([timestamp, power_usage]) - - def after_finish(self): - self.parent.after_finish() - # safeguard in case we have no measurements, perhaps the kernel was too short to measure anything - if not self.power_readings: - return - - # convert to seconds from milliseconds - execution_time = self.results["time"] / 1e3 - self.power = np.median([d[1] for d in self.power_readings]) - self.energy = self.power * execution_time - - def get_results(self): - results = self.parent.get_results() - keys = list(results.keys()) - for key in keys: - results["pwr_" + key] = results.pop(key) - if "gpu_power" in self.observables: - results["gpu_power"] = self.power - if "gpu_energy" in self.observables: - results["gpu_energy"] = self.energy - - return results \ No newline at end of file From a8d6a3a6307a8d202676c2e73c57448d2b3ef268 Mon Sep 17 00:00:00 2001 From: Ben van Werkhoven Date: Thu, 3 Oct 2024 16:33:47 +0200 Subject: [PATCH 15/15] fix typos --- kernel_tuner/observers/nvml.py | 7 ++++--- kernel_tuner/observers/observer.py | 9 +++++---- kernel_tuner/observers/tegra.py | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/kernel_tuner/observers/nvml.py b/kernel_tuner/observers/nvml.py index 7a41fd517..46ec0bd4a 100644 --- a/kernel_tuner/observers/nvml.py +++ b/kernel_tuner/observers/nvml.py @@ -352,7 +352,7 @@ def __init__( save_all=False, nvidia_smi_fallback=None, use_locked_clocks=False, - continous_duration=1, + continuous_duration=1, ): """Create an NVMLObserver.""" if nvidia_smi_fallback: @@ -384,7 +384,7 @@ def __init__( if any([obs in self.needs_power for obs in observables]): self.measure_power = True power_observables = [obs for obs in observables if obs in self.needs_power] - self.continuous_observer = ContinuousObserver("nvml", self, power_observables, continous_duration=continuous_duration) + self.continuous_observer = ContinuousObserver("nvml", power_observables, self, continuous_duration=continuous_duration) # remove power observables self.observables = [obs for obs in observables if obs not in self.needs_power] @@ -407,7 +407,8 @@ def __init__( self.iteration = {obs: [] for obs in self.during_obs} def read_power(self): - return self.nvml.pwr_usage() + """ Return power in Watt """ + return self.nvml.pwr_usage() / 1e3 def before_start(self): # clear results of the observables for next measurement diff --git a/kernel_tuner/observers/observer.py b/kernel_tuner/observers/observer.py index 32ea6eb82..e74bc433d 100644 --- a/kernel_tuner/observers/observer.py +++ b/kernel_tuner/observers/observer.py @@ -1,5 +1,6 @@ from abc import ABC, abstractmethod - +import time +import numpy as np class BenchmarkObserver(ABC): """Base class for Benchmark Observers""" @@ -47,9 +48,9 @@ class ContinuousObserver(BenchmarkObserver): """Generic observer that measures power while and continuous benchmarking. To support continuous benchmarking an Observer should support: - a .read_power() method, which the ContinuousObserver can call to read power + a .read_power() method, which the ContinuousObserver can call to read power in Watt """ - def __init__(self, name, observables, parent, continous_duration=1): + def __init__(self, name, observables, parent, continuous_duration=1): self.parent = parent self.name = name @@ -60,7 +61,7 @@ def __init__(self, name, observables, parent, continous_duration=1): self.observables = observables # duration in seconds - self.continuous_duration = continous_duration + self.continuous_duration = continuous_duration self.power = 0 self.energy = 0 diff --git a/kernel_tuner/observers/tegra.py b/kernel_tuner/observers/tegra.py index fc3e54de2..efc83048c 100644 --- a/kernel_tuner/observers/tegra.py +++ b/kernel_tuner/observers/tegra.py @@ -224,7 +224,7 @@ def __init__( if any([obs in self.needs_power for obs in observables]): self.measure_power = True power_observables = [obs for obs in observables if obs in self.needs_power] - self.continuous_observer = ContinuousObserver("tegra", power_observables, self, continous_duration=3) + self.continuous_observer = ContinuousObserver("tegra", power_observables, self, continuous_duration=3) # remove power observables self.observables = [obs for obs in observables if obs not in self.needs_power]