HIP local memory error handling #284

Merged
25 changes: 21 additions & 4 deletions kernel_tuner/backends/compiler.py
@@ -265,12 +265,23 @@ def compile(self, kernel_instance):
         if platform.system() == "Darwin":
             lib_extension = ".dylib"

-        subprocess.check_call([self.compiler, "-c", source_file] + compiler_options + ["-o", filename + ".o"])
-        subprocess.check_call(
+        subprocess.run(
+            [self.compiler, "-c", source_file] + compiler_options + ["-o", filename + ".o"],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True,
+            check=True
+        )
+
+        subprocess.run(
             [self.compiler, filename + ".o"]
             + compiler_options
             + ["-shared", "-o", filename + lib_extension]
-            + lib_args
+            + lib_args,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True,
+            check=True
         )

         self.lib = np.ctypeslib.load_library(filename, ".")
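Switching the two compiler invocations from subprocess.check_call to subprocess.run with stdout/stderr captured, text=True, and check=True keeps the raise-on-non-zero-exit behaviour, but the raised CalledProcessError now carries the compiler's stderr as a decoded string that the caller can inspect. A minimal sketch of that behaviour, using a made-up failing command instead of the real compiler call:

import subprocess

# Sketch only: the command below is a stand-in that writes to stderr and exits non-zero.
try:
    subprocess.run(
        ["python3", "-c", "import sys; sys.exit('local memory limit exceeded')"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        check=True,
    )
except subprocess.CalledProcessError as e:
    print(e.returncode)  # non-zero exit status of the failed command
    print(e.stderr)      # captured stderr, available because stderr=PIPE and text=True were set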
@@ -396,10 +407,16 @@ def memcpy_htod(self, dest, src):

     def cleanup_lib(self):
         """unload the previously loaded shared library"""
+        if self.lib is None:
+            return
+
         if not self.using_openmp and not self.using_openacc:
             # this if statement is necessary because shared libraries that use
             # OpenMP will core dump when unloaded, this is a well-known issue with OpenMP
             logging.debug("unloading shared library")
-            _ctypes.dlclose(self.lib._handle)
+            try:
+                _ctypes.dlclose(self.lib._handle)
+            finally:
+                self.lib = None

 units = {}
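The None guard plus the try/finally reset make cleanup_lib safe to call before a library has been loaded, after a failed compile, or more than once in a row, since the handle is dropped even if dlclose raises. A rough sketch of the pattern with a stand-in object rather than the real backend class:

import _ctypes

class FakeBackend:
    # Stand-in for the compiler backend, only to illustrate the cleanup pattern.
    def __init__(self):
        self.lib = None
        self.using_openmp = False
        self.using_openacc = False

    def cleanup_lib(self):
        if self.lib is None:
            return  # nothing loaded, or already cleaned up
        if not self.using_openmp and not self.using_openacc:
            try:
                _ctypes.dlclose(self.lib._handle)
            finally:
                self.lib = None  # drop the handle even if dlclose fails

backend = FakeBackend()
backend.cleanup_lib()  # no library loaded: returns immediately instead of crashing
backend.cleanup_lib()  # a second call is equally harmless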
6 changes: 4 additions & 2 deletions kernel_tuner/core.py
@@ -647,8 +647,10 @@ def compile_kernel(self, instance, verbose):
             shared_mem_error_messages = [
                 "uses too much shared data",
                 "local memory limit exceeded",
+                r"local memory \(\d+\) exceeds limit \(\d+\)",
             ]
-            if any(msg in str(e) for msg in shared_mem_error_messages):
+            error_message = str(e.stderr) if hasattr(e, "stderr") else str(e)
+            if any(re.search(msg, error_message) for msg in shared_mem_error_messages):
                 logging.debug(
                     "compile_kernel failed due to kernel using too much shared memory"
                 )
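compile_kernel now prefers the captured stderr (available on CalledProcessError thanks to the subprocess.run change above) and matches the known messages as regular expressions, so HIP-style diagnostics that embed the actual byte counts are recognised as well. A small sketch of the matching; the sample message is illustrative, not a verbatim compiler log:

import re

shared_mem_error_messages = [
    "uses too much shared data",
    "local memory limit exceeded",
    r"local memory \(\d+\) exceeds limit \(\d+\)",
]

# Hypothetical stderr from a failed HIP compile, for illustration only.
error_message = "error: local memory (98304) exceeds limit (65536)"

if any(re.search(msg, error_message) for msg in shared_mem_error_messages):
    print("matched: kernel uses too much shared/local memory")

The two plain strings still work under re.search because they contain no special regex characters.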
@@ -715,7 +717,7 @@ def create_kernel_instance(self, kernel_source, kernel_options, params, verbose)
         )

         # check for templated kernel
-        if kernel_source.lang in ["CUDA", "NVCUDA"] and "<" in name and ">" in name:
+        if kernel_source.lang in ["CUDA", "NVCUDA", "HIP"] and "<" in name and ">" in name:
             kernel_string, name = wrap_templated_kernel(kernel_string, name)

         # Preprocess GPU arguments. Require for handling `Tunable` arguments
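Adding "HIP" to the language list lets templated HIP kernels take the same wrapping path that templated CUDA kernels already used. The detection itself is just a string check, sketched here with a hypothetical kernel name:

# Illustration of the templated-kernel check, outside kernel_tuner.
lang = "HIP"
name = "vector_add<float>"  # hypothetical templated kernel name

if lang in ["CUDA", "NVCUDA", "HIP"] and "<" in name and ">" in name:
    print("templated kernel detected")  # kernel_tuner would wrap the kernel string here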
4 changes: 2 additions & 2 deletions test/test_compiler_functions.py
@@ -198,11 +198,11 @@ def test_compile_detects_device_code(npct, subprocess):
     cfunc = CompilerFunctions()
     cfunc.compile(kernel_instance)

-    print(subprocess.check_call.call_args_list)
+    print(subprocess.run.call_args_list)

     # assert the filename suffix used for source compilation is .cu
     dot_cu_used = False
-    for call in subprocess.check_call.call_args_list:
+    for call in subprocess.run.call_args_list:
         args, kwargs = call
         args = args[0]
         print(args)
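Since the backend now calls subprocess.run, the test reads subprocess.run.call_args_list from the patched module instead of subprocess.check_call.call_args_list. A minimal sketch of the same inspection pattern with a plain MagicMock; the command list is made up:

from unittest import mock

# Stand-in for the patched subprocess module used in the test.
fake_subprocess = mock.MagicMock()
fake_subprocess.run(["gcc", "-c", "kernel.cu", "-o", "kernel.o"], check=True)

dot_cu_used = False
for call in fake_subprocess.run.call_args_list:
    args, kwargs = call      # each recorded call unpacks into (positional, keyword) arguments
    argv = args[0]           # the command list that was passed to run()
    if any(str(item).endswith(".cu") for item in argv):
        dot_cu_used = True

assert dot_cu_used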