Skip to content

Commit

Permalink
Merge pull request #262 from KernelTuner/directives
Browse files Browse the repository at this point in the history
OpenACC bug fixing
  • Loading branch information
benvanwerkhoven authored Jun 8, 2024
2 parents 8ebf8b8 + d2fabc9 commit 323dd89
Show file tree
Hide file tree
Showing 2 changed files with 128 additions and 56 deletions.
129 changes: 94 additions & 35 deletions kernel_tuner/utils/directives.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,48 @@ def __init__(self, directive: Directive, lang: Language):
self.language = lang


class ArraySize:
    """Size of a (possibly multidimensional) array.

    Dimensions are kept in insertion order in ``self.size``. Iterating over
    the object yields each stored dimension, ``len()`` gives the number of
    stored dimensions, and :meth:`get` gives the total number of elements.
    """

    def __init__(self):
        self.size = []

    def __repr__(self) -> str:
        return f"{type(self).__name__}({self.size!r})"

    def __iter__(self):
        return iter(self.size)

    def __len__(self):
        return len(self.size)

    def clear(self):
        """Remove every stored dimension."""
        self.size.clear()

    def get(self) -> int:
        """Return the product of all stored dimensions, or 0 if there are none."""
        if not self.size:
            return 0
        product = 1
        for dim in self.size:
            product *= dim
        return product

    def add(self, dim: int) -> None:
        """Append ``dim`` as a new dimension; values smaller than 1 are ignored."""
        # Only allow adding valid dimensions
        if dim >= 1:
            self.size.append(dim)


def fortran_md_size(size: ArraySize) -> list:
    """Format a multidimensional size into the correct Fortran string

    Each dimension yielded by iterating ``size`` becomes one ``":<dim>"``
    entry, in the same order; the caller joins the entries with commas.
    """
    return [f":{dim}" for dim in size]


def is_openacc(directive: Directive) -> bool:
    """Return True when ``directive`` is an instance of ``OpenACC``"""
    return isinstance(directive, OpenACC)
Expand Down Expand Up @@ -120,7 +162,7 @@ def openacc_directive_contains_data_clause(line: str) -> bool:
return openacc_directive_contains_clause(line, data_clauses)


def create_data_directive_openacc(name: str, size: int, lang: Language) -> str:
def create_data_directive_openacc(name: str, size: ArraySize, lang: Language) -> str:
"""Create a data directive for a given language"""
if is_cxx(lang):
return create_data_directive_openacc_cxx(name, size)
Expand All @@ -129,17 +171,23 @@ def create_data_directive_openacc(name: str, size: int, lang: Language) -> str:
return ""


def create_data_directive_openacc_cxx(name: str, size: ArraySize) -> str:
    """Create C++ OpenACC code to allocate and copy data

    The array section covers ``size.get()`` elements of ``name``, i.e. the
    product of all dimensions stored in ``size``.
    """
    elements = size.get()
    return f"#pragma acc enter data create({name}[:{elements}])\n#pragma acc update device({name}[:{elements}])\n"


def create_data_directive_openacc_fortran(name: str, size: ArraySize) -> str:
    """Create Fortran OpenACC code to allocate and copy data

    Every dimension in ``size`` is rendered as its own ``:<dim>`` section,
    separated by commas.
    """
    # One dimension renders as ":N" and several as ":N,:M", so joining the
    # per-dimension entries covers both the 1-D and the multidimensional case.
    section = ",".join(fortran_md_size(size))
    return f"!$acc enter data create({name}({section}))\n!$acc update device({name}({section}))\n"


def exit_data_directive_openacc(name: str, size: int, lang: Language) -> str:
def exit_data_directive_openacc(name: str, size: ArraySize, lang: Language) -> str:
"""Create code to copy data back for a given language"""
if is_cxx(lang):
return exit_data_directive_openacc_cxx(name, size)
Expand All @@ -148,14 +196,18 @@ def exit_data_directive_openacc(name: str, size: int, lang: Language) -> str:
return ""


def exit_data_directive_openacc_cxx(name: str, size: ArraySize) -> str:
    """Create C++ OpenACC code to copy back data

    The array section covers ``size.get()`` elements of ``name``.
    """
    return f"#pragma acc exit data copyout({name}[:{size.get()}])\n"


def exit_data_directive_openacc_fortran(name: str, size: ArraySize) -> str:
    """Create Fortran OpenACC code to copy back data

    Every dimension in ``size`` is rendered as its own ``:<dim>`` section,
    separated by commas.
    """
    # A single dimension joins to ":N", which is exactly the 1-D form, so no
    # separate branch is needed for the one-dimensional case.
    section = ",".join(fortran_md_size(size))
    return f"!$acc exit data copyout({name}({section}))\n"


def correct_kernel(kernel_name: str, line: str) -> bool:
Expand All @@ -165,7 +217,7 @@ def correct_kernel(kernel_name: str, line: str) -> bool:

def find_size_in_preprocessor(dimension: str, preprocessor: list) -> int:
"""Find the dimension of a directive defined value in the preprocessor"""
ret_size = None
ret_size = 0
for line in preprocessor:
if f"#define {dimension}" in line:
try:
Expand Down Expand Up @@ -209,45 +261,43 @@ def extract_code(start: str, stop: str, code: str, langs: Code, kernel_name: str
return sections


def parse_size(size: Any, preprocessor: list = None, dimensions: dict = None) -> ArraySize:
    """Converts an arbitrary object into an ArraySize representing memory size

    Resolution order:

    1. ``size`` is an ``int``: it is used directly.
    2. ``int(size)`` succeeds: the converted value is used.
    3. Otherwise ``size`` is treated as one name, or several comma separated
       names, and each name is looked up in ``preprocessor``.
    4. If ``dimensions`` provides the name(s), the user values replace
       whatever was derived from the preprocessor.

    Dimensions that cannot be resolved are not stored, so ``get()`` on the
    result returns 0 when nothing could be resolved.
    """
    ret_size = ArraySize()
    if type(size) is int:
        # size is already an int. no need for conversion
        ret_size.add(size)
        return ret_size

    try:
        # Try to convert the size to an integer
        ret_size.add(int(size))
    except ValueError:
        # If size cannot be natively converted to an int, we try to derive it from the preprocessor
        try:
            if preprocessor is not None:
                if "," in size:
                    for dimension in size.split(","):
                        ret_size.add(find_size_in_preprocessor(dimension, preprocessor))
                else:
                    ret_size.add(find_size_in_preprocessor(size, preprocessor))
        except TypeError:
            # At least one of the dimension cannot be derived from the preprocessor
            pass
        # User provided values take precedence over the preprocessor
        if dimensions is not None:
            if size in dimensions:
                names = [size]
            elif "," in size:
                names = size.split(",")
            else:
                names = []
            if names:
                try:
                    user_dims = [int(dimensions[dimension]) for dimension in names]
                except ValueError:
                    # User error, no mitigation
                    return ret_size
                # Replace rather than append: appending on top of values already
                # found in the preprocessor would count each dimension twice.
                ret_size.clear()
                for dim in user_dims:
                    ret_size.add(dim)

    return ret_size

Expand Down Expand Up @@ -297,8 +347,13 @@ def wrap_data(code: str, langs: Code, data: dict, preprocessor: list = None, use
intro += create_data_directive_openacc_cxx(name, size)
outro += exit_data_directive_openacc_cxx(name, size)
elif is_openacc(langs.directive) and is_fortran(langs.language):
intro += create_data_directive_openacc_fortran(name, size)
outro += exit_data_directive_openacc_fortran(name, size)
if "," in data[name][1]:
# Multi dimensional
pass
else:
# One dimensional
intro += create_data_directive_openacc_fortran(name, size)
outro += exit_data_directive_openacc_fortran(name, size)
return intro + code + outro


Expand Down Expand Up @@ -537,9 +592,9 @@ def allocate_signature_memory(data: dict, preprocessor: list = None, user_dimens
p_type = data[parameter][0]
size = parse_size(data[parameter][1], preprocessor, user_dimensions)
if "*" in p_type:
args.append(allocate_array(p_type, size))
args.append(allocate_array(p_type, size.get()))
else:
args.append(allocate_scalar(p_type, size))
args.append(allocate_scalar(p_type, size.get()))

return args

Expand Down Expand Up @@ -579,11 +634,15 @@ def add_present_openacc(
return new_body


def add_present_openacc_cxx(name: str, size: ArraySize) -> str:
    """Create present clause for C++ OpenACC directive

    The array section covers ``size.get()`` elements of ``name``. The clause
    is returned with one leading and one trailing space.
    """
    return f" present({name}[:{size.get()}]) "


def add_present_openacc_fortran(name: str, size: ArraySize) -> str:
    """Create present clause for Fortran OpenACC directive

    Every dimension in ``size`` is rendered as its own ``:<dim>`` section,
    separated by commas. The clause is returned with one leading and one
    trailing space.
    """
    # A single dimension joins to ":N", which is exactly the 1-D form, so no
    # separate branch is needed for the one-dimensional case.
    section = ",".join(fortran_md_size(size))
    return f" present({name}({section})) "
55 changes: 34 additions & 21 deletions test/utils/test_directives.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from pytest import raises

from kernel_tuner.utils.directives import *


Expand Down Expand Up @@ -44,21 +42,31 @@ def test_openacc_directive_contains_data_clause():


def test_create_data_directive():
    """Check the enter-data directives for C++ (1-D), Fortran (2-D), and no language."""
    size = ArraySize()
    size.add(1024)
    assert (
        create_data_directive_openacc("array", size, Cxx())
        == "#pragma acc enter data create(array[:1024])\n#pragma acc update device(array[:1024])\n"
    )
    # Reuse the same object for a two-dimensional Fortran array
    size.clear()
    size.add(35)
    size.add(16)
    assert (
        create_data_directive_openacc("matrix", size, Fortran())
        == "!$acc enter data create(matrix(:35,:16))\n!$acc update device(matrix(:35,:16))\n"
    )
    assert create_data_directive_openacc("array", size, None) == ""


def test_exit_data_directive():
    """Check the exit-data directives for C++ (1-D), Fortran (2-D), and no language."""
    size = ArraySize()
    size.add(1024)
    assert exit_data_directive_openacc("array", size, Cxx()) == "#pragma acc exit data copyout(array[:1024])\n"
    # Reuse the same object for a two-dimensional Fortran array
    size.clear()
    size.add(35)
    size.add(16)
    assert exit_data_directive_openacc("matrix", size, Fortran()) == "!$acc exit data copyout(matrix(:35,:16))\n"
    assert exit_data_directive_openacc("matrix", size, None) == ""


def test_correct_kernel():
Expand All @@ -69,16 +77,16 @@ def test_correct_kernel():


def test_parse_size():
    """Check parse_size for ints, numeric strings, preprocessor and user dimensions."""
    assert parse_size(128).get() == 128
    assert parse_size("16").get() == 16
    # Unresolvable sizes yield an empty ArraySize, whose total is 0
    assert parse_size("test").get() == 0
    assert parse_size("n", ["#define n 1024\n"]).get() == 1024
    assert parse_size("n,m", ["#define n 16\n", "#define m 32\n"]).get() == 512
    assert parse_size("n", ["#define size 512\n"], {"n": 32}).get() == 32
    assert parse_size("m", ["#define size 512\n"], {"n": 32}).get() == 0
    assert parse_size("rows,cols", dimensions={"rows": 16, "cols": 8}).get() == 128
    assert parse_size("n_rows,n_cols", ["#define n_cols 16\n", "#define n_rows 32\n"]).get() == 512
    assert parse_size("rows,cols", [], dimensions={"rows": 16, "cols": 8}).get() == 128


def test_wrap_timing():
Expand Down Expand Up @@ -272,8 +280,8 @@ def test_extract_directive_data():
def test_allocate_signature_memory():
code = "#pragma tuner start vector_add a(float*:VECTOR_SIZE) b(float*:VECTOR_SIZE) c(float*:VECTOR_SIZE) size(int:VECTOR_SIZE)\n#pragma acc"
data = extract_directive_data(code, Code(OpenACC(), Cxx()))
with raises(TypeError):
_ = allocate_signature_memory(data["vector_add"])
args = allocate_signature_memory(data["vector_add"])
assert args[3] == 0
preprocessor = ["#define VECTOR_SIZE 1024\n"]
args = allocate_signature_memory(data["vector_add"], preprocessor)
assert type(args[0]) is np.ndarray
Expand Down Expand Up @@ -327,3 +335,8 @@ def test_add_present_openacc():
code_cxx = "#pragma acc parallel num_gangs(32)\n\t#pragma acc loop\n\t//for loop\n"
expected_cxx = "#pragma acc parallel num_gangs(32) present(array[:42])\n\t#pragma acc loop\n\t//for loop\n"
assert add_present_openacc(code_cxx, acc_cxx, data, preprocessor, None) == expected_cxx
code_f90 = "!$acc parallel async num_workers(16)\n"
data = {"matrix": ["float*", "rows,cols"]}
preprocessor = ["#define cols 18\n", "#define rows 14\n"]
expected_f90 = "!$acc parallel async num_workers(16) present(matrix(:14,:18))\n"
assert add_present_openacc(code_f90, acc_f90, data, preprocessor, None) == expected_f90

0 comments on commit 323dd89

Please sign in to comment.