Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Shared mem #18

Draft
wants to merge 6 commits into
base: cuda_main_temp
Choose a base branch
from
Draft
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
added time val struct
bauom committed Jul 20, 2023
commit 2a3c767f7e49602a5483c75c267e1175b713c662
51 changes: 48 additions & 3 deletions pyccel/ast/cudaext.py
Original file line number Diff line number Diff line change
@@ -3,7 +3,7 @@

from .core import Module, PyccelFunctionDef, Import

from .datatypes import NativeInteger, NativeVoid
from .datatypes import NativeInteger, NativeVoid, NativeFloat, TimeVal

from .internals import PyccelInternalFunction, get_final_precision

@@ -26,7 +26,8 @@
'CudaMemCopy',
'CudaNewArray',
'CudaSynchronize',
'CudaThreadIdx'
'CudaThreadIdx',
'CudaTime'
)

#==============================================================================
@@ -197,6 +198,47 @@ def __init__(self, dim=None):
def dim(self):
return self._dim

class CudaTime(PyccelInternalFunction):
    """
    Represents a call to the cuda `time` function.

    The node takes no arguments. It describes a scalar (rank 0, no shape,
    no order) whose datatype is `TimeVal` with a precision of 0.
    """
    __slots__ = ()
    _attribute_nodes = ()
    _dtype = TimeVal()
    _precision = 0
    _rank = 0
    _shape = None
    _order = None

    def __init__(self):
        # No arguments are forwarded to the parent constructor.
        super().__init__()

class CudaTimeDiff(PyccelAstNode):
    """
    Represents the difference between two time stamps.

    The result is a scalar (rank 0) of type `NativeFloat` with a
    precision of 8.

    Parameters
    ----------
    start : PyccelAstNode, optional
        The time stamp taken at the beginning of the measured interval.
        Presumably an object whose datatype is `TimeVal` (to be confirmed
        against the callers).
    end : PyccelAstNode, optional
        The time stamp taken at the end of the measured interval.
        Presumably an object whose datatype is `TimeVal` (to be confirmed
        against the callers).
    """
    __slots__ = ('_start','_end', '_dtype', '_precision')
    _attribute_nodes = ('_start','_end',)
    _shape = None
    _rank = 0
    _order = None

    def __init__(self, start=None, end=None):
        # The attributes are assigned before calling the parent constructor,
        # exactly as in the original code.
        # NOTE(review): presumably the parent constructor inspects the
        # members listed in _attribute_nodes -- confirm before reordering.
        self._start = start
        self._end = end
        self._dtype = NativeFloat()
        self._precision = 8
        super().__init__()

    @property
    def start(self):
        """
        The time stamp marking the beginning of the interval.
        """
        return self._start

    @property
    def end(self):
        """
        The time stamp marking the end of the interval.
        """
        return self._end

class CudaCopy(CudaNewArray):
"""
@@ -301,7 +343,10 @@ def __new__(cls, dim=0):
'blockDim' : PyccelFunctionDef('blockDim' , CudaBlockDim),
'blockIdx' : PyccelFunctionDef('blockIdx' , CudaBlockIdx),
'gridDim' : PyccelFunctionDef('gridDim' , CudaGridDim),
'grid' : PyccelFunctionDef('grid' , CudaGrid)
'grid' : PyccelFunctionDef('grid' , CudaGrid),
'time' : PyccelFunctionDef('time' , CudaTime),
'timediff' : PyccelFunctionDef('timediff' , CudaTimeDiff),

}

cuda_Internal_Var = {
6 changes: 6 additions & 0 deletions pyccel/ast/datatypes.py
Original file line number Diff line number Diff line change
@@ -377,3 +377,9 @@ def str_dtype(dtype):
return 'bool'
else:
raise TypeError('Unknown datatype {0}'.format(str(dtype)))


class TimeVal(DataType):
    """
    Class representing the timeval datatype.

    The datatype is identified by the name 'timeval'.
    """
    _name = 'timeval'
    __slots__ = ()
4 changes: 4 additions & 0 deletions pyccel/ast/operators.py
Original file line number Diff line number Diff line change
@@ -18,6 +18,7 @@
from .datatypes import (NativeBool, NativeInteger, NativeFloat,
NativeComplex, NativeString,
NativeNumeric)
from .datatypes import TimeVal

from .internals import max_precision

@@ -393,6 +394,7 @@ def _calculate_dtype(cls, *args):
floats = [a for a in args if a.dtype is NativeFloat()]
complexes = [a for a in args if a.dtype is NativeComplex()]
strs = [a for a in args if a.dtype is NativeString()]
time = [a for a in args if a.dtype is TimeVal()]

if strs:
assert len(integers + floats + complexes) == 0
@@ -403,6 +405,8 @@ def _calculate_dtype(cls, *args):
return cls._handle_float_type(args)
elif integers:
return cls._handle_integer_type(args)
elif time:
return time
else:
raise TypeError('cannot determine the type of {}'.format(args))

63 changes: 54 additions & 9 deletions pyccel/codegen/printing/ccudacode.py
Original file line number Diff line number Diff line change
@@ -13,22 +13,23 @@
FunctionDefArgument, Assign, Import,
AliasAssign, Module, Declare, AsName)

from pyccel.ast.datatypes import NativeInteger
from pyccel.ast.datatypes import NativeInteger, NativeComplex, NativeBool, TimeVal, default_precision
from pyccel.ast.datatypes import NativeTuple, datatype
from pyccel.ast.literals import LiteralTrue, Literal, Nil
from pyccel.ast.literals import LiteralTrue, LiteralString, Literal, Nil
from pyccel.ast.c_concepts import ObjectAddress, CMacro, CStringExpression

from pyccel.ast.numpyext import NumpyFull, NumpyArray, NumpyArange

from pyccel.ast.cupyext import CupyFull, CupyArray, CupyArange

from pyccel.ast.cudaext import CudaCopy, cuda_Internal_Var, CudaArray, CudaSharedArray
from pyccel.ast.cudaext import CudaCopy, cuda_Internal_Var, CudaArray, CudaSharedArray, CudaTime

from pyccel.ast.operators import PyccelMul, PyccelUnarySub

from pyccel.ast.variable import Variable, PyccelArraySize
from pyccel.ast.variable import InhomogeneousTupleVariable, DottedName

from pyccel.ast.internals import Slice
from pyccel.ast.internals import Slice, get_final_precision
from pyccel.ast.c_concepts import ObjectAddress

from pyccel.codegen.printing.ccode import CCodePrinter
@@ -189,6 +190,7 @@
('int',8) : 'int64_t',
('int',2) : 'int16_t',
('int',1) : 'int8_t',
('timeval', 0): 'struct timeval',
('bool',4) : 'bool'}

ndarray_type_registry = {
@@ -217,9 +219,11 @@
'pyc_math_c',
'stdio',
'stdbool',
'sys/time',
'assert']}

class CCudaCodePrinter(CCodePrinter):
print("---------")
"""A printer to convert python expressions to strings of ccuda code"""
printmethod = "_ccudacode"
language = "ccuda"
@@ -459,7 +463,7 @@ def _print_Allocate(self, expr):
memory_location = 'allocateMemoryOn' + str(memory_location).capitalize()
else:
memory_location = 'managedMemory'
alloc_code = f"{expr.variable} = \
alloc_code = f"{self._print(expr.variable)} = \
cuda_array_create({len(expr.shape)}, {tmp_shape}, {dtype}, {is_view}, {memory_location});"
return f"{free_code}\n{shape_Assign}\n{alloc_code}\n"

@@ -570,7 +574,8 @@ def _print_Assign(self, expr):
self._temporary_args = [ObjectAddress(a) for a in lhs]
return prefix_code+'{};\n'.format(self._print(rhs))
# Inhomogenous tuples are unravelled and therefore do not exist in the c printer

if isinstance(rhs, CudaTime):
return prefix_code+self.cuda_time(expr)
if isinstance(rhs, (CupyFull)):
return prefix_code+self.cuda_arrayFill(expr)
if isinstance(rhs, CupyArange):
@@ -717,7 +722,46 @@ def _print_CudaInternalVar(self, expr):
var_name = cuda_Internal_Var[var_name]
dim_c = ('x', 'y', 'z')[expr.dim]
return '{}.{}'.format(var_name, dim_c)


def _print_TimeVal(self, expr):
    """
    Print the name of the `TimeVal` datatype.

    The 'sys/time' import is registered as a side effect.

    Parameters
    ----------
    expr : TimeVal
        The datatype being printed (its contents are not inspected).

    Returns
    -------
    str
        The string 'timeval'.
    """
    time_header = c_imports['sys/time']
    self.add_import(time_header)
    return 'timeval'

def cuda_time(self, expr):
    """
    Print an assignment whose right hand side is a `CudaTime` node.

    The 'sys/time' import is registered and a call to `gettimeofday`
    which writes into the left hand side of the assignment is generated.

    Parameters
    ----------
    expr : Assign
        The assignment being printed. Only its `lhs` is read here.

    Returns
    -------
    str
        The generated statement, terminated by a newline.
    """
    self.add_import(c_imports['sys/time'])
    target = self._print(expr.lhs)
    return f'gettimeofday(&{target}, NULL);\n'

def _print_CudaTimeDiff(self, expr):
    """
    Print the expression giving the time elapsed between two time stamps.

    The 'sys/time' import is registered as a side effect. The printed
    expression combines the difference of the `tv_sec` members with the
    difference of the `tv_usec` members divided by 1e6.

    Parameters
    ----------
    expr : CudaTimeDiff
        The node being printed. Its `start` and `end` members are printed
        and used as the two time stamps.

    Returns
    -------
    str
        A parenthesised expression without a trailing semicolon, so that
        it can safely be embedded in a larger expression or statement.
    """
    self.add_import(c_imports['sys/time'])
    start = self._print(expr.start)
    end = self._print(expr.end)
    seconds = f'({end}.tv_sec - {start}.tv_sec)'
    microseconds = f'({end}.tv_usec - {start}.tv_usec)'
    # This method prints an expression, not a statement: the statement
    # terminator is left to the caller and the outer parentheses protect
    # the sum from the precedence of any surrounding operator.
    return f'({seconds} + {microseconds} / 1e6)'

def find_in_dtype_registry(self, dtype, prec):
    """
    Find the entry of `dtype_registry` matching a datatype and a precision.

    Parameters
    ----------
    dtype : str
        The name of the datatype, used as the first element of the
        registry key.
    prec : int
        The precision, used as the second element of the registry key.
        The value -1 is replaced by `default_precision[dtype]`.

    Returns
    -------
    str
        The registry entry found for (dtype, prec). If there is no such
        entry, a fatal error is reported instead.
    """
    precision = default_precision[dtype] if prec == -1 else prec
    registry_key = (dtype, precision)
    if registry_key in dtype_registry:
        return dtype_registry[registry_key]
    errors.report(PYCCEL_RESTRICTION_TODO,
            symbol = "{}[kind = {}]".format(dtype, precision),
            severity='fatal')

def get_print_format_and_arg(self, var):
    """
    Get the format entry and the printed argument needed to print an object.

    Parameters
    ----------
    var : PyccelAstNode
        The object which will be printed. Its `dtype` and its final
        precision are used to choose the format.

    Returns
    -------
    arg_format : object
        The entry of `type_to_format` for the printed datatype and the
        final precision of `var`.
    arg : str
        The printed argument: the real and imaginary parts separated by a
        comma for complex objects, a "True"/"False" ternary expression for
        booleans, and the printed object otherwise.
    """
    # NOTE(review): NumpyReal, NumpyImag and type_to_format are not among
    # the imports visible in this diff -- confirm that they are in scope.
    try:
        format_key = (self._print(var.dtype), get_final_precision(var))
        arg_format = type_to_format[format_key]
    except KeyError:
        errors.report("{} type is not supported currently".format(var.dtype), severity='fatal')

    if var.dtype is NativeComplex():
        real_part = self._print(NumpyReal(var))
        imag_part = self._print(NumpyImag(var))
        return arg_format, '{}, {}'.format(real_part, imag_part)

    if var.dtype is NativeBool():
        return arg_format, '{} ? "True" : "False"'.format(self._print(var))

    return arg_format, self._print(var)

def cudaCopy(self, lhs, rhs):
from_location = 'Host'
to_location = 'Host'
@@ -726,10 +770,11 @@ def cudaCopy(self, lhs, rhs):
if rhs.memory_location in ('device', 'managed'):
to_location = 'Device'
transfer_type = 'cudaMemcpy{0}To{1}'.format(from_location, to_location)
var = self._print(lhs)
if isinstance(rhs.is_async, LiteralTrue):
cpy_data = "cudaMemcpyAsync({0}.raw_data, {1}.raw_data, {0}.buffer_size, {2}, 0);".format(lhs, rhs.arg, transfer_type)
cpy_data = "cudaMemcpyAsync({0}.raw_data, {1}.raw_data, {0}.buffer_size, {2}, 0);".format(var, rhs.arg, transfer_type)
else:
cpy_data = "cudaMemcpy({0}.raw_data, {1}.raw_data, {0}.buffer_size, {2});".format(lhs, rhs.arg, transfer_type)
cpy_data = "cudaMemcpy({0}.raw_data, {1}.raw_data, {0}.buffer_size, {2});".format(var, rhs.arg, transfer_type)
return '%s\n' % (cpy_data)

def ccudacode(expr, filename, assign_to=None, **settings):