rapidsai · rapids-bot · May 4, 2023 · May 2, 2023 · May 2, 2023 · May 4, 2023
@@ -6,7 +6,6 @@
 from numba import cuda, types
 from numba.cuda.cudadrv.devices import get_context
 from numba.np import numpy_support
-from numba.types import Record
 
 import cudf.core.udf.utils
 from cudf.core.udf.groupby_typing import (
@@ -19,6 +18,7 @@
     groupby_apply_kernel_template,
 )
 from cudf.core.udf.utils import (
+    Row,
     _generate_cache_key,
     _get_extensionty_size,
     _get_kernel,
@@ -32,10 +32,9 @@
 
 def _get_frame_groupby_type(dtype, index_dtype):
     """
-    Get the numba `Record` type corresponding to a frame.
-    Models the column as a dictionary like data structure
-    containing GroupTypes.
-    See numba.np.numpy_support.from_struct_dtype for details.
+    Get the Numba type corresponding to a row of grouped data. Models the
+    column as a Record-like data structure containing GroupTypes. See
+    numba.np.numpy_support.from_struct_dtype for details.
 
     Parameters
     ----------
@@ -74,7 +73,7 @@ def _get_frame_groupby_type(dtype, index_dtype):
 
     # Numba requires that structures are aligned for the CUDA target
     _is_aligned_struct = True
-    return Record(fields, offset, _is_aligned_struct)
+    return Row(fields, offset, _is_aligned_struct)
 
 
 def _groupby_apply_kernel_string_from_template(frame, args):

@@ -4,7 +4,6 @@
 import numpy as np
 from numba import cuda
 from numba.np import numpy_support
-from numba.types import Record
 
 from cudf.core.udf.api import Masked, pack_return
 from cudf.core.udf.masked_typing import MaskedType
@@ -16,6 +15,7 @@
     unmasked_input_initializer_template,
 )
 from cudf.core.udf.utils import (
+    Row,
     _all_dtypes_from_frame,
     _construct_signature,
     _get_extensionty_size,
@@ -29,15 +29,12 @@
 
 def _get_frame_row_type(dtype):
     """
-    Get the numba `Record` type corresponding to a frame.
-    Models each column and its mask as a MaskedType and
-    models the row as a dictionary like data structure
-    containing these MaskedTypes.
-    Large parts of this function are copied with comments
-    from the Numba internals and slightly modified to
-    account for validity bools to be present in the final
-    struct.
-    See numba.np.numpy_support.from_struct_dtype for details.
+    Get the Numba type of a row in a frame. Models each column and its mask as
+    a MaskedType and models the row as a dictionary-like data structure
+    containing these MaskedTypes. Large parts of this function are copied with
+    comments from the Numba internals and slightly modified to account for
+    validity bools to be present in the final struct. See
+    numba.np.numpy_support.from_struct_dtype for details.
     """
 
     # Create the numpy structured type corresponding to the numpy dtype.
@@ -89,7 +86,7 @@ def _get_frame_row_type(dtype):
 
     # Numba requires that structures are aligned for the CUDA target
     _is_aligned_struct = True
-    return Record(fields, offset, _is_aligned_struct)
+    return Row(fields, offset, _is_aligned_struct)
 
 
 def _row_kernel_string_from_template(frame, row_type, args):

@@ -7,7 +7,6 @@
 from numba.core import cgutils
 from numba.core.datamodel import default_manager
 from numba.core.typing import signature as nb_signature
-from numba.cuda.cudadrv import nvvm
 from numba.cuda.cudaimpl import (
     lower as cuda_lower,
     registry as cuda_lowering_registry,
@@ -126,9 +125,8 @@ def cast_string_literal_to_string_view(context, builder, fromty, toty, val):
     sv = cgutils.create_struct_proxy(string_view)(context, builder)
 
     # set the empty strview data pointer to point to the literal value
-    s = context.insert_const_string(builder.module, fromty.literal_value)
-    sv.data = context.insert_addrspace_conv(
-        builder, s, nvvm.ADDRSPACE_CONSTANT
+    sv.data = context.insert_string_const_addrspace(
+        builder, fromty.literal_value
     )
     sv.length = context.get_constant(size_type, len(fromty.literal_value))
     sv.bytes = context.get_constant(

@@ -11,12 +11,12 @@
 from cubinlinker.patch import _numba_version_ok, get_logger, new_patched_linker
 from cuda import cudart
 from numba import cuda, typeof
-from numba.core.datamodel import default_manager
+from numba.core.datamodel import default_manager, models
 from numba.core.errors import TypingError
-from numba.cuda.cudadrv import nvvm
+from numba.core.extending import register_model
 from numba.cuda.cudadrv.driver import Linker
 from numba.np import numpy_support
-from numba.types import CPointer, Poison, Tuple, boolean, int64, void
+from numba.types import CPointer, Poison, Record, Tuple, boolean, int64, void
 
 import rmm
 
@@ -240,6 +240,26 @@ def _construct_signature(frame, return_type, args):
     return sig
 
 
+class Row(Record):
+    # Numba's Record type provides a convenient abstraction for representing a
+    # row, in that it provides a mapping from strings (column / field names) to
+    # types. However, it cannot be used directly since it assumes that all its
+    # fields can be converted to NumPy types by Numba's internal conversion
+    # mechanism (`numba.np.numpy_support.as_dtype`). This is not the case for
+    # cuDF extension types that might be the column types (e.g. masked types,
+    # string types or group types).
+    #
+    # We use this type for type inference and type checking, but not in code
+    # generation. For this use case, it is sufficient to provide a dtype for a
+    # row that corresponds to any Python object.
+    @property
+    def dtype(self):
+        return np.dtype("object")
+
+
+register_model(Row)(models.RecordModel)
+
+
 @cuda.jit(device=True)
 def _mask_get(mask, pos):
     """Return the validity of mask[pos] as a word."""
@@ -356,14 +376,20 @@ def _post_process_output_col(col, retty):
     return as_column(col, retty)
 
 
+# The only supported data layout in NVVM.
+# See: https://docs.nvidia.com/cuda/nvvm-ir-spec/index.html?#data-layout
+_nvvm_data_layout = (
+    "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-"
+    "i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-"
+    "v64:64:64-v128:128:128-n16:32:64"
+)
+
+
 def _get_extensionty_size(ty):
     """
     Return the size of an extension type in bytes
     """
-    data_layout = nvvm.data_layout
-    if isinstance(data_layout, dict):
-        data_layout = data_layout[64]
-    target_data = ll.create_target_data(data_layout)
+    target_data = ll.create_target_data(_nvvm_data_layout)
     llty = default_manager[ty].get_value_type()
     return llty.get_abi_size(target_data)