From a7bbfb429fc522805668f3cf795c46ba24891640 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 24 Feb 2021 04:06:28 -0800 Subject: [PATCH 001/132] just debugging info --- cpp/src/jit/launcher.cpp | 2 ++ cpp/src/transform/transform.cpp | 12 ++++++++++++ 2 files changed, 14 insertions(+) diff --git a/cpp/src/jit/launcher.cpp b/cpp/src/jit/launcher.cpp index 2ddcac7d5ba..a19b6a55503 100644 --- a/cpp/src/jit/launcher.cpp +++ b/cpp/src/jit/launcher.cpp @@ -35,6 +35,8 @@ launcher::launcher(const std::string& hash, rmm::cuda_stream_view stream) : cache_instance{cudf::jit::cudfJitCache::Instance()}, stream(stream) { + std::cout << "launcher::launcher cuda_source:" << std::endl; + std::cout << cuda_source << std::endl; program = cache_instance.getProgram( hash, cuda_source.c_str(), header_names, compiler_flags, file_callback); } diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index 6da0f78687b..f7fc7f832c4 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -59,11 +59,23 @@ void unary_operation(mutable_column_view output, { std::string hash = "prog_transform" + std::to_string(std::hash{}(udf)); + std::cout << "The program's hash is:" << std::endl; + std::cout << hash << std::endl; + + std::cout << "the actual udf string is: " << std::endl; + std::cout << udf << std::endl; + + + std::cout << "cuda_source is:" << std::endl; std::string cuda_source = code::kernel_header; + std::cout << cuda_source << std::endl; + if (is_ptx) { cuda_source += cudf::jit::parse_single_function_ptx( udf, "GENERIC_UNARY_OP", cudf::jit::get_type_name(output_type), {0}) + code::kernel; + std::cout << "cuda_source after is_ptx condition: " << std::endl; + std::cout << cuda_source << std::endl; } else { cuda_source += cudf::jit::parse_single_function_cuda(udf, "GENERIC_UNARY_OP") + code::kernel; } From 193f8e06677df7d4d5b2b70cb347db80b575b402 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Sun, 7 Mar 2021 12:18:18 -0800 
Subject: [PATCH 002/132] initial python MaskedType --- python/cudf/cudf/core/__init__.py | 1 + python/cudf/cudf/core/udf.py | 115 ++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+) create mode 100644 python/cudf/cudf/core/udf.py diff --git a/python/cudf/cudf/core/__init__.py b/python/cudf/cudf/core/__init__.py index 91a369c31f8..22e8027b0ab 100644 --- a/python/cudf/cudf/core/__init__.py +++ b/python/cudf/cudf/core/__init__.py @@ -24,3 +24,4 @@ from cudf.core.multiindex import MultiIndex from cudf.core.scalar import NA, Scalar from cudf.core.series import Series +import cudf.core.udf diff --git a/python/cudf/cudf/core/udf.py b/python/cudf/cudf/core/udf.py new file mode 100644 index 00000000000..a1660abe07b --- /dev/null +++ b/python/cudf/cudf/core/udf.py @@ -0,0 +1,115 @@ +import numba +from numba import cuda +from numba import njit + +class Masked(object): + def __init__(self, value, valid): + self.value = value + self.valid = valid + +from numba.extending import types + +class MaskedType(types.Type): + def __init__(self): + super().__init__(name="Masked") + +numba_masked = MaskedType() # name this something more natural - GM + +from numba.core.extending import typeof_impl +@typeof_impl.register(Masked) +def typeof_masked(val, c): + # This can be dynamic and be based off the specifics of the instance + # which is why val is a parameter + # c is a typeof context and doesn't matter for this + return numba_masked + +# types.number - more general numeric values +from numba.core.extending import type_callable +@type_callable(Masked) +def type_masked(context): + def typer(value, valid): + if isinstance(value, types.Integer) and isinstance(valid, types.Boolean): + return numba_masked + return typer + +# {i8, ... 
} +from numba.core.extending import models, register_model +@register_model(MaskedType) +class MaskedModel(models.StructModel): + def __init__(self, dmm, fe_type): + members = [ + ('value', types.int64), + ('valid', types.bool_) + ] + models.StructModel.__init__(self, dmm, fe_type, members) + +from numba.core.extending import lower_builtin +from numba.core import cgutils + +@lower_builtin(Masked, types.int64, types.bool_) +def impl_masked_constructor(context, builder, sig, args): + + print("beginning:") + print(builder.module) + + typ = sig.return_type + value, valid = args + + masked = cgutils.create_struct_proxy(typ)(context, builder) + masked.value = value + masked.valid = valid + print(masked._getvalue()) + + print("end:") + print(builder.module) + return masked._getvalue() # return a pointer to the struct I created + +@njit +def create_masked(value, valid): + Masked(value, valid) + +import numpy as np +create_masked(1, True) + +from numba.core.typing import signature + +from numba.core.typing.templates import AbstractTemplate +from numba.cuda.cudadecl import registry as cuda_registry + +import operator + +@cuda_registry.register_global(operator.add) +class MaskedScalarAdd(AbstractTemplate): + # abstracttemplate vs concretetemplate + def generic(self, args, kws): + if isinstance(args[0], MaskedType) and isinstance(args[1], MaskedType): + return signature(numba_masked, numba_masked, numba_masked) + +from numba.cuda.cudaimpl import lower as cuda_lower + +@cuda_lower(operator.add, MaskedType, MaskedType) +def masked_scalar_add_impl(context, builder, sig, args): + # get the types from the signature + + masked_type_1, masked_type_2 = sig.args + masked_return_type = sig.return_type + + + # create LLVM IR structs + m1 = cgutils.create_struct_proxy(masked_type_1)(context, builder, value=args[0]) + m2 = cgutils.create_struct_proxy(masked_type_2)(context, builder, value=args[1]) + result = cgutils.create_struct_proxy(masked_return_type)(context, builder) + + valid = 
builder.or_(m1.valid, m2.valid) + result.valid = valid + with builder.if_then(valid): + result.value = builder.add(m1.value, m2.value) + + return result._getvalue() + +@cuda.jit(numba_masked(numba_masked, numba_masked), device=True) +def masked_add_py(m1, m2): + return m1 + m2 + +def masked_add_py_2(m1, m2): + return m1 + m2 From 91ae6a3d2ea71494075648398b09f75dc8f3044b Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Sun, 7 Mar 2021 12:21:52 -0800 Subject: [PATCH 003/132] a little cleanup --- python/cudf/cudf/core/udf.py | 88 ++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 49 deletions(-) diff --git a/python/cudf/cudf/core/udf.py b/python/cudf/cudf/core/udf.py index a1660abe07b..0caffb00768 100644 --- a/python/cudf/cudf/core/udf.py +++ b/python/cudf/cudf/core/udf.py @@ -1,82 +1,68 @@ +import operator + import numba -from numba import cuda -from numba import njit +import numpy as np +from numba import cuda, njit +from numba.core import cgutils +from numba.core.extending import ( + lower_builtin, + models, + register_model, + type_callable, + typeof_impl, +) +from numba.core.typing import signature +from numba.core.typing.templates import AbstractTemplate +from numba.cuda.cudadecl import registry as cuda_registry +from numba.cuda.cudaimpl import lower as cuda_lower +from numba.extending import types + class Masked(object): def __init__(self, value, valid): self.value = value self.valid = valid -from numba.extending import types class MaskedType(types.Type): def __init__(self): super().__init__(name="Masked") -numba_masked = MaskedType() # name this something more natural - GM +numba_masked = MaskedType() # name this something more natural - GM -from numba.core.extending import typeof_impl @typeof_impl.register(Masked) def typeof_masked(val, c): - # This can be dynamic and be based off the specifics of the instance - # which is why val is a parameter - # c is a typeof context and doesn't matter for this return numba_masked -# types.number 
- more general numeric values -from numba.core.extending import type_callable + @type_callable(Masked) def type_masked(context): def typer(value, valid): - if isinstance(value, types.Integer) and isinstance(valid, types.Boolean): + if isinstance(value, types.Integer) and isinstance( + valid, types.Boolean + ): return numba_masked + return typer -# {i8, ... } -from numba.core.extending import models, register_model + @register_model(MaskedType) class MaskedModel(models.StructModel): def __init__(self, dmm, fe_type): - members = [ - ('value', types.int64), - ('valid', types.bool_) - ] + members = [("value", types.int64), ("valid", types.bool_)] models.StructModel.__init__(self, dmm, fe_type, members) -from numba.core.extending import lower_builtin -from numba.core import cgutils @lower_builtin(Masked, types.int64, types.bool_) def impl_masked_constructor(context, builder, sig, args): - - print("beginning:") - print(builder.module) - typ = sig.return_type - value, valid = args - + value, valid = args + masked = cgutils.create_struct_proxy(typ)(context, builder) masked.value = value masked.valid = valid - print(masked._getvalue()) - - print("end:") - print(builder.module) - return masked._getvalue() # return a pointer to the struct I created - -@njit -def create_masked(value, valid): - Masked(value, valid) - -import numpy as np -create_masked(1, True) + return masked._getvalue() # return a pointer to the struct I created -from numba.core.typing import signature - -from numba.core.typing.templates import AbstractTemplate -from numba.cuda.cudadecl import registry as cuda_registry - -import operator @cuda_registry.register_global(operator.add) class MaskedScalarAdd(AbstractTemplate): @@ -85,7 +71,6 @@ def generic(self, args, kws): if isinstance(args[0], MaskedType) and isinstance(args[1], MaskedType): return signature(numba_masked, numba_masked, numba_masked) -from numba.cuda.cudaimpl import lower as cuda_lower @cuda_lower(operator.add, MaskedType, MaskedType) def 
masked_scalar_add_impl(context, builder, sig, args): @@ -93,23 +78,28 @@ def masked_scalar_add_impl(context, builder, sig, args): masked_type_1, masked_type_2 = sig.args masked_return_type = sig.return_type - - + # create LLVM IR structs - m1 = cgutils.create_struct_proxy(masked_type_1)(context, builder, value=args[0]) - m2 = cgutils.create_struct_proxy(masked_type_2)(context, builder, value=args[1]) + m1 = cgutils.create_struct_proxy(masked_type_1)( + context, builder, value=args[0] + ) + m2 = cgutils.create_struct_proxy(masked_type_2)( + context, builder, value=args[1] + ) result = cgutils.create_struct_proxy(masked_return_type)(context, builder) valid = builder.or_(m1.valid, m2.valid) result.valid = valid with builder.if_then(valid): result.value = builder.add(m1.value, m2.value) - + return result._getvalue() + @cuda.jit(numba_masked(numba_masked, numba_masked), device=True) def masked_add_py(m1, m2): return m1 + m2 + def masked_add_py_2(m1, m2): return m1 + m2 From a855a6f453c311d3ee5464de37a8d09041ef79a0 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Sun, 7 Mar 2021 13:43:58 -0800 Subject: [PATCH 004/132] basic bindings, header, placeholder c++ code --- cpp/include/cudf/transform.hpp | 9 +++++++++ cpp/src/transform/transform.cpp | 18 ++++++++++++++++++ python/cudf/cudf/_lib/cpp/transform.pxd | 7 +++++++ python/cudf/cudf/core/udf.py | 7 +++++++ 4 files changed, 41 insertions(+) diff --git a/cpp/include/cudf/transform.hpp b/cpp/include/cudf/transform.hpp index 9b740d207e1..c2c9e954235 100644 --- a/cpp/include/cudf/transform.hpp +++ b/cpp/include/cudf/transform.hpp @@ -53,6 +53,15 @@ std::unique_ptr transform( bool is_ptx, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +std::unique_ptr masked_binary_op( + column_view const& A, + column_view const& B, + std::string const& binary_udf, + data_type output_type, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + + /** * @brief Creates a null_mask 
from `input` by converting `NaN` to null and * preserving existing null values and also returns new null_count. diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index f7fc7f832c4..da98d99616e 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -131,4 +131,22 @@ std::unique_ptr transform(column_view const& input, return detail::transform(input, unary_udf, output_type, is_ptx, rmm::cuda_stream_default, mr); } +std::unique_ptr masked_binary_op(column_view const& A, + column_view const& B, + std::string const& binary_udf, + data_type output_type, + rmm::mr::device_memory_resource* mr) +{ + std::cout << "hello world!" << std::endl; + + rmm::cuda_stream_view stream = rmm::cuda_stream_default; + + std::unique_ptr output = make_fixed_width_column( + output_type, A.size(), copy_bitmask(A), cudf::UNKNOWN_NULL_COUNT, stream, mr); + + return output; +} + + + } // namespace cudf diff --git a/python/cudf/cudf/_lib/cpp/transform.pxd b/python/cudf/cudf/_lib/cpp/transform.pxd index 5e37336cb94..624c36f9b19 100644 --- a/python/cudf/cudf/_lib/cpp/transform.pxd +++ b/python/cudf/cudf/_lib/cpp/transform.pxd @@ -38,6 +38,13 @@ cdef extern from "cudf/transform.hpp" namespace "cudf" nogil: bool is_ptx ) except + + cdef unique_ptr[column] masked_binary_op( + column_view A, + column_view B, + string binary_udf, + data_type output_type + ) except + + cdef pair[unique_ptr[table], unique_ptr[column]] encode( table_view input ) except + diff --git a/python/cudf/cudf/core/udf.py b/python/cudf/cudf/core/udf.py index 0caffb00768..2c58b6bc951 100644 --- a/python/cudf/cudf/core/udf.py +++ b/python/cudf/cudf/core/udf.py @@ -16,6 +16,7 @@ from numba.cuda.cudadecl import registry as cuda_registry from numba.cuda.cudaimpl import lower as cuda_lower from numba.extending import types +import inspect class Masked(object): @@ -103,3 +104,9 @@ def masked_add_py(m1, m2): def masked_add_py_2(m1, m2): return m1 + m2 + +def compile_udf(func): + + 
signature = (numba_masked, numba_masked) + ptx, _ = cuda.compile_ptx_for_current_device(func, signature, device=True) + return ptx From 1b2c00c2e1dc50f5d01df22a2e3af4657cfe94b2 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Sun, 7 Mar 2021 13:45:40 -0800 Subject: [PATCH 005/132] missed one cython file - bindings work and run --- python/cudf/cudf/_lib/transform.pyx | 30 +++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/python/cudf/cudf/_lib/transform.pyx b/python/cudf/cudf/_lib/transform.pyx index f061f8de942..93090cc78fc 100644 --- a/python/cudf/cudf/_lib/transform.pyx +++ b/python/cudf/cudf/_lib/transform.pyx @@ -127,6 +127,36 @@ def transform(Column input, op): return Column.from_unique_ptr(move(c_output)) +def masked_binary_op(Column A, Column B, op): + cdef column_view A_view = A.view() + cdef column_view B_view = B.view() + + cdef string c_str + cdef type_id c_tid + cdef data_type c_dtype + + if A.dtype != np.dtype('int64') or B.dtype != np.dtype('int64'): + raise TypeError('int64 please') + + + from cudf.core.udf import compile_udf + st = compile_udf(op).encode('UTF-8') + + c_tid = ( + np_to_cudf_types[np.dtype('int64')] + ) + c_dtype = data_type(c_tid) + + with nogil: + c_output = move(libcudf_transform.masked_binary_op( + A_view, + B_view, + c_str, + c_dtype, + )) + + return Column.from_unique_ptr(move(c_output)) + def table_encode(Table input): cdef table_view c_input = input.data_view() From 7584ad3e8b1303c866c706bcc44fe776e192da3b Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Sun, 7 Mar 2021 13:53:14 -0800 Subject: [PATCH 006/132] fix bug --- python/cudf/cudf/_lib/transform.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/_lib/transform.pyx b/python/cudf/cudf/_lib/transform.pyx index 93090cc78fc..1fea3e7bbec 100644 --- a/python/cudf/cudf/_lib/transform.pyx +++ b/python/cudf/cudf/_lib/transform.pyx @@ -140,7 +140,7 @@ def masked_binary_op(Column A, Column B, op): from 
cudf.core.udf import compile_udf - st = compile_udf(op).encode('UTF-8') + c_str = compile_udf(op).encode('UTF-8') c_tid = ( np_to_cudf_types[np.dtype('int64')] From 4988b14487979f914228e14b198e9b13bd49d34d Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 8 Mar 2021 05:07:00 -0800 Subject: [PATCH 007/132] little more progress --- cpp/src/transform/jit/code/kernel.cpp | 21 +++++++++++++++++++++ cpp/src/transform/transform.cpp | 12 +++++++++++- 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/cpp/src/transform/jit/code/kernel.cpp b/cpp/src/transform/jit/code/kernel.cpp index 58fdb945de3..29f7cb4a1d7 100644 --- a/cpp/src/transform/jit/code/kernel.cpp +++ b/cpp/src/transform/jit/code/kernel.cpp @@ -53,6 +53,27 @@ const char* kernel = } )***"; +const char* masked_binary_op_kernel = + R"***( + template + __global__ + void kernel(cudf::size_type size, + TypeOut* out_data, TypeIn1* in_data1, TypeIn2 in_data2) { + int tid = threadIdx.x; + int blkid = blockIdx.x; + int blksz = blockDim.x; + int gridsz = gridDim.x; + + int start = tid + blkid * blksz; + int step = blksz * gridsz; + + for (cudf::size_type i=start; i masked_binary_op(column_view const& A, data_type output_type, rmm::mr::device_memory_resource* mr) { - std::cout << "hello world!" 
<< std::endl; + std::cout << "ehllo " << std::endl; + std::cout << binary_udf << std::endl; rmm::cuda_stream_view stream = rmm::cuda_stream_default; + + std::unique_ptr output = make_fixed_width_column( output_type, A.size(), copy_bitmask(A), cudf::UNKNOWN_NULL_COUNT, stream, mr); + auto null_mask = cudf::create_null_mask(A.size(), mask_state::ALL_VALID, mr); + + std::unique_ptr output_mask = make_fixed_width_column( + cudf::data_type{cudf::type_id::BOOL8}, A.size(), null_mask, cudf::UNKNOWN_NULL_COUNT, stream, mr); + + + return output; } From 7a6427cdccb29238ef271ee36fcfb3eefefa1381 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 11 Mar 2021 18:48:41 -0800 Subject: [PATCH 008/132] an attempt at NA plumbing --- python/cudf/cudf/core/udf.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/python/cudf/cudf/core/udf.py b/python/cudf/cudf/core/udf.py index 2c58b6bc951..891406acba8 100644 --- a/python/cudf/cudf/core/udf.py +++ b/python/cudf/cudf/core/udf.py @@ -18,6 +18,7 @@ from numba.extending import types import inspect +from cudf.core.scalar import _NAType class Masked(object): def __init__(self, value, valid): @@ -29,12 +30,21 @@ class MaskedType(types.Type): def __init__(self): super().__init__(name="Masked") +class NAType(types.Type): + def __init__(self): + super().__init__(name="NA") + numba_masked = MaskedType() # name this something more natural - GM +numba_na = NAType() + @typeof_impl.register(Masked) def typeof_masked(val, c): return numba_masked +@typeof_impl.register(_NAType) +def typeof_na(val, c): + return numba_na @type_callable(Masked) def type_masked(context): @@ -73,6 +83,12 @@ def generic(self, args, kws): return signature(numba_masked, numba_masked, numba_masked) +@cuda_registry.register_global(operator.add) +class MaskedScalarAddNull(AbstractTemplate): + def generic(self, args, kws): + if isinstance(args[0], MaskedType) and isinstance(args[1], NAType): + return signature(numba_masked, numba_masked, 
numba_na) + @cuda_lower(operator.add, MaskedType, MaskedType) def masked_scalar_add_impl(context, builder, sig, args): # get the types from the signature @@ -96,6 +112,13 @@ def masked_scalar_add_impl(context, builder, sig, args): return result._getvalue() +@cuda_lower(operator.add, MaskedType, NAType) +def masked_scalar_add_na_impl(context, builder, sig, args): + return_type = sig.return_type + result = cgutils.create_struct_proxy(return_type)(context, builder) + result.valid = False + return result._getvalue() + @cuda.jit(numba_masked(numba_masked, numba_masked), device=True) def masked_add_py(m1, m2): @@ -110,3 +133,8 @@ def compile_udf(func): signature = (numba_masked, numba_masked) ptx, _ = cuda.compile_ptx_for_current_device(func, signature, device=True) return ptx + +@cuda.jit(numba_masked(numba_masked), device=True) +def test_scalar_null_add(masked): + result = masked + cudf.NA + return masked From 5e6eb0683370df9d5f3d6b4c047f66186d0efc6c Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 11 Mar 2021 18:49:52 -0800 Subject: [PATCH 009/132] a little more plubming and prototyping --- cpp/include/cudf/transform.hpp | 2 ++ cpp/src/transform/jit/code/kernel.cpp | 17 ++++++++++++++++- cpp/src/transform/transform.cpp | 16 ++++++++++++---- python/cudf/cudf/_lib/cpp/transform.pxd | 4 +++- python/cudf/cudf/_lib/transform.pyx | 8 ++++++++ 5 files changed, 41 insertions(+), 6 deletions(-) diff --git a/cpp/include/cudf/transform.hpp b/cpp/include/cudf/transform.hpp index c2c9e954235..ad1d260f943 100644 --- a/cpp/include/cudf/transform.hpp +++ b/cpp/include/cudf/transform.hpp @@ -59,6 +59,8 @@ std::unique_ptr masked_binary_op( column_view const& B, std::string const& binary_udf, data_type output_type, + column_view const& outcol_view, + column_view const& outmsk_view, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/src/transform/jit/code/kernel.cpp b/cpp/src/transform/jit/code/kernel.cpp index 
29f7cb4a1d7..4f490956ace 100644 --- a/cpp/src/transform/jit/code/kernel.cpp +++ b/cpp/src/transform/jit/code/kernel.cpp @@ -31,10 +31,17 @@ const char* kernel_header = #include #include + + struct Masked { + int value; + bool valid; + }; + )***"; const char* kernel = R"***( + template __global__ void kernel(cudf::size_type size, @@ -47,6 +54,10 @@ const char* kernel = int start = tid + blkid * blksz; int step = blksz * gridsz; + Masked m; + m.value = 1; + m.valid = true; + for (cudf::size_type i=start; i __global__ void kernel(cudf::size_type size, @@ -67,8 +80,10 @@ const char* masked_binary_op_kernel = int start = tid + blkid * blksz; int step = blksz * gridsz; + Masked m; + for (cudf::size_type i=start; i masked_binary_op(column_view const& A, column_view const& B, std::string const& binary_udf, data_type output_type, + column_view const& outcol_view, + column_view const& outmsk_view, rmm::mr::device_memory_resource* mr) { std::cout << "ehllo " << std::endl; std::cout << binary_udf << std::endl; rmm::cuda_stream_view stream = rmm::cuda_stream_default; +zz + std::string parsed_ptx = cudf::jit::parse_single_function_ptx( + binary_udf, "GENERIC_BINARY_OP", cudf::jit::get_type_name(output_type), {0}); + + + std::cout << "successfully parsed PTX!!!" 
<< std::endl; + std::cout << "__________________________" << std::endl; + std::cout << parsed_ptx << std::endl; + std::cout << "__________________________" << std::endl; + std::unique_ptr output = make_fixed_width_column( output_type, A.size(), copy_bitmask(A), cudf::UNKNOWN_NULL_COUNT, stream, mr); - auto null_mask = cudf::create_null_mask(A.size(), mask_state::ALL_VALID, mr); - std::unique_ptr output_mask = make_fixed_width_column( - cudf::data_type{cudf::type_id::BOOL8}, A.size(), null_mask, cudf::UNKNOWN_NULL_COUNT, stream, mr); - return output; } diff --git a/python/cudf/cudf/_lib/cpp/transform.pxd b/python/cudf/cudf/_lib/cpp/transform.pxd index 624c36f9b19..217b3374579 100644 --- a/python/cudf/cudf/_lib/cpp/transform.pxd +++ b/python/cudf/cudf/_lib/cpp/transform.pxd @@ -42,7 +42,9 @@ cdef extern from "cudf/transform.hpp" namespace "cudf" nogil: column_view A, column_view B, string binary_udf, - data_type output_type + data_type output_type, + column_view outcol_view, + column_view outmask_view, ) except + cdef pair[unique_ptr[table], unique_ptr[column]] encode( diff --git a/python/cudf/cudf/_lib/transform.pyx b/python/cudf/cudf/_lib/transform.pyx index 1fea3e7bbec..b94d2b33bce 100644 --- a/python/cudf/cudf/_lib/transform.pyx +++ b/python/cudf/cudf/_lib/transform.pyx @@ -147,12 +147,20 @@ def masked_binary_op(Column A, Column B, op): ) c_dtype = data_type(c_tid) + cdef Column output_column = cudf.core.column.column_empty(len(A), dtype='int64') + cdef Column output_mask = cudf.core.column.column_empty(len(A), dtype='bool') + + cdef column_view outcol_view = output_column.view() + cdef column_view outmsk_view = output_mask.view() + with nogil: c_output = move(libcudf_transform.masked_binary_op( A_view, B_view, c_str, c_dtype, + outcol_view, + outmsk_view )) return Column.from_unique_ptr(move(c_output)) From ea15da63ce003f4428b2c5f026aabd10032a2a38 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Fri, 12 Mar 2021 09:12:05 -0800 Subject: [PATCH 010/132] lots 
of progress --- cpp/src/transform/jit/code/code.h | 1 + cpp/src/transform/jit/code/kernel.cpp | 32 +++++++---- cpp/src/transform/transform.cpp | 77 ++++++++++++++------------- python/cudf/cudf/core/udf.py | 41 +++++++++++--- 4 files changed, 98 insertions(+), 53 deletions(-) diff --git a/cpp/src/transform/jit/code/code.h b/cpp/src/transform/jit/code/code.h index cc3d6a8fe89..b12fac6e2c7 100644 --- a/cpp/src/transform/jit/code/code.h +++ b/cpp/src/transform/jit/code/code.h @@ -24,6 +24,7 @@ extern const char* kernel_header; extern const char* kernel; extern const char* traits; extern const char* operation; +extern const char* masked_binary_op_kernel; } // namespace code } // namespace jit diff --git a/cpp/src/transform/jit/code/kernel.cpp b/cpp/src/transform/jit/code/kernel.cpp index 4f490956ace..d0632e8385a 100644 --- a/cpp/src/transform/jit/code/kernel.cpp +++ b/cpp/src/transform/jit/code/kernel.cpp @@ -28,9 +28,9 @@ const char* kernel_header = #include #include #include - #include #include + #include struct Masked { int value; @@ -54,10 +54,6 @@ const char* kernel = int start = tid + blkid * blksz; int step = blksz * gridsz; - Masked m; - m.value = 1; - m.valid = true; - for (cudf::size_type i=start; i __global__ - void kernel(cudf::size_type size, - TypeOut* out_data, TypeIn1* in_data1, TypeIn2 in_data2) { + void kernel(cudf::size_type size, + cudf::size_type offset, + TypeOut* out_data, + cudf::bitmask_type const* out_mask, + TypeIn1* in_data1, + cudf::bitmask_type const* in_data1_mask, + TypeIn2* in_data2, + cudf::bitmask_type const* in_data2_mask + ) { int tid = threadIdx.x; int blkid = blockIdx.x; int blksz = blockDim.x; @@ -80,10 +83,21 @@ const char* masked_binary_op_kernel = int start = tid + blkid * blksz; int step = blksz * gridsz; - Masked m; + Masked output; for (cudf::size_type i=start; i{}(udf)); - - std::cout << "The program's hash is:" << std::endl; - std::cout << hash << std::endl; - - std::cout << "the actual udf string is: " << std::endl; - 
std::cout << udf << std::endl; - - - std::cout << "cuda_source is:" << std::endl; std::string cuda_source = code::kernel_header; - std::cout << cuda_source << std::endl; if (is_ptx) { cuda_source += cudf::jit::parse_single_function_ptx( udf, "GENERIC_UNARY_OP", cudf::jit::get_type_name(output_type), {0}) + code::kernel; - std::cout << "cuda_source after is_ptx condition: " << std::endl; - std::cout << cuda_source << std::endl; } else { cuda_source += cudf::jit::parse_single_function_cuda(udf, "GENERIC_UNARY_OP") + code::kernel; } @@ -93,6 +81,26 @@ void unary_operation(mutable_column_view output, .launch(output.size(), cudf::jit::get_data_ptr(output), cudf::jit::get_data_ptr(input)); } + +void binary_operation(column_view const& A, + column_view const& B, + std::string const& binary_udf, + data_type output_type, + column_view const& outcol_view, + column_view const& outmsk_view, + rmm::mr::device_memory_resource* mr) +{ + std::string cuda_source = code::kernel_header; + cuda_source += cudf::jit::parse_single_function_ptx( + binary_udf, "GENERIC_BINARY_OP", cudf::jit::get_type_name(output_type), {0}); + + cuda_source += code::masked_binary_op_kernel; + + std::cout << "*** CUDA_SOURCE ***" << std::endl; + std::cout << cuda_source << std::endl; + +} + } // namespace jit } // namespace transformation @@ -119,6 +127,24 @@ std::unique_ptr transform(column_view const& input, return output; } +std::unique_ptr masked_binary_op_inner(column_view const& A, + column_view const& B, + std::string const& binary_udf, + data_type output_type, + column_view const& outcol_view, + column_view const& outmsk_view, + rmm::mr::device_memory_resource* mr) +{ + rmm::cuda_stream_view stream = rmm::cuda_stream_default; + transformation::jit::binary_operation(A, B, binary_udf, output_type, outcol_view, outmsk_view, mr); + + std::unique_ptr output = make_fixed_width_column( + output_type, A.size(), copy_bitmask(A), cudf::UNKNOWN_NULL_COUNT, stream, mr); + + + return output; +} + } // namespace 
detail std::unique_ptr transform(column_view const& input, @@ -139,32 +165,9 @@ std::unique_ptr masked_binary_op(column_view const& A, column_view const& outmsk_view, rmm::mr::device_memory_resource* mr) { - std::cout << "ehllo " << std::endl; - std::cout << binary_udf << std::endl; - - rmm::cuda_stream_view stream = rmm::cuda_stream_default; -zz - - - std::string parsed_ptx = cudf::jit::parse_single_function_ptx( - binary_udf, "GENERIC_BINARY_OP", cudf::jit::get_type_name(output_type), {0}); - - - std::cout << "successfully parsed PTX!!!" << std::endl; - std::cout << "__________________________" << std::endl; - std::cout << parsed_ptx << std::endl; - std::cout << "__________________________" << std::endl; - - - std::unique_ptr output = make_fixed_width_column( - output_type, A.size(), copy_bitmask(A), cudf::UNKNOWN_NULL_COUNT, stream, mr); - - - - - return output; + std::cout << "HERE!!" << std::endl; + return detail::masked_binary_op_inner(A, B, binary_udf, output_type, outcol_view, outmsk_view, mr); } - } // namespace cudf diff --git a/python/cudf/cudf/core/udf.py b/python/cudf/cudf/core/udf.py index 891406acba8..b2f59b5133b 100644 --- a/python/cudf/cudf/core/udf.py +++ b/python/cudf/cudf/core/udf.py @@ -1,5 +1,4 @@ import operator - import numba import numpy as np from numba import cuda, njit @@ -14,12 +13,15 @@ from numba.core.typing import signature from numba.core.typing.templates import AbstractTemplate from numba.cuda.cudadecl import registry as cuda_registry -from numba.cuda.cudaimpl import lower as cuda_lower +from numba.cuda.cudaimpl import lower as cuda_lower, registry as cuda_lowering_registry from numba.extending import types import inspect +from llvmlite import ir from cudf.core.scalar import _NAType +from numba.core.extending import make_attribute_wrapper + class Masked(object): def __init__(self, value, valid): self.value = value @@ -37,7 +39,6 @@ def __init__(self): numba_masked = MaskedType() # name this something more natural - GM numba_na = 
NAType() - @typeof_impl.register(Masked) def typeof_masked(val, c): return numba_masked @@ -56,6 +57,8 @@ def typer(value, valid): return typer +make_attribute_wrapper(MaskedType, "value", "value") +make_attribute_wrapper(MaskedType, "valid", "valid") @register_model(MaskedType) class MaskedModel(models.StructModel): @@ -63,6 +66,13 @@ def __init__(self, dmm, fe_type): members = [("value", types.int64), ("valid", types.bool_)] models.StructModel.__init__(self, dmm, fe_type, members) +#@register_model(NAType) # check what model NoneType uses +#class NAModel(models.OpaqueModel): +# def __init__(self, dmm, fe_type): +# members = [] +# models.StructModel.__init__(self, dmm, fe_type, members) + +register_model(NAType)(models.OpaqueModel) @lower_builtin(Masked, types.int64, types.bool_) def impl_masked_constructor(context, builder, sig, args): @@ -112,13 +122,21 @@ def masked_scalar_add_impl(context, builder, sig, args): return result._getvalue() + @cuda_lower(operator.add, MaskedType, NAType) def masked_scalar_add_na_impl(context, builder, sig, args): - return_type = sig.return_type - result = cgutils.create_struct_proxy(return_type)(context, builder) - result.valid = False +# return_type = sig.return_type + # use context to get llvm type for a bool + breakpoint() + result = cgutils.create_struct_proxy(numba_masked)(context, builder) + result.valid = context.get_constant(types.boolean, 0) return result._getvalue() +@cuda_lowering_registry.lower_constant(NAType) +def constant_dummy(context, builder, ty, pyval): + # This handles None, etc. 
+ return context.get_dummy_value() + @cuda.jit(numba_masked(numba_masked, numba_masked), device=True) def masked_add_py(m1, m2): @@ -137,4 +155,13 @@ def compile_udf(func): @cuda.jit(numba_masked(numba_masked), device=True) def test_scalar_null_add(masked): result = masked + cudf.NA - return masked + return result + +@cuda.jit +def test_test_scalar_null_add(data, masks): + m = Masked(1, True) + result = test_scalar_null_add(m) + print(result.value) + print(types.int8(result.valid)) + data[0] = result.value + masks[0] = result.valid From 961a9dd3db0e2250553f4f6a074d10657db08abd Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 15 Mar 2021 17:46:01 -0700 Subject: [PATCH 011/132] trying to plumb to jitify launcher --- cpp/src/transform/jit/code/code.h | 1 + cpp/src/transform/jit/code/kernel.cpp | 58 ++++++++++++--------- cpp/src/transform/transform.cpp | 73 ++++++++++++++++++++++++++- python/cudf/cudf/core/udf.py | 5 +- 4 files changed, 109 insertions(+), 28 deletions(-) diff --git a/cpp/src/transform/jit/code/code.h b/cpp/src/transform/jit/code/code.h index b12fac6e2c7..c5321ad824a 100644 --- a/cpp/src/transform/jit/code/code.h +++ b/cpp/src/transform/jit/code/code.h @@ -25,6 +25,7 @@ extern const char* kernel; extern const char* traits; extern const char* operation; extern const char* masked_binary_op_kernel; +extern const char* null_kernel; } // namespace code } // namespace jit diff --git a/cpp/src/transform/jit/code/kernel.cpp b/cpp/src/transform/jit/code/kernel.cpp index d0632e8385a..d12fa8e158b 100644 --- a/cpp/src/transform/jit/code/kernel.cpp +++ b/cpp/src/transform/jit/code/kernel.cpp @@ -64,44 +64,52 @@ const char* masked_binary_op_kernel = R"***( - template + template __global__ - void kernel(cudf::size_type size, - cudf::size_type offset, + void test_binop_kernel(cudf::size_type size, TypeOut* out_data, - cudf::bitmask_type const* out_mask, - TypeIn1* in_data1, - cudf::bitmask_type const* in_data1_mask, - TypeIn2* in_data2, - cudf::bitmask_type 
const* in_data2_mask + TypeLhs* lhs_data, + TypeRhs* rhs_data + //bool* output_mask, + //cudf::bitmask_type const* lhs_mask, + //cudf::size_type lhs_offset, + //cudf::bitmask_type const* rhs_mask, + //cudf::size_type rhs_offset) ) { - int tid = threadIdx.x; - int blkid = blockIdx.x; - int blksz = blockDim.x; - int gridsz = gridDim.x; + //int tid = threadIdx.x; + //int blkid = blockIdx.x; + //int blksz = blockDim.x; + //int gridsz = gridDim.x; - int start = tid + blkid * blksz; - int step = blksz * gridsz; + //int start = tid + blkid * blksz; + //int step = blksz * gridsz; - Masked output; + //Masked output; - for (cudf::size_type i=start; i + __global__ + void null_kernel(TypeOut* out_data, Type2* test) {} + + )***"; } // namespace code } // namespace jit diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index d5705082dab..c22be708857 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -90,15 +90,86 @@ void binary_operation(column_view const& A, column_view const& outmsk_view, rmm::mr::device_memory_resource* mr) { + + std::string hash = "prog_transform" + std::to_string(std::hash{}(binary_udf)); + std::string cuda_source = code::kernel_header; cuda_source += cudf::jit::parse_single_function_ptx( binary_udf, "GENERIC_BINARY_OP", cudf::jit::get_type_name(output_type), {0}); - cuda_source += code::masked_binary_op_kernel; + cuda_source += code::null_kernel; std::cout << "*** CUDA_SOURCE ***" << std::endl; std::cout << cuda_source << std::endl; + rmm::cuda_stream_view stream; + + // Launch the jitify kernel + + cudf::jit::launcher(hash, + cuda_source, + header_names, + cudf::jit::compiler_flags, + headers_code, + stream) + .set_kernel_inst("null_kernel", + { + cudf::jit::get_type_name(outcol_view.type()), cudf::jit::get_type_name(outcol_view.type()), + } + ); + + /* + + + cudf::jit::launcher(hash, + cuda_source, + header_names, + cudf::jit::compiler_flags, + headers_code, + stream) + 
.set_kernel_inst("test_binop_kernel", // name of the kernel we are launching + {cudf::jit::get_type_name(outcol_view.type()), // list of template arguments + cudf::jit::get_type_name(A.type()), + cudf::jit::get_type_name(B.type())}); + //.launch(outcol_view.size(), + // cudf::jit::get_data_ptr(outcol_view), + // cudf::jit::get_data_ptr(A), + // cudf::jit::get_data_ptr(B), + // cudf::jit::get_data_ptr(outmsk_view), // ? + // A.null_mask(), + // A.offset(), + // B.null_mask(), + // B.offset() + //); + + */ + + /* + void kernel(cudf::size_type size, + TypeOut* out_data, + TypeLhs* lhs_data, + TypeRhs* rhs_data, + cudf::bitmask_type* output_mask, + cudf::bitmask_type const* lhs_mask, + cudf::size_type lhs_offset, + cudf::bitmask_type const* rhs_mask, + cudf::size_type rhs_offset) + + .set_kernel_inst{cudf::jit::get_type_name(out.type()), // list of template arguments + cudf::jit::get_type_name(lhs.type()), + cudf::jit::get_type_name(rhs.type()), + get_operator_name(op, OperatorType::Direct)}) + .launch(out.size(), + cudf::jit::get_data_ptr(out), + cudf::jit::get_data_ptr(lhs), + cudf::jit::get_data_ptr(rhs), + out.null_mask(), + lhs.null_mask(), + rhs.offset(), + rhs.null_mask(), + rhs.offset()); + */ + } } // namespace jit diff --git a/python/cudf/cudf/core/udf.py b/python/cudf/cudf/core/udf.py index b2f59b5133b..215254ef97a 100644 --- a/python/cudf/cudf/core/udf.py +++ b/python/cudf/cudf/core/udf.py @@ -127,7 +127,6 @@ def masked_scalar_add_impl(context, builder, sig, args): def masked_scalar_add_na_impl(context, builder, sig, args): # return_type = sig.return_type # use context to get llvm type for a bool - breakpoint() result = cgutils.create_struct_proxy(numba_masked)(context, builder) result.valid = context.get_constant(types.boolean, 0) return result._getvalue() @@ -152,9 +151,11 @@ def compile_udf(func): ptx, _ = cuda.compile_ptx_for_current_device(func, signature, device=True) return ptx +NA = _NAType() + @cuda.jit(numba_masked(numba_masked), device=True) def 
test_scalar_null_add(masked): - result = masked + cudf.NA + result = masked + NA return result @cuda.jit From 5e930942417b967faf87004d4b28661bf977c586 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 16 Mar 2021 08:49:44 -0700 Subject: [PATCH 012/132] progress on jitify template/launch --- cpp/src/transform/jit/code/kernel.cpp | 7 +++++-- cpp/src/transform/transform.cpp | 16 ++++++++++++---- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/cpp/src/transform/jit/code/kernel.cpp b/cpp/src/transform/jit/code/kernel.cpp index d12fa8e158b..9f7644875fc 100644 --- a/cpp/src/transform/jit/code/kernel.cpp +++ b/cpp/src/transform/jit/code/kernel.cpp @@ -105,9 +105,12 @@ const char* masked_binary_op_kernel = const char* null_kernel = R"***( - template + template __global__ - void null_kernel(TypeOut* out_data, Type2* test) {} + void null_kernel(cudf::size_type size, + TypeOut* out_data, + TypeLhs* lhs_data, + TypeRhs* rhs_data) {} )***"; diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index c22be708857..b0f28c83201 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -113,11 +113,19 @@ void binary_operation(column_view const& A, headers_code, stream) .set_kernel_inst("null_kernel", - { - cudf::jit::get_type_name(outcol_view.type()), cudf::jit::get_type_name(outcol_view.type()), - } + { + cudf::jit::get_type_name(outcol_view.type()), + cudf::jit::get_type_name(A.type()), + cudf::jit::get_type_name(B.type()) + } + ) + .launch(outcol_view.size(), + outcol_view.size(), + cudf::jit::get_data_ptr(outcol_view), + cudf::jit::get_data_ptr(A), + cudf::jit::get_data_ptr(B) ); - + std::cout << "KERNEL LAUNCHED!!!" 
<< std::endl; /* From 03edcebb3a67d9d9c6e15b465459afc319d417e7 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 16 Mar 2021 11:26:37 -0700 Subject: [PATCH 013/132] null kernel launches with all arguments --- cpp/src/transform/jit/code/kernel.cpp | 8 +++++++- cpp/src/transform/transform.cpp | 14 +++++++++----- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/cpp/src/transform/jit/code/kernel.cpp b/cpp/src/transform/jit/code/kernel.cpp index 9f7644875fc..703873b2fa5 100644 --- a/cpp/src/transform/jit/code/kernel.cpp +++ b/cpp/src/transform/jit/code/kernel.cpp @@ -110,7 +110,13 @@ const char* null_kernel = void null_kernel(cudf::size_type size, TypeOut* out_data, TypeLhs* lhs_data, - TypeRhs* rhs_data) {} + TypeRhs* rhs_data, + bool* out_mask, + cudf::bitmask_type const* lhs_mask, + cudf::size_type lhs_offset, + cudf::bitmask_type const* rhs_mask, + cudf::size_type rhs_offset + ) {} )***"; diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index b0f28c83201..d222bbc5ef2 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -116,14 +116,18 @@ void binary_operation(column_view const& A, { cudf::jit::get_type_name(outcol_view.type()), cudf::jit::get_type_name(A.type()), - cudf::jit::get_type_name(B.type()) + cudf::jit::get_type_name(B.type()), } ) .launch(outcol_view.size(), - outcol_view.size(), - cudf::jit::get_data_ptr(outcol_view), - cudf::jit::get_data_ptr(A), - cudf::jit::get_data_ptr(B) + cudf::jit::get_data_ptr(outcol_view), + cudf::jit::get_data_ptr(A), + cudf::jit::get_data_ptr(B), + cudf::jit::get_data_ptr(outmsk_view), + A.null_mask(), + A.offset(), + B.null_mask(), + B.offset() ); std::cout << "KERNEL LAUNCHED!!!" 
<< std::endl; /* From 2b4c36fb965891e89adb7f4c672f9bd077fcba65 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 16 Mar 2021 12:45:15 -0700 Subject: [PATCH 014/132] bit_is_set works --- cpp/src/transform/jit/code/kernel.cpp | 21 ++++++++++++++++++++- cpp/src/transform/transform.cpp | 3 ++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/cpp/src/transform/jit/code/kernel.cpp b/cpp/src/transform/jit/code/kernel.cpp index 703873b2fa5..12944915632 100644 --- a/cpp/src/transform/jit/code/kernel.cpp +++ b/cpp/src/transform/jit/code/kernel.cpp @@ -116,7 +116,26 @@ const char* null_kernel = cudf::size_type lhs_offset, cudf::bitmask_type const* rhs_mask, cudf::size_type rhs_offset - ) {} + ) { + int tid = threadIdx.x; + int blkid = blockIdx.x; + int blksz = blockDim.x; + int gridsz = gridDim.x; + + int start = tid + blkid * blksz; + int step = blksz * gridsz; + + Masked output; + + bool l_valid; + bool r_valid; + + + for (cudf::size_type i=start; i #include +#include #include @@ -39,7 +40,7 @@ namespace transformation { //! 
Jit functions namespace jit { -const std::vector header_names{cudf_types_hpp, cudf_wrappers_timestamps_hpp}; +const std::vector header_names{cudf_types_hpp, cudf_wrappers_timestamps_hpp, cudf_utilities_bit_hpp}; std::istream* headers_code(std::string filename, std::iostream& stream) { From 3f76df5374ba9370a4516784cd6b26b042ab3de3 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 16 Mar 2021 19:16:36 -0700 Subject: [PATCH 015/132] successfully passing struct through the ptx function --- cpp/src/jit/launcher.cpp | 2 -- cpp/src/transform/jit/code/kernel.cpp | 23 ++++++++++++++++++----- cpp/src/transform/transform.cpp | 2 ++ 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/cpp/src/jit/launcher.cpp b/cpp/src/jit/launcher.cpp index a19b6a55503..2ddcac7d5ba 100644 --- a/cpp/src/jit/launcher.cpp +++ b/cpp/src/jit/launcher.cpp @@ -35,8 +35,6 @@ launcher::launcher(const std::string& hash, rmm::cuda_stream_view stream) : cache_instance{cudf::jit::cudfJitCache::Instance()}, stream(stream) { - std::cout << "launcher::launcher cuda_source:" << std::endl; - std::cout << cuda_source << std::endl; program = cache_instance.getProgram( hash, cuda_source.c_str(), header_names, compiler_flags, file_callback); } diff --git a/cpp/src/transform/jit/code/kernel.cpp b/cpp/src/transform/jit/code/kernel.cpp index 12944915632..feb435e3847 100644 --- a/cpp/src/transform/jit/code/kernel.cpp +++ b/cpp/src/transform/jit/code/kernel.cpp @@ -33,7 +33,7 @@ const char* kernel_header = #include struct Masked { - int value; + int64_t value; bool valid; }; @@ -116,7 +116,7 @@ const char* null_kernel = cudf::size_type lhs_offset, cudf::bitmask_type const* rhs_mask, cudf::size_type rhs_offset - ) { + ) { int tid = threadIdx.x; int blkid = blockIdx.x; int blksz = blockDim.x; @@ -127,12 +127,25 @@ const char* null_kernel = Masked output; - bool l_valid; - bool r_valid; + char l_valid; + char r_valid; + + long int l_data; + long int r_data; + int64_t* my_int_addr = &output.value; for 
(cudf::size_type i=start; i{}(binary_udf)); + std::cout << binary_udf << std::endl; + std::string cuda_source = code::kernel_header; cuda_source += cudf::jit::parse_single_function_ptx( binary_udf, "GENERIC_BINARY_OP", cudf::jit::get_type_name(output_type), {0}); From db88f9e8a29aa0ddc3307cd6bd42587b6e050f4b Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 16 Mar 2021 19:41:20 -0700 Subject: [PATCH 016/132] pipeline fully runs --- python/cudf/cudf/_lib/transform.pyx | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/_lib/transform.pyx b/python/cudf/cudf/_lib/transform.pyx index b94d2b33bce..912cd452950 100644 --- a/python/cudf/cudf/_lib/transform.pyx +++ b/python/cudf/cudf/_lib/transform.pyx @@ -163,7 +163,12 @@ def masked_binary_op(Column A, Column B, op): outmsk_view )) - return Column.from_unique_ptr(move(c_output)) + #return Column.from_unique_ptr(move(c_output)) + + output_mask_real = bools_to_mask(output_mask) + output_column = output_column.set_mask(output_mask_real) + + return output_column def table_encode(Table input): From 9a67670e696d3878688412f4d80d2ac07ee61a5a Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 17 Mar 2021 14:40:57 -0700 Subject: [PATCH 017/132] it lives --- cpp/src/transform/jit/code/kernel.cpp | 4 +--- python/cudf/cudf/_lib/transform.pyx | 5 +---- python/cudf/cudf/core/udf.py | 2 +- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/cpp/src/transform/jit/code/kernel.cpp b/cpp/src/transform/jit/code/kernel.cpp index feb435e3847..e604b04744e 100644 --- a/cpp/src/transform/jit/code/kernel.cpp +++ b/cpp/src/transform/jit/code/kernel.cpp @@ -133,15 +133,13 @@ const char* null_kernel = long int l_data; long int r_data; - int64_t* my_int_addr = &output.value; - for (cudf::size_type i=start; i Date: Mon, 12 Apr 2021 05:58:33 -0700 Subject: [PATCH 018/132] cleanup and add notebook --- cpp/src/transform/jit/code/code.h | 1 - cpp/src/transform/jit/code/kernel.cpp | 45 +- 
cpp/src/transform/transform.cpp | 55 +- notebooks/udf-nulls-demo-final.ipynb | 1574 +++++++++++++++++++++++++ 4 files changed, 1576 insertions(+), 99 deletions(-) create mode 100644 notebooks/udf-nulls-demo-final.ipynb diff --git a/cpp/src/transform/jit/code/code.h b/cpp/src/transform/jit/code/code.h index c5321ad824a..b12fac6e2c7 100644 --- a/cpp/src/transform/jit/code/code.h +++ b/cpp/src/transform/jit/code/code.h @@ -25,7 +25,6 @@ extern const char* kernel; extern const char* traits; extern const char* operation; extern const char* masked_binary_op_kernel; -extern const char* null_kernel; } // namespace code } // namespace jit diff --git a/cpp/src/transform/jit/code/kernel.cpp b/cpp/src/transform/jit/code/kernel.cpp index e604b04744e..dc0372baf05 100644 --- a/cpp/src/transform/jit/code/kernel.cpp +++ b/cpp/src/transform/jit/code/kernel.cpp @@ -60,50 +60,7 @@ const char* kernel = } )***"; -const char* masked_binary_op_kernel = - R"***( - - - template - __global__ - void test_binop_kernel(cudf::size_type size, - TypeOut* out_data, - TypeLhs* lhs_data, - TypeRhs* rhs_data - //bool* output_mask, - //cudf::bitmask_type const* lhs_mask, - //cudf::size_type lhs_offset, - //cudf::bitmask_type const* rhs_mask, - //cudf::size_type rhs_offset) - ) { - //int tid = threadIdx.x; - //int blkid = blockIdx.x; - //int blksz = blockDim.x; - //int gridsz = gridDim.x; - - //int start = tid + blkid * blksz; - //int step = blksz * gridsz; - - //Masked output; - - //for (cudf::size_type i=start; i __global__ diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index c5786240068..9753fe67e5d 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -102,7 +102,6 @@ void binary_operation(column_view const& A, cuda_source += code::null_kernel; - std::cout << "*** CUDA_SOURCE ***" << std::endl; std::cout << cuda_source << std::endl; rmm::cuda_stream_view stream; @@ -115,7 +114,7 @@ void binary_operation(column_view const& A, 
cudf::jit::compiler_flags, headers_code, stream) - .set_kernel_inst("null_kernel", + .set_kernel_inst("masked_binary_op_kernel", { cudf::jit::get_type_name(outcol_view.type()), cudf::jit::get_type_name(A.type()), @@ -132,58 +131,6 @@ void binary_operation(column_view const& A, B.null_mask(), B.offset() ); - std::cout << "KERNEL LAUNCHED!!!" << std::endl; - /* - - - cudf::jit::launcher(hash, - cuda_source, - header_names, - cudf::jit::compiler_flags, - headers_code, - stream) - .set_kernel_inst("test_binop_kernel", // name of the kernel we are launching - {cudf::jit::get_type_name(outcol_view.type()), // list of template arguments - cudf::jit::get_type_name(A.type()), - cudf::jit::get_type_name(B.type())}); - //.launch(outcol_view.size(), - // cudf::jit::get_data_ptr(outcol_view), - // cudf::jit::get_data_ptr(A), - // cudf::jit::get_data_ptr(B), - // cudf::jit::get_data_ptr(outmsk_view), // ? - // A.null_mask(), - // A.offset(), - // B.null_mask(), - // B.offset() - //); - - */ - - /* - void kernel(cudf::size_type size, - TypeOut* out_data, - TypeLhs* lhs_data, - TypeRhs* rhs_data, - cudf::bitmask_type* output_mask, - cudf::bitmask_type const* lhs_mask, - cudf::size_type lhs_offset, - cudf::bitmask_type const* rhs_mask, - cudf::size_type rhs_offset) - - .set_kernel_inst{cudf::jit::get_type_name(out.type()), // list of template arguments - cudf::jit::get_type_name(lhs.type()), - cudf::jit::get_type_name(rhs.type()), - get_operator_name(op, OperatorType::Direct)}) - .launch(out.size(), - cudf::jit::get_data_ptr(out), - cudf::jit::get_data_ptr(lhs), - cudf::jit::get_data_ptr(rhs), - out.null_mask(), - lhs.null_mask(), - rhs.offset(), - rhs.null_mask(), - rhs.offset()); - */ } diff --git a/notebooks/udf-nulls-demo-final.ipynb b/notebooks/udf-nulls-demo-final.ipynb new file mode 100644 index 00000000000..bb72fd04378 --- /dev/null +++ b/notebooks/udf-nulls-demo-final.ipynb @@ -0,0 +1,1574 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "brutal-princeton", + 
"metadata": {}, + "source": [ + "# NA in cuDF UDFs\n", + "cuDFs design decision to store null informtion in bitmasks is really smart, and makes things very performant and tractable in memory bound circumstances such as GPU operations. However this design when coupled with the natural inefficiencies that arise from any kind of serial iteration over our data has made `` support in general user defined functions hard to solution for. This notebook offers an approach based on jitting a UDF's arguments as a special custom Numba type to produce a generic PTX function. This function is than inlined into a general kernel in libcudf and passed the relevant data and masks inside of libcudf. " + ] + }, + { + "cell_type": "markdown", + "id": "scenic-laugh", + "metadata": {}, + "source": [ + "#### Problem setup: concrete example" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "above-athletics", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import cudf" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "declared-correspondence", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
xy
011
1<NA>2
23<NA>
\n", + "
" + ], + "text/plain": [ + " x y\n", + "0 1 1\n", + "1 2\n", + "2 3 " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = cudf.DataFrame({\n", + " 'x': [1, None, 3],\n", + " 'y': [1, 2, None]\n", + "})\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "expected-milwaukee", + "metadata": {}, + "source": [ + "Consider the following UDF on two variables adapted from https://docs.rapids.ai/api/cudf/stable/guide-to-udfs.html. This API is fairly different from the pandas API, for several reasons:\n", + "- In cuDF, We need to write a loop over arrays in classic numba syntax\n", + "- In cuDF, the function returns into an output column we provide as an argument\n", + "- The result is different!" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "tired-niger", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
xyout
0112
1<NA>2<NA>
23<NA>3
\n", + "
" + ], + "text/plain": [ + " x y out\n", + "0 1 1 2\n", + "1 2 \n", + "2 3 3" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def pandas_add(x, y):\n", + " if x is not pd.NA and x < 2:\n", + " return x + y\n", + " else:\n", + " return x\n", + " \n", + "pandas_df = df.to_pandas(nullable=True)\n", + "pandas_df['out'] = pandas_df.apply(lambda row: pandas_add(row['x'], row['y']), axis=1)\n", + "pandas_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "employed-offer", + "metadata": {}, + "outputs": [], + "source": [ + "def gpu_add(x, y, out):\n", + " for i, (xi, yi) in enumerate(zip(x, y)):\n", + " if xi < 2:\n", + " out[i] = xi + yi\n", + " else:\n", + " out[i] = xi" + ] + }, + { + "cell_type": "markdown", + "id": "animal-gateway", + "metadata": {}, + "source": [ + "Problem: The null mask of `y` needs to only be considered if `x > 0`. But it" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "occupied-upgrade", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
xyout
0112.0
1<NA>2<NA>
23<NA><NA>
\n", + "
" + ], + "text/plain": [ + " x y out\n", + "0 1 1 2.0\n", + "1 2 \n", + "2 3 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = df.apply_rows(gpu_add,\n", + " incols=['x', 'y'],\n", + " outcols={'out':np.float64},\n", + " kwargs={})\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "western-relief", + "metadata": {}, + "source": [ + "We also don't support comparing `cudf.NA` in any of our UDFs, in any way." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "metallic-macedonia", + "metadata": {}, + "outputs": [], + "source": [ + "def gpu_add_error(x, y, out):\n", + " for i, (xi, yi) in enumerate(zip(x, y)):\n", + " if xi is pd.NA:\n", + " return 5\n", + " else:\n", + " return xi + yi" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "scenic-graduate", + "metadata": {}, + "outputs": [ + { + "ename": "TypingError", + "evalue": "Failed in nopython mode pipeline (step: nopython frontend)\n\u001b[1m\u001b[1m\u001b[1mNo implementation of function Function() found for signature:\n \n >>> gpu_add_error (array(int64, 1d, A), array(int64, 1d, A), array(float64, 1d, A))\n \nThere are 2 candidate implementations:\n\u001b[1m - Of which 2 did not match due to:\n Overload in function 'gpu_add_error ': File: ../../../../../../ipynb/: Line 1.\n With argument(s): '(array(int64, 1d, A), array(int64, 1d, A), array(float64, 1d, A))':\u001b[0m\n\u001b[1m Rejected as the implementation raised a specific error:\n TypingError: Failed in nopython mode pipeline (step: nopython frontend)\n \u001b[1m\u001b[1mUnknown attribute 'NA' of type Module()\n \u001b[1m\n File \"\", line 3:\u001b[0m\n \u001b[1mdef gpu_add_error(x, y, out):\n \n for i, (xi, yi) in enumerate(zip(x, y)):\n \u001b[1m if xi is pd.NA:\n \u001b[0m \u001b[1m^\u001b[0m\u001b[0m\n \u001b[0m\n \u001b[0m\u001b[1mDuring: typing of get attribute at (3)\u001b[0m\n \u001b[1m\n File \"\", line 3:\u001b[0m\n \u001b[1mdef 
gpu_add_error(x, y, out):\n \n for i, (xi, yi) in enumerate(zip(x, y)):\n \u001b[1m if xi is pd.NA:\n \u001b[0m \u001b[1m^\u001b[0m\u001b[0m\n\u001b[0m\n raised from /home/nfs/brmiller/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/core/typeinfer.py:1071\n\u001b[0m\n\u001b[0m\u001b[1mDuring: resolving callee type: Function()\u001b[0m\n\u001b[0m\u001b[1mDuring: typing of call at (8)\n\u001b[0m\n\u001b[1m\nFile \"\", line 8:\u001b[0m\n\u001b[1m\u001b[0m\n", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypingError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mincols\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'x'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'y'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0moutcols\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m'out'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfloat64\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m kwargs={})\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/repos/cudf/python/cudf/cudf/core/dataframe.py\u001b[0m in \u001b[0;36mapply_rows\u001b[0;34m(self, func, incols, outcols, kwargs, pessimistic_nulls, cache_key)\u001b[0m\n\u001b[1;32m 4776\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4777\u001b[0m 
\u001b[0mpessimistic_nulls\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4778\u001b[0;31m \u001b[0mcache_key\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcache_key\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4779\u001b[0m )\n\u001b[1;32m 4780\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/repos/cudf/python/cudf/cudf/utils/applyutils.py\u001b[0m in \u001b[0;36mapply_rows\u001b[0;34m(df, func, incols, outcols, kwargs, pessimistic_nulls, cache_key)\u001b[0m\n\u001b[1;32m 83\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mincols\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutcols\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpessimistic_nulls\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcache_key\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcache_key\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 84\u001b[0m )\n\u001b[0;32m---> 85\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mapplyrows\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 86\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 87\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/repos/cudf/python/cudf/cudf/utils/applyutils.py\u001b[0m in \u001b[0;36mrun\u001b[0;34m(self, df, **launch_params)\u001b[0m\n\u001b[1;32m 168\u001b[0m \u001b[0mbound\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbind\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 169\u001b[0m \u001b[0;31m# Launch kernel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 
170\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlaunch_kernel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbound\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mlaunch_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 171\u001b[0m \u001b[0;31m# Prepare pessimistic nullmask\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 172\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpessimistic_nulls\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/repos/cudf/python/cudf/cudf/utils/applyutils.py\u001b[0m in \u001b[0;36mlaunch_kernel\u001b[0;34m(self, df, args)\u001b[0m\n\u001b[1;32m 195\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 196\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mlaunch_kernel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 197\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkernel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 198\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 199\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/cuda/compiler.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 370\u001b[0m 
\u001b[0mkernel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkernel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 371\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 372\u001b[0;31m \u001b[0mkernel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkernel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mspecialize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 373\u001b[0m \u001b[0mblockdim\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_compute_thread_per_block\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkernel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 374\u001b[0m \u001b[0mgriddim\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mntasks\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mblockdim\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m//\u001b[0m \u001b[0mblockdim\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/cuda/compiler.py\u001b[0m in \u001b[0;36mspecialize\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 897\u001b[0m \u001b[0mtargetoptions\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'link'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlink\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 898\u001b[0m specialization = Dispatcher(self.py_func, [types.void(*argtypes)],\n\u001b[0;32m--> 899\u001b[0;31m self._bind, targetoptions)\n\u001b[0m\u001b[1;32m 900\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mspecializations\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margtypes\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mspecialization\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 901\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mspecialization\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/cuda/compiler.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, func, sigs, bind, targetoptions)\u001b[0m\n\u001b[1;32m 809\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msigs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 810\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Only one signature supported at present\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 811\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcompile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msigs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 812\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_can_compile\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 813\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/cuda/compiler.py\u001b[0m in \u001b[0;36mcompile\u001b[0;34m(self, sig)\u001b[0m\n\u001b[1;32m 947\u001b[0m kernel = compile_kernel(self.py_func, argtypes,\n\u001b[1;32m 948\u001b[0m 
\u001b[0mlink\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlink\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 949\u001b[0;31m **self.targetoptions)\n\u001b[0m\u001b[1;32m 950\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdefinitions\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margtypes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkernel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 951\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_bind\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/core/compiler_lock.py\u001b[0m in \u001b[0;36m_acquire_compile_lock\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 30\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_acquire_compile_lock\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 32\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 33\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0m_acquire_compile_lock\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 34\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + 
"\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/cuda/compiler.py\u001b[0m in \u001b[0;36mcompile_kernel\u001b[0;34m(pyfunc, args, link, debug, inline, fastmath, extensions, max_registers, opt)\u001b[0m\n\u001b[1;32m 55\u001b[0m def compile_kernel(pyfunc, args, link, debug=False, inline=False,\n\u001b[1;32m 56\u001b[0m fastmath=False, extensions=[], max_registers=None, opt=True):\n\u001b[0;32m---> 57\u001b[0;31m \u001b[0mcres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcompile_cuda\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpyfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtypes\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvoid\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdebug\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdebug\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minline\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minline\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 58\u001b[0m \u001b[0mfname\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcres\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfndesc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mllvm_func_name\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 59\u001b[0m lib, kernel = cres.target_context.prepare_cuda_kernel(cres.library, fname,\n", + "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/core/compiler_lock.py\u001b[0m in \u001b[0;36m_acquire_compile_lock\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 30\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_acquire_compile_lock\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 32\u001b[0;31m 
\u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 33\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0m_acquire_compile_lock\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 34\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/cuda/compiler.py\u001b[0m in \u001b[0;36mcompile_cuda\u001b[0;34m(pyfunc, return_type, args, debug, inline)\u001b[0m\n\u001b[1;32m 44\u001b[0m \u001b[0mreturn_type\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mreturn_type\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 45\u001b[0m \u001b[0mflags\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mflags\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 46\u001b[0;31m locals={})\n\u001b[0m\u001b[1;32m 47\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 48\u001b[0m \u001b[0mlibrary\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcres\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlibrary\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/core/compiler.py\u001b[0m in \u001b[0;36mcompile_extra\u001b[0;34m(typingctx, targetctx, func, args, return_type, flags, locals, library, pipeline_class)\u001b[0m\n\u001b[1;32m 600\u001b[0m pipeline = pipeline_class(typingctx, targetctx, library,\n\u001b[1;32m 601\u001b[0m args, return_type, flags, locals)\n\u001b[0;32m--> 602\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcompile_extra\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 
603\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 604\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/core/compiler.py\u001b[0m in \u001b[0;36mcompile_extra\u001b[0;34m(self, func)\u001b[0m\n\u001b[1;32m 350\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlifted\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 351\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlifted_from\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 352\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_compile_bytecode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 353\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 354\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mcompile_ir\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc_ir\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlifted\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlifted_from\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/core/compiler.py\u001b[0m in \u001b[0;36m_compile_bytecode\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 412\u001b[0m \"\"\"\n\u001b[1;32m 413\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfunc_ir\u001b[0m \u001b[0;32mis\u001b[0m 
\u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 414\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_compile_core\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 415\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 416\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_compile_ir\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/core/compiler.py\u001b[0m in \u001b[0;36m_compile_core\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 392\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstatus\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfail_reason\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 393\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_final_pipeline\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 394\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 395\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 396\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mCompilerError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"All available pipelines exhausted\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/core/compiler.py\u001b[0m in \u001b[0;36m_compile_core\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 383\u001b[0m \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 384\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 385\u001b[0;31m \u001b[0mpm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 386\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcr\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 387\u001b[0m \u001b[0;32mbreak\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/core/compiler_machinery.py\u001b[0m in \u001b[0;36mrun\u001b[0;34m(self, state)\u001b[0m\n\u001b[1;32m 337\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpipeline_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpass_desc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 338\u001b[0m \u001b[0mpatched_exception\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_patch_error\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 339\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mpatched_exception\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 340\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 341\u001b[0m \u001b[0;32mdef\u001b[0m 
\u001b[0mdependency_analysis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/core/compiler_machinery.py\u001b[0m in \u001b[0;36mrun\u001b[0;34m(self, state)\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[0mpass_inst\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_pass_registry\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpss\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpass_inst\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 329\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpass_inst\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mCompilerPass\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 330\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_runPass\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0midx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpass_inst\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstate\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 331\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 332\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mBaseException\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Legacy pass in use\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/core/compiler_lock.py\u001b[0m in \u001b[0;36m_acquire_compile_lock\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 30\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_acquire_compile_lock\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 32\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 33\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0m_acquire_compile_lock\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 34\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/core/compiler_machinery.py\u001b[0m in \u001b[0;36m_runPass\u001b[0;34m(self, index, pss, internal_state)\u001b[0m\n\u001b[1;32m 287\u001b[0m \u001b[0mmutated\u001b[0m \u001b[0;34m|=\u001b[0m \u001b[0mcheck\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_initialization\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minternal_state\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 288\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mSimpleTimer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mpass_time\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 289\u001b[0;31m \u001b[0mmutated\u001b[0m \u001b[0;34m|=\u001b[0m \u001b[0mcheck\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_pass\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minternal_state\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 290\u001b[0m \u001b[0;32mwith\u001b[0m 
\u001b[0mSimpleTimer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mfinalize_time\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 291\u001b[0m \u001b[0mmutated\u001b[0m \u001b[0;34m|=\u001b[0m \u001b[0mcheck\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_finalizer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minternal_state\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/core/compiler_machinery.py\u001b[0m in \u001b[0;36mcheck\u001b[0;34m(func, compiler_state)\u001b[0m\n\u001b[1;32m 260\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 261\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mcheck\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcompiler_state\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 262\u001b[0;31m \u001b[0mmangled\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcompiler_state\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 263\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmangled\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 264\u001b[0m msg = (\"CompilerPass implementations should return True/False. 
\"\n", + "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/core/typed_passes.py\u001b[0m in \u001b[0;36mrun_pass\u001b[0;34m(self, state)\u001b[0m\n\u001b[1;32m 98\u001b[0m \u001b[0mstate\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreturn_type\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 99\u001b[0m \u001b[0mstate\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlocals\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 100\u001b[0;31m raise_errors=self._raise_errors)\n\u001b[0m\u001b[1;32m 101\u001b[0m \u001b[0mstate\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtypemap\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtypemap\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 102\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_raise_errors\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/core/typed_passes.py\u001b[0m in \u001b[0;36mtype_inference_stage\u001b[0;34m(typingctx, interp, args, return_type, locals, raise_errors)\u001b[0m\n\u001b[1;32m 70\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[0minfer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuild_constraint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 72\u001b[0;31m \u001b[0minfer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpropagate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mraise_errors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mraise_errors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 73\u001b[0m \u001b[0mtypemap\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrestype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcalltypes\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0minfer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munify\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mraise_errors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mraise_errors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 74\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/core/typeinfer.py\u001b[0m in \u001b[0;36mpropagate\u001b[0;34m(self, raise_errors)\u001b[0m\n\u001b[1;32m 1069\u001b[0m if isinstance(e, ForceLiteralArg)]\n\u001b[1;32m 1070\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mforce_lit_args\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1071\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1072\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1073\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mreduce\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moperator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mor_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mforce_lit_args\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mTypingError\u001b[0m: Failed in nopython mode pipeline (step: nopython frontend)\n\u001b[1m\u001b[1m\u001b[1mNo implementation of function Function() found for signature:\n \n >>> gpu_add_error (array(int64, 1d, A), array(int64, 1d, A), array(float64, 1d, A))\n \nThere are 2 candidate implementations:\n\u001b[1m - Of which 2 did not match due to:\n Overload in function 'gpu_add_error ': File: ../../../../../../ipynb/: Line 1.\n With argument(s): '(array(int64, 1d, A), array(int64, 1d, A), array(float64, 1d, A))':\u001b[0m\n\u001b[1m Rejected as the implementation raised a specific error:\n TypingError: Failed in 
nopython mode pipeline (step: nopython frontend)\n \u001b[1m\u001b[1mUnknown attribute 'NA' of type Module()\n \u001b[1m\n File \"\", line 3:\u001b[0m\n \u001b[1mdef gpu_add_error(x, y, out):\n \n for i, (xi, yi) in enumerate(zip(x, y)):\n \u001b[1m if xi is pd.NA:\n \u001b[0m \u001b[1m^\u001b[0m\u001b[0m\n \u001b[0m\n \u001b[0m\u001b[1mDuring: typing of get attribute at (3)\u001b[0m\n \u001b[1m\n File \"\", line 3:\u001b[0m\n \u001b[1mdef gpu_add_error(x, y, out):\n \n for i, (xi, yi) in enumerate(zip(x, y)):\n \u001b[1m if xi is pd.NA:\n \u001b[0m \u001b[1m^\u001b[0m\u001b[0m\n\u001b[0m\n raised from /home/nfs/brmiller/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/core/typeinfer.py:1071\n\u001b[0m\n\u001b[0m\u001b[1mDuring: resolving callee type: Function()\u001b[0m\n\u001b[0m\u001b[1mDuring: typing of call at (8)\n\u001b[0m\n\u001b[1m\nFile \"\", line 8:\u001b[0m\n\u001b[1m\u001b[0m\n" + ] + } + ], + "source": [ + "df = df.apply_rows(gpu_add_error,\n", + " incols=['x', 'y'],\n", + " outcols={'out':np.float64},\n", + " kwargs={})\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "material-financing", + "metadata": {}, + "source": [ + "#### Why\n", + "This is because nulls are generally handled \"pessimistically\", which roughly means the value of the output mask bit is just set to be a big `or` between all the input column's bitmasks. This isn't a problem in pandas, because the UDF is applied by looping through the rows and individually passing each value elementwise through the UDF. When it encounters a null, the value that gets passed is `pd.NA`, which behaves the way it needs to for the function to return the correct value for that row. 
\n", + "#### The two things we want to do then are:\n", + "- Make the API feel a little more natural\n", + "- explicitly be able to handle nulls in a dynamic way" + ] + }, + { + "cell_type": "markdown", + "id": "informal-wisdom", + "metadata": {}, + "source": [ + "# Detour: The cuDF UnaryOp Compilation Pipeline" + ] + }, + { + "cell_type": "markdown", + "id": "mineral-roman", + "metadata": {}, + "source": [ + "```\n", + "Python Function -> Numba -> PTX Code -> libcudf parser -> inlineable function -> Jitify -> Execution\n", + " | \n", + " data pointers ---------^\n", + " headers ---------------^\n", + " extra kernel code -----^\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "overall-discretion", + "metadata": {}, + "source": [ + "The proposed solution to this problem draws heavily on the existing concepts in cuDF's unaryop machinery. This is a situation where the API feels really natural and is quite compatible with pandas, even though ours is named `applymap` and theirs is named `apply` for some reason." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "expired-finder", + "metadata": {}, + "outputs": [], + "source": [ + "x = cudf.Series([1, None, 3])\n", + "\n", + "def f(x):\n", + " return x + 1" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "attended-mount", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2\n", + "1 \n", + "2 4\n", + "dtype: int64" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x.applymap(f)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "guided-slave", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1\n", + "1 \n", + "2 3\n", + "dtype: Int64" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x_pd = x.to_pandas(nullable=True)\n", + "x_pd" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "modern-barrier", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2\n", + "1 \n", + "2 4\n", + "dtype: object" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x_pd.apply(f)" + ] + }, + { + "cell_type": "markdown", + "id": "general-madagascar", + "metadata": {}, + "source": [ + "The null handling here is pretty simple - it's always a copy of the original bitmask. But that's not why we're here. Let's pop the hood. 
From https://github.com/rapidsai/cudf/blob/branch-0.19/python/cudf/cudf/core/column/numerical.py#L721-L726:\n", + "\n", + "```\n", + "def _numeric_column_unaryop(operand: ColumnBase, op: str) -> ColumnBase:\n", + " if callable(op):\n", + " return libcudf.transform.transform(operand, op)\n", + "\n", + " op = libcudf.unary.UnaryOp[op.upper()]\n", + " return libcudf.unary.unary_operation(operand, op)\n", + "```\n", + "\n", + "From here the `transform` cython picks up the callable python function as well as the `Column` to which it is to be applied. Here's some pseudocode for what happens inside it:\n", + "\n", + "```\n", + "def transform(Column input, op):\n", + " signature = get_signature(input)\n", + " compiled_op = cudautils.compile_udf(op, signature)\n", + " c_str = compiled_op[0].encode('UTF-8')\n", + "\n", + " c_output = move(\n", + " libcudf_transform(\n", + " input,\n", + " c_str\n", + " )\n", + "\n", + " )\n", + "\n", + " return Column.from_unique_ptr(move(c_output))\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "explicit-lebanon", + "metadata": {}, + "source": [ + "What we have so far then is:\n", + "1. Our input column\n", + "2. A PTX function compiled by Numba based off the python function, and the type of the argument being passed\n", + "\n", + "`cudautils.compile_udf` calls out to Numba to transform the pure python function into PTX code through LLVM IR and a series of compilation steps. 
The pipeline inside Numba goes something like:\n", + "\n", + "```\n", + "Python function -> python bytecode -> type inference -> lowering -> LLVM IR -> PTX code\n", + "```\n", + "\n", + "#### Python bytecode: These are instructions for the python interpreter" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "fifteen-device", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 4 0 LOAD_FAST 0 (x)\n", + " 2 LOAD_CONST 1 (1)\n", + " 4 BINARY_ADD\n", + " 6 RETURN_VALUE\n" + ] + } + ], + "source": [ + "import dis\n", + "dis.dis(f)" + ] + }, + { + "cell_type": "markdown", + "id": "latter-helping", + "metadata": {}, + "source": [ + "#### Type inference: Assembly-level languages only operate in terms of primitive types. \n", + "\n", + "Thus, to generate LLVM IR, Numba needs to know the types of every variable at every point during the function, from arguments to return values. This is one of the reasons `signature` is a required arg to `compile_udf`. We only get this information at runtime, because the user can pass anything into their UDF. When they apply their UDF to a `Series`, it's only at that point Numba can know that the `x` in `f(x)` is of type `int64` for instance - and only then can it actually complete the type inference portion of the process.\n", + "\n", + "#### Lowering\n", + "Once type inference is complete and Numba knows the types of all the input, output, and intermediate variables, it combines that with the algorithmic information from the python function's bytecode and produces LLVM IR in a process called \"lowering\". LLVM IR is like a platform-independent assembly language. 
One can compile from LLVM IR to assembly code for any platform, including into PTX code for NVIDIA GPUs\n", + "\n", + "\n", + "#### What does this mean for us?\n", + "It means that what we get out of `cudautils.compile_udf` is an actual string containing a PTX function, compiled by Numba for arguments of the type `input.dtype`. It is important to note that this function is a function that operates, like the original function, on a single element. It does NOT contain a kernel. In fact, here's exactly what it is:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "direct-venezuela", + "metadata": {}, + "outputs": [], + "source": [ + "from cudf.utils.cudautils import compile_udf" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "engaged-coaching", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "//\n", + "// Generated by NVIDIA NVVM Compiler\n", + "//\n", + "// Compiler Build ID: CL-27506705\n", + "// Cuda compilation tools, release 10.2, V10.2.89\n", + "// Based on LLVM 3.4svn\n", + "//\n", + "\n", + ".version 6.5\n", + ".target sm_70\n", + ".address_size 64\n", + "\n", + "\t// .globl\t_ZN8__main__5f$248Ex\n", + ".common .global .align 8 .u64 _ZN08NumbaEnv8__main__5f$248Ex;\n", + "\n", + ".visible .func (.param .b32 func_retval0) _ZN8__main__5f$248Ex(\n", + "\t.param .b64 _ZN8__main__5f$248Ex_param_0,\n", + "\t.param .b64 _ZN8__main__5f$248Ex_param_1\n", + ")\n", + "{\n", + "\t.reg .b32 \t%r<2>;\n", + "\t.reg .b64 \t%rd<4>;\n", + "\n", + "\n", + "\tld.param.u64 \t%rd1, [_ZN8__main__5f$248Ex_param_0];\n", + "\tld.param.u64 \t%rd2, [_ZN8__main__5f$248Ex_param_1];\n", + "\tadd.s64 \t%rd3, %rd2, 1;\n", + "\tst.u64 \t[%rd1], %rd3;\n", + "\tmov.u32 \t%r1, 0;\n", + "\tst.param.b32\t[func_retval0+0], %r1;\n", + "\tret;\n", + "}\n", + "\n", + "\n", + "\u0000\n" + ] + } + ], + "source": [ + "from numba.np import numpy_support\n", + "numba_type = 
numpy_support.from_dtype(np.dtype('int64'))\n", + "ptx, _ = compile_udf(f, (numba_type,))\n", + "\n", + "print(ptx)" + ] + }, + { + "cell_type": "markdown", + "id": "inclusive-endorsement", + "metadata": {}, + "source": [ + "```\n", + "// .globl main\n", + "\n", + ".visible .func (.param .b32 return_value) main(\n", + "\t.param .b64 param_0, # TWO input parameters\n", + "\t.param .b64 param_1 \n", + ")\n", + "{\n", + "\t.reg .b32 \t%r<2>; # declare two 32-bit registers, named %r0 and %r1\n", + "\t.reg .b64 \t%rd<4>; # declare 4 64-bit registers named %rd0, %rd1, %rd2, %rd3\n", + "\n", + "\n", + "\tld.param.u64 \t%rd1, [param_0]; # load param_0 into %rd1\n", + "\tld.param.u64 \t%rd2, [param_1]; # load param_1 into %rd2\n", + "\tadd.s64 \t%rd3, %rd2, 1; # take the value of %rd2 (i.e. param_1), add 1, place in %rd3\n", + "\tst.u64 \t[%rd1], %rd3; # store the value of %rd3 into the location pointed to by %rd1\n", + "\tmov.u32 \t%r1, 0; # move 0 into %r1\n", + "\tst.param.b32\t[func_retval0+0], %r1; # place the value of %r1 into the return value\n", + "\tret; # return 0\n", + "}\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "imperial-quality", + "metadata": {}, + "source": [ + "# What happens next?\n", + "Libcudf takes it from here. Broadly speaking, what happens at this point is libcudf hacks together a string (which consists of several elements) which ends up being handed off to jitify and compiled into a final kernel. Jitify then launches that kernel, taking the pointer to the beginning of the actual data column to be transformed as an argument. The three elements are:\n", + "\n", + "1. A header\n", + "2. An outer \"calling\" kernel that generically calls the PTX function\n", + "3. A processed version of the PTX function that inlines it directly into CUDA\n", + "\n", + "The libcudf parser essentially takes the PTX function as above and turns it into a generically callable inlinable function. 
The final file that gets passed off to jitify to be compiled looks like this:\n", + "\n", + "```\n", + "\n", + "#pragma once\n", + "\n", + "// Include Jitify's cstddef header first\n", + "#include \n", + "\n", + "#include \n", + "#include \n", + "#include \n", + "#include \n", + "#include \n", + "#include \n", + "\n", + "template \n", + " __global__\n", + " void kernel(cudf::size_type size,\n", + " TypeOut* out_data, TypeIn* in_data) {\n", + " int tid = threadIdx.x;\n", + " int blkid = blockIdx.x;\n", + " int blksz = blockDim.x;\n", + " int gridsz = gridDim.x;\n", + "\n", + " int start = tid + blkid * blksz;\n", + " int step = blksz * gridsz;\n", + "\n", + " for (cudf::size_type i=start; i;\");\n", + " /** .reg .b32 \t%r<2> */\n", + " asm volatile (\" .reg .b64 _rd<4>;\");\n", + " /** .reg .b64 \t%rd<4> */\n", + " asm volatile (\" mov.u64 _rd1, %0;\": : \"l\"(_ZN8__main__5f_241Ex_param_0));\n", + " /** ld.param.u64 \t%rd1, [_ZN8__main__5f$241Ex_param_0]\n", + " asm volatile (\" mov.u64 _rd2, %0;\": : \"l\"(_ZN8__main__5f_241Ex_param_1));\n", + " /** ld.param.u64 \t%rd2, [_ZN8__main__5f$241Ex_param_1] \n", + " asm volatile (\" add.s64 _rd3, _rd2, 1;\");\n", + " /** add.s64 \t%rd3, %rd2, 1 */\n", + " asm volatile (\" st.u64 [_rd1], _rd3;\");\n", + " /** st.u64 \t[%rd1], %rd3 */\n", + " asm volatile (\" mov.u32 _r1, 0;\");\n", + " /** mov.u32 \t%r1, 0 */\n", + " asm volatile (\" /** *** SNIP. *** */\");\n", + " /** st.param.b32\t[func_retval0+0], %r1 */\n", + " asm volatile (\"bra RETTGT;\");\n", + " asm volatile (\"RETTGT:}\");\n", + "\n", + "}\n", + "\n", + "```\n", + "\n", + "Apart from returning the data back to the user, that's more or less the process. 
" + ] + }, + { + "cell_type": "markdown", + "id": "musical-membership", + "metadata": {}, + "source": [ + "# What does all this have to do with NAs?\n", + "The pipeline is based on the idea that anything you could want to do with a single value of `x` arithmatically is expressable as a generic PTX function of `x` as along with some type information, that numba can generate for you. The rest of the machinery is just meant to deliver the data to this function threadwise. We're going to extend this concept to a function of four variables instead of one: a masked binary operation `x + y` where the four arguments are:\n", + "\n", + "1. `x`\n", + "2. `y`\n", + "3. `x.mask`\n", + "4. `y.mask`\n", + "\n", + "\n", + "We're going to modify the general kernel that calls `GENERIC_UNARY_OP` and generalizes it to accept these four arguments and call a `GENERIC_BINARY_OP` instead (with two extra arguments - the mask bools)\n", + "\n", + "# Creating a Numba extension type\n", + "\n", + "Remember how Numba produces PTX code from a python function and some type information? We're going to create a new Type in Numba that is build around a Struct:\n", + "\n", + "```\n", + "struct Masked {\n", + " int64_t value;\n", + " bool valid;\n", + "}\n", + "```\n", + "And we're going to add an overload of `add` (`+`, `operator.add`) to Numba's registry of function signatures that correctly handles null semantics. Then we're going to JIT the incoming python function and use a `Masked` type for every argument. 
" + ] + }, + { + "cell_type": "markdown", + "id": "qualified-communications", + "metadata": {}, + "source": [ + "#### Tell Numba that a `MaskedType` exists, and not much else" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "boring-orleans", + "metadata": {}, + "outputs": [], + "source": [ + "from numba.core.extending import types\n", + "class MaskedType(types.Type):\n", + " # A corresponding MaskedType for numba\n", + " # numba can only generate LLVM IR for things\n", + " # that it recognizes. This is the most basic\n", + " # thing needed for numba to recognize the type,\n", + " # all it really says is \"there's a type, \n", + " # called MaskedType\". name is for __repr__\n", + " def __init__(self):\n", + " super().__init__(name=\"Masked\")\n", + " \n", + "numba_masked = MaskedType()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "annual-regard", + "metadata": {}, + "outputs": [], + "source": [ + "from numba.core.extending import make_attribute_wrapper\n", + "\n", + "make_attribute_wrapper(MaskedType, \"value\", \"value\")\n", + "make_attribute_wrapper(MaskedType, \"valid\", \"valid\")" + ] + }, + { + "cell_type": "markdown", + "id": "persistent-palace", + "metadata": {}, + "source": [ + "#### Tell Numba what this type looks like. In our case, it's a struct. " + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "opening-specific", + "metadata": {}, + "outputs": [], + "source": [ + "from numba.core.extending import register_model, models\n", + "\n", + "@register_model(MaskedType)\n", + "class MaskedModel(models.StructModel):\n", + " def __init__(self, dmm, fe_type):\n", + " members = [(\"value\", types.int64), (\"valid\", types.bool_)]\n", + " models.StructModel.__init__(self, dmm, fe_type, members)" + ] + }, + { + "cell_type": "markdown", + "id": "posted-confirmation", + "metadata": {}, + "source": [ + "#### Register an overload of `operator.add` with Numba's registry of `CUDA` functions. 
\n", + "This is part of the typing phase. When we pass `f(x, y): return x + y` into Numba and say that `x` and `y` are of type `Masked`, it hits the `x + y` statement and goes looking for an overload of `add` with a signature matching those operands. It works by either finding a match and the end or not having one. This piece of code conditionally emits the signature it needs to find, when prompted with two arguments of type `Masked`. One can see how they might dynamically return different types depending on arguments. But this roughly says \"when Numba looks for an overload of `add` that takes two `Masked` as arguments, let it know that there is one, and it will return a `Masked`. " + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "subjective-marshall", + "metadata": {}, + "outputs": [], + "source": [ + "from numba.cuda.cudadecl import registry as cuda_registry\n", + "import operator\n", + "from numba.core.typing.templates import AbstractTemplate\n", + "\n", + "\n", + "@cuda_registry.register_global(operator.add)\n", + "class MaskedScalarAdd(AbstractTemplate):\n", + " # abstracttemplate vs concretetemplate\n", + " def generic(self, args, kws):\n", + " if isinstance(args[0], MaskedType) and isinstance(args[1], MaskedType):\n", + " return signature(numba_masked, numba_masked, numba_masked)" + ] + }, + { + "cell_type": "markdown", + "id": "compliant-strengthening", + "metadata": {}, + "source": [ + "#### Implement Masked + Masked\n", + "So far, Numba knows:\n", + "- There's a `MaskedType`. \n", + "- There's an overload of `operator.add` that accepts two `MaskedType` and returns a `MaskedType`\n", + "\n", + "Now it essentially needs an implementation for that overload of `operator.add`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "treated-pastor", + "metadata": {}, + "outputs": [], + "source": [ + "from numba.cuda.cudaimpl import lower as cuda_lower\n", + "\n", + "@cuda_lower(operator.add, MaskedType, MaskedType)\n", + "def masked_scalar_add_impl(context, builder, sig, args):\n", + " # get the types from the signature\n", + " masked_type_1, masked_type_2 = sig.args\n", + " masked_return_type = sig.return_type\n", + "\n", + " # create LLVM IR structs\n", + " m1 = cgutils.create_struct_proxy(masked_type_1)(\n", + " context, builder, value=args[0]\n", + " )\n", + " m2 = cgutils.create_struct_proxy(masked_type_2)(\n", + " context, builder, value=args[1]\n", + " )\n", + " result = cgutils.create_struct_proxy(masked_return_type)(context, builder)\n", + "\n", + " valid = builder.and_(m1.valid, m2.valid)\n", + " result.valid = valid\n", + " with builder.if_then(valid):\n", + " result.value = builder.add(m1.value, m2.value)\n", + "\n", + " return result._getvalue()" + ] + }, + { + "cell_type": "markdown", + "id": "clear-assessment", + "metadata": {}, + "source": [ + "# Testing it Out" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "formed-soccer", + "metadata": {}, + "outputs": [], + "source": [ + "from numba import cuda\n", + "def compile_masked(func):\n", + " signature = (numba_masked, numba_masked)\n", + " ptx, _ = cuda.compile_ptx_for_current_device(func, signature, device=True)\n", + " return ptx" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "essential-shade", + "metadata": {}, + "outputs": [], + "source": [ + "def f(x, y):\n", + " return x + y" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "understanding-firmware", + "metadata": {}, + "outputs": [], + "source": [ + "#ptx = compile_masked(f)" + ] + }, + { + "cell_type": "markdown", + "id": "wound-communications", + "metadata": {}, + "source": [ + "```\n", + " // .globl _ZN8__main__6f$2411E6Masked6Masked 
\n", + ".common .global .align 8 .u64 _ZN08NumbaEnv8__main__6f$2411E6Masked6Masked; \n", + " \n", + ".visible .func (.param .b32 func_retval0) _ZN8__main__6f$2411E6Masked6Masked( \n", + " .param .b64 _ZN8__main__6f$2411E6Masked6Masked_param_0, \n", + " .param .b64 _ZN8__main__6f$2411E6Masked6Masked_param_1, \n", + " .param .b32 _ZN8__main__6f$2411E6Masked6Masked_param_2, \n", + " .param .b64 _ZN8__main__6f$2411E6Masked6Masked_param_3, \n", + " .param .b32 _ZN8__main__6f$2411E6Masked6Masked_param_4 \n", + ") \n", + "{ \n", + " .reg .pred %p<4>; \n", + " .reg .b16 %rs<4>; \n", + " .reg .b32 %r<2>; \n", + " .reg .b64 %rd<6>; \n", + " \n", + " \n", + " ld.param.u64 %rd1, [_ZN8__main__6f$2411E6Masked6Masked_param_0]; \n", + " ld.param.u64 %rd2, [_ZN8__main__6f$2411E6Masked6Masked_param_1]; \n", + " ld.param.u64 %rd3, [_ZN8__main__6f$2411E6Masked6Masked_param_3]; \n", + " ld.param.u8 %rs1, [_ZN8__main__6f$2411E6Masked6Masked_param_2]; \n", + " setp.ne.s16 %p1, %rs1, 0; \n", + " ld.param.u8 %rs2, [_ZN8__main__6f$2411E6Masked6Masked_param_4]; \n", + " setp.ne.s16 %p2, %rs2, 0; \n", + " and.pred %p3, %p1, %p2; \n", + " add.s64 %rd4, %rd3, %rd2; \n", + " selp.b64 %rd5, %rd4, 0, %p3; \n", + " selp.u16 %rs3, 1, 0, %p3; \n", + " st.u64 [%rd1], %rd5; \n", + " st.u8 [%rd1+8], %rs3; \n", + " mov.u32 %r1, 0; \n", + " st.param.b32 [func_retval0+0], %r1; \n", + " ret; \n", + "} \n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "atmospheric-database", + "metadata": {}, + "source": [ + "#### Then, this is the whole file being passed to jitify:\n", + "\n", + "```cuda\n", + "\n", + " #pragma once\n", + "\n", + " // Include Jitify's cstddef header first\n", + " #include \n", + "\n", + " #include \n", + " #include \n", + " #include \n", + " #include \n", + " #include \n", + " #include \n", + "\n", + " struct Masked {\n", + " int64_t value;\n", + " bool valid;\n", + " };\n", + " \n", + " \n", + "\n", + " void null_kernel(cudf::size_type size,\n", + " TypeOut* out_data, \n", + " 
TypeLhs* lhs_data,\n", + " TypeRhs* rhs_data,\n", + " bool* out_mask,\n", + " cudf::bitmask_type const* lhs_mask,\n", + " cudf::size_type lhs_offset,\n", + " cudf::bitmask_type const* rhs_mask,\n", + " cudf::size_type rhs_offset\n", + " ) {\n", + " int tid = threadIdx.x;\n", + " int blkid = blockIdx.x;\n", + " int blksz = blockDim.x;\n", + " int gridsz = gridDim.x;\n", + "\n", + " int start = tid + blkid * blksz;\n", + " int step = blksz * gridsz;\n", + "\n", + " Masked output;\n", + "\n", + " char l_valid;\n", + " char r_valid;\n", + "\n", + " long int l_data;\n", + " long int r_data;\n", + "\n", + " for (cudf::size_type i=start; i;\"); \n", + " /** .reg .pred %p<4> */ \n", + " \n", + " asm volatile (\" .reg .b16 _rs<4>;\"); \n", + " /** .reg .b16 %rs<4> */ \n", + " \n", + " asm volatile (\" .reg .b32 _r<2>;\"); \n", + " /** .reg .b32 %r<2> */ \n", + " \n", + " asm volatile (\" .reg .b64 _rd<6>;\"); \n", + " /** .reg .b64 %rd<6> */ \n", + "\n", + " asm volatile (\" mov.u64 _rd1, %0;\": : \"l\"(_ZN8__main__6f_2413E6Masked6Masked_param_0));\n", + " /** ld.param.u64 %rd1, [_ZN8__main__6f$2413E6Masked6Masked_param_0] */\n", + "\n", + " asm volatile (\" mov.u64 _rd2, %0;\": : \"l\"(_ZN8__main__6f_2413E6Masked6Masked_param_1));\n", + " /** ld.param.u64 %rd2, [_ZN8__main__6f$2413E6Masked6Masked_param_1] */\n", + "\n", + " asm volatile (\" mov.u64 _rd3, %0;\": : \"l\"(_ZN8__main__6f_2413E6Masked6Masked_param_3));\n", + " /** ld.param.u64 %rd3, [_ZN8__main__6f$2413E6Masked6Masked_param_3] */\n", + "\n", + " asm volatile (\" cvt.u8.u8 _rs1, %0;\": : \"h\"( static_cast(_ZN8__main__6f_2413E6Masked6Masked_param_2)));\n", + " /** ld.param.u8 %rs1, [_ZN8__main__6f$2413E6Masked6Masked_param_2] */\n", + "\n", + " asm volatile (\" setp.ne.s16 _p1, _rs1, 0;\");\n", + " /** setp.ne.s16 %p1, %rs1, 0 */\n", + "\n", + " asm volatile (\" cvt.u8.u8 _rs2, %0;\": : \"h\"( static_cast(_ZN8__main__6f_2413E6Masked6Masked_param_4)));\n", + " /** ld.param.u8 %rs2, 
[_ZN8__main__6f$2413E6Masked6Masked_param_4] */\n", + "\n", + " asm volatile (\" setp.ne.s16 _p2, _rs2, 0;\");\n", + " /** setp.ne.s16 %p2, %rs2, 0 */\n", + "\n", + " asm volatile (\" and.pred _p3, _p1, _p2;\");\n", + " /** and.pred %p3, %p1, %p2 */\n", + "\n", + " asm volatile (\" add.s64 _rd4, _rd3, _rd2;\");\n", + " /** add.s64 %rd4, %rd3, %rd2 */\n", + "\n", + " asm volatile (\" selp.b64 _rd5, _rd4, 0, _p3;\");\n", + " /** selp.b64 %rd5, %rd4, 0, %p3 */\n", + "\n", + " asm volatile (\" selp.u16 _rs3, 1, 0, _p3;\");\n", + " /** selp.u16 %rs3, 1, 0, %p3 */\n", + "\n", + " asm volatile (\" st.u64 [_rd1], _rd5;\");\n", + " /** st.u64 [%rd1], %rd5 */\n", + "\n", + " asm volatile (\" st.u8 [_rd1+8], _rs3;\");\n", + " /** st.u8 [%rd1+8], %rs3 */\n", + "\n", + " asm volatile (\" mov.u32 _r1, 0;\");\n", + " /** mov.u32 %r1, 0 */\n", + "\n", + " asm volatile (\" /** *** The way we parse the CUDA PTX assumes the function returns the return value through the first function parameter. Thus the `st.param.***` instructions are not processed. 
*** */\");\n", + " /** st.param.b32 [func_retval0+0], %r1 */\n", + "\n", + " asm volatile (\"bra RETTGT;\");\n", + "\n", + "\n", + " asm volatile (\"RETTGT:}\");} \n", + " \n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "concrete-pillow", + "metadata": {}, + "source": [ + "# Test it\n", + "Here are some very basic cython bindings just used for the purposes of testing this exact functionality\n", + "```\n", + "def masked_binary_op(Column A, Column B, op, Column output_column, Column output_mask):\n", + " cdef column_view A_view = A.view()\n", + " cdef column_view B_view = B.view()\n", + "\n", + " cdef string c_str\n", + " cdef type_id c_tid\n", + " cdef data_type c_dtype\n", + "\n", + " if A.dtype != np.dtype('int64') or B.dtype != np.dtype('int64'):\n", + " raise TypeError('int64 please')\n", + " \n", + " \n", + " from cudf.core.udf import compile_udf\n", + " c_str = compile_udf(op).encode('UTF-8')\n", + "\n", + " c_tid = (\n", + " np_to_cudf_types[np.dtype('int64')]\n", + " )\n", + " c_dtype = data_type(c_tid)\n", + "\n", + " cdef column_view outcol_view = output_column.view()\n", + " cdef column_view outmsk_view = output_mask.view()\n", + "\n", + " with nogil:\n", + " c_output = move(libcudf_transform.masked_binary_op(\n", + " A_view,\n", + " B_view,\n", + " c_str,\n", + " c_dtype,\n", + " outcol_view,\n", + " outmsk_view\n", + " ))\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "after-fellow", + "metadata": {}, + "outputs": [], + "source": [ + "from cudf._lib.transform import masked_binary_op\n", + "\n", + "def demo_udf(func, s1, s2):\n", + " col1, col2 = s1._column, s2._column\n", + "\n", + " output_column = cudf.core.column.as_column(np.arange(8), dtype='int64')\n", + " output_mask = cudf.core.column.as_column([False] * 8)\n", + "\n", + " result_col = masked_binary_op(col1, col2, func, output_column, output_mask)\n", + " return cudf.Series(result_col)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + 
"id": "phantom-square", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2\n", + "1 \n", + "2 \n", + "3 \n", + "4 6\n", + "5 \n", + "6 10\n", + "7 \n", + "dtype: int64" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def f(x, y):\n", + " return x + y\n", + "\n", + "s1 = cudf.Series([1, None, 3, None, 2, 2, 5, None])\n", + "s2 = cudf.Series([1, 2, None, None, 4, None, 5, None])\n", + "\n", + "demo_udf(f, s1, s2)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "southern-stationery", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2\n", + "1 \n", + "2 \n", + "3 \n", + "4 6\n", + "5 \n", + "6 10\n", + "7 \n", + "dtype: int64" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s1 + s2" + ] + }, + { + "cell_type": "markdown", + "id": "recovered-premiere", + "metadata": {}, + "source": [ + "# `cudf.NA`\n", + "In general, we wan't the capability to work with `cudf.NA` inside our functions directly. To do this, we're just going to reapply the same machinery to overload what happens when we add a `MaskedType` to `cudf.NA`." 
+ ] + }, + { + "cell_type": "markdown", + "id": "filled-malta", + "metadata": {}, + "source": [ + "#### Create an NAType" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "chemical-trick", + "metadata": {}, + "outputs": [], + "source": [ + "from cudf.core.scalar import _NAType\n", + "class NAType(types.Type):\n", + " # \"There is a type called NAType\"\n", + " def __init__(self):\n", + " super().__init__(name=\"NA\")\n", + "\n", + "numba_na = NAType()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "southern-prague", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "numba.core.datamodel.models.OpaqueModel" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from numba.core.extending import typeof_impl\n", + "@typeof_impl.register(_NAType)\n", + "def typeof_na(val, c):\n", + " # instances of _NAType will be \n", + " # treated as instances of NAType. \n", + " return numba_na\n", + "\n", + "\n", + "\n", + "register_model(NAType)(models.OpaqueModel)" + ] + }, + { + "cell_type": "markdown", + "id": "accessible-monthly", + "metadata": {}, + "source": [ + "#### `operator.add` typing for Masked <-> NA" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "harmful-glossary", + "metadata": {}, + "outputs": [], + "source": [ + "@cuda_registry.register_global(operator.add)\n", + "class MaskedScalarAddNull(AbstractTemplate):\n", + " def generic(self, args, kws):\n", + " if isinstance(args[0], MaskedType) and isinstance(args[1], NAType):\n", + " return signature(numba_masked, numba_masked, numba_na)\n", + " " + ] + }, + { + "cell_type": "markdown", + "id": "brown-cheese", + "metadata": {}, + "source": [ + "#### Lowering: AKA what to actually do when this is requested\n", + "This says that when `+` is invoked between a `MaskedType` and an `NAType`, we make a new `MaskedType`, set its validity to zero, and return it." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "encouraging-reynolds", + "metadata": {}, + "outputs": [], + "source": [ + "from numba.cuda.cudaimpl import registry as cuda_lowering_registry\n", + "\n", + "@cuda_lower(operator.add, MaskedType, NAType)\n", + "def masked_scalar_add_na_impl(context, builder, sig, args):\n", + "# return_type = sig.return_type\n", + " # use context to get llvm type for a bool\n", + " result = cgutils.create_struct_proxy(numba_masked)(context, builder)\n", + " result.valid = context.get_constant(types.boolean, 0)\n", + " return result._getvalue()\n", + "\n", + "\n", + "@cuda_lowering_registry.lower_constant(NAType)\n", + "def constant_dummy(context, builder, ty, pyval):\n", + " # This handles None, etc.\n", + " return context.get_dummy_value()" + ] + }, + { + "cell_type": "markdown", + "id": "productive-rings", + "metadata": {}, + "source": [ + "# Constants\n", + "At this point the pattern is familiar. Register an overload that emits a signature if the operands match a `MaskedType` and a constant. The lowering is logically fairly simple. If the `MaskedType` is null, the answer is null; otherwise the answer is a new `MaskedType` whose `value` is the sum of the input's `value` and the constant." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "short-makeup", + "metadata": {}, + "outputs": [], + "source": [ + "from llvmlite import ir\n", + "\n", + "@cuda_registry.register_global(operator.add)\n", + "class MaskedScalarAddConstant(AbstractTemplate):\n", + " def generic(self, args, kws):\n", + " if isinstance(args[0], MaskedType) and isinstance(args[1], types.Integer):\n", + " return signature(numba_masked, numba_masked, types.int64)\n", + "\n", + "@cuda_lower(operator.add, MaskedType, types.Integer)\n", + "def masked_scalar_add_constant_impl(context, builder, sig, input_values):\n", + " masked_type, const_type = sig.args\n", + "\n", + " indata = cgutils.create_struct_proxy(masked_type)(context, builder, value=input_values[0])\n", + " result = cgutils.create_struct_proxy(numba_masked)(context, builder)\n", + " #to_add_const = context.get_constant(const_type, input_values[1])\n", + "\n", + " result.valid = context.get_constant(types.boolean, 0)\n", + " with builder.if_then(indata.valid):\n", + " result.value = builder.add(indata.value, input_values[1])\n", + " result.valid = context.get_constant(types.boolean, 1)\n", + "\n", + " return result._getvalue()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "entitled-wealth", + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "\n", + "def f(x, y):\n", + " return x + y + cudf.NA\n", + "\n", + "s1 = cudf.Series([1, None, 3, None, 2, 2, 5, None])\n", + "s2 = cudf.Series([1, 2, None, None, 4, None, 5, None])\n", + "\n", + "result = demo_udf(f, s1, s2)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "genuine-davis", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "5 \n", + "6 \n", + "7 \n", + "dtype: int64" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result" + ] + }, + { + "cell_type": "code", + 
"execution_count": 28, + "id": "polyphonic-second", + "metadata": {}, + "outputs": [], + "source": [ + "def f(x, y):\n", + " return x + y + 1\n", + "\n", + "s1 = cudf.Series([1, None, 3, None, 2, 2, 5, None])\n", + "s2 = cudf.Series([1, 2, None, None, 4, None, 5, None])\n", + "\n", + "result = demo_udf(f, s1, s2)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "sporting-campbell", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 3\n", + "1 \n", + "2 \n", + "3 \n", + "4 7\n", + "5 \n", + "6 11\n", + "7 \n", + "dtype: int64" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result" + ] + }, + { + "cell_type": "markdown", + "id": "focal-castle", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "romance-complaint", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "speaking-aquatic", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From cb85d88b7f581a134fd34e51f8c275c804b3029c Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 12 Apr 2021 14:15:07 -0700 Subject: [PATCH 019/132] integrate jitify2 --- .../Modules/JitifyPreprocessKernels.cmake | 1 + cpp/src/transform/jit/binop_kernel.cu | 82 +++++++++++++++++++ cpp/src/transform/transform.cpp | 42 +++++++++- 3 files changed, 121 insertions(+), 4 deletions(-) create mode 100644 cpp/src/transform/jit/binop_kernel.cu diff --git a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake 
b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake index eb1ade61440..fceed965c9e 100644 --- a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake +++ b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake @@ -56,6 +56,7 @@ endfunction() jit_preprocess_files(SOURCE_DIRECTORY ${CUDF_SOURCE_DIR}/src FILES binaryop/jit/kernel.cu + transform/jit/binop_kernel.cu transform/jit/kernel.cu rolling/jit/kernel.cu ) diff --git a/cpp/src/transform/jit/binop_kernel.cu b/cpp/src/transform/jit/binop_kernel.cu new file mode 100644 index 00000000000..75c1e383d96 --- /dev/null +++ b/cpp/src/transform/jit/binop_kernel.cu @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// Include Jitify's cstddef header first +#include + +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include +#include + + +namespace cudf { +namespace transformation { +namespace jit { + +struct Masked { + int64_t value; + bool valid; +}; + +template +__global__ +void binop_kernel(cudf::size_type size, + TypeOut* out_data, + TypeLhs* lhs_data, + TypeRhs* rhs_data, + bool* out_mask, + cudf::bitmask_type const* lhs_mask, + cudf::size_type lhs_offset, + cudf::bitmask_type const* rhs_mask, + cudf::size_type rhs_offset +) { + int tid = threadIdx.x; + int blkid = blockIdx.x; + int blksz = blockDim.x; + int gridsz = gridDim.x; + int start = tid + blkid * blksz; + int step = blksz * gridsz; + + Masked output; + char l_valid; + char r_valid; + long int l_data; + long int r_data; + + for (cudf::size_type i=start; i +#include #include #include @@ -28,10 +29,6 @@ #include #include -#include -#include -#include - #include namespace cudf { @@ -67,7 +64,41 @@ void unary_operation(mutable_column_view output, cudf::jit::get_data_ptr(input)); } +void binary_operation(column_view const& A, + column_view const& B, + std::string const& binary_udf, + data_type output_type, + column_view const& outcol_view, + column_view const& outmsk_view, + rmm::mr::device_memory_resource* mr) +{ + std::string kernel_name = + jitify2::reflection::Template("cudf::transformation::jit::binop_kernel") // + .instantiate(cudf::jit::get_type_name(outcol_view.type()), // list of template arguments + cudf::jit::get_type_name(A.type()), + cudf::jit::get_type_name(B.type())); + + std::string cuda_source = cudf::jit::parse_single_function_ptx( + binary_udf, "GENERIC_BINARY_OP", cudf::jit::get_type_name(output_type), {0}); + rmm::cuda_stream_view stream; + + cudf::jit::get_program_cache(*transform_jit_binop_kernel_cu_jit) + .get_kernel( + kernel_name, {}, {{"transform/jit/operation-udf.hpp", cuda_source}}, {"-arch=sm_."}) // + ->configure_1d_max_occupancy(0, 0, 0, 
stream.value()) // + ->launch(outcol_view.size(), + cudf::jit::get_data_ptr(outcol_view), + cudf::jit::get_data_ptr(A), + cudf::jit::get_data_ptr(B), + cudf::jit::get_data_ptr(outmsk_view), + A.null_mask(), + A.offset(), + B.null_mask(), + B.offset() + ); +} +/* void binary_operation(column_view const& A, column_view const& B, std::string const& binary_udf, @@ -77,6 +108,8 @@ void binary_operation(column_view const& A, rmm::mr::device_memory_resource* mr) { + std::string kernel_name + std::string hash = "prog_transform" + std::to_string(std::hash{}(binary_udf)); std::cout << binary_udf << std::endl; @@ -118,6 +151,7 @@ void binary_operation(column_view const& A, ); } +*/ } // namespace jit } // namespace transformation From ad067eb7d946014fafdaed55020bb6f20be42792 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 12 Apr 2021 14:16:02 -0700 Subject: [PATCH 020/132] minor cleanup --- python/cudf/cudf/core/udf.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/python/cudf/cudf/core/udf.py b/python/cudf/cudf/core/udf.py index 7bb59b6ee6f..564b2cd2cf9 100644 --- a/python/cudf/cudf/core/udf.py +++ b/python/cudf/cudf/core/udf.py @@ -66,12 +66,6 @@ def __init__(self, dmm, fe_type): members = [("value", types.int64), ("valid", types.bool_)] models.StructModel.__init__(self, dmm, fe_type, members) -#@register_model(NAType) # check what model NoneType uses -#class NAModel(models.OpaqueModel): -# def __init__(self, dmm, fe_type): -# members = [] -# models.StructModel.__init__(self, dmm, fe_type, members) - register_model(NAType)(models.OpaqueModel) @lower_builtin(Masked, types.int64, types.bool_) From 237af25dd7784c21827825d826ab454c9bcd656f Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 22 Apr 2021 10:57:17 -0700 Subject: [PATCH 021/132] pushing forward with ND transform --- .../Modules/JitifyPreprocessKernels.cmake | 1 + cpp/include/cudf/transform.hpp | 7 ++ .../transform/jit/baked_udf_requirements.cu | 2 + cpp/src/transform/transform.cpp | 112 
+++++++++-------- python/cudf/cudf/_lib/cpp/transform.pxd | 8 ++ python/cudf/cudf/_lib/transform.pyx | 32 +++++ python/cudf/cudf/core/udf.py | 114 +++++++++++++++--- 7 files changed, 211 insertions(+), 65 deletions(-) create mode 100644 cpp/src/transform/jit/baked_udf_requirements.cu diff --git a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake index fceed965c9e..c481301e335 100644 --- a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake +++ b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake @@ -59,6 +59,7 @@ jit_preprocess_files(SOURCE_DIRECTORY ${CUDF_SOURCE_DIR}/src transform/jit/binop_kernel.cu transform/jit/kernel.cu rolling/jit/kernel.cu + transform/jit/baked_udf_requirements.cu ) add_custom_target(jitify_preprocess_run DEPENDS ${JIT_PREPROCESSED_FILES}) diff --git a/cpp/include/cudf/transform.hpp b/cpp/include/cudf/transform.hpp index 5ba5ac26d86..8ed9b72c97d 100644 --- a/cpp/include/cudf/transform.hpp +++ b/cpp/include/cudf/transform.hpp @@ -63,6 +63,13 @@ std::unique_ptr masked_binary_op( column_view const& outmsk_view, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr generalized_masked_op( + table_view const& data_view, + std::string const& udf, + data_type output_type, + column_view const& outcol_view, + column_view const& outmsk_view, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Creates a null_mask from `input` by converting `NaN` to null and diff --git a/cpp/src/transform/jit/baked_udf_requirements.cu b/cpp/src/transform/jit/baked_udf_requirements.cu new file mode 100644 index 00000000000..d31f1d9f899 --- /dev/null +++ b/cpp/src/transform/jit/baked_udf_requirements.cu @@ -0,0 +1,2 @@ +#include +#include diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index ef26fe4ec3e..f1ea7956c0d 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -16,6 +16,8 @@ 
#include #include +#include + #include #include @@ -28,6 +30,7 @@ #include #include #include +#include #include @@ -98,60 +101,43 @@ void binary_operation(column_view const& A, B.offset() ); } -/* -void binary_operation(column_view const& A, - column_view const& B, - std::string const& binary_udf, - data_type output_type, - column_view const& outcol_view, - column_view const& outmsk_view, - rmm::mr::device_memory_resource* mr) -{ - - std::string kernel_name - std::string hash = "prog_transform" + std::to_string(std::hash{}(binary_udf)); - - std::cout << binary_udf << std::endl; - - std::string cuda_source = code::kernel_header; - cuda_source += cudf::jit::parse_single_function_ptx( - binary_udf, "GENERIC_BINARY_OP", cudf::jit::get_type_name(output_type), {0}); +void generalized_operation(table_view const& data_view, + std::string const& udf, + data_type output_type, + column_view const& outcol_view, + column_view const& outmsk_view, + rmm::mr::device_memory_resource* mr) +{ + rmm::cuda_stream_view stream; + //std::string cuda_source = cudf::jit::parse_single_function_ptx( + // udf, "GENERIC_OP", cudf::jit::get_type_name(output_type), {0}); + /* + size_t num_cols = data_view.num_columns(); + std::vector input_types(num_cols); + std::vector args(num_cols); - cuda_source += code::null_kernel; - std::cout << cuda_source << std::endl; + column_view this_view; + for (size_t i = 0; i < num_cols; i++) { + this_view = data_view.column(i); + input_types[i] = cudf::jit::get_type_name(this_view.type()); + } + */ - rmm::cuda_stream_view stream; + std::string kernel_name = + jitify2::reflection::Template("genop_kernel") // + .instantiate(cudf::jit::get_type_name(outcol_view.type())); - // Launch the jitify kernel - - cudf::jit::launcher(hash, - cuda_source, - header_names, - cudf::jit::compiler_flags, - headers_code, - stream) - .set_kernel_inst("masked_binary_op_kernel", - { - cudf::jit::get_type_name(outcol_view.type()), - cudf::jit::get_type_name(A.type()), - 
cudf::jit::get_type_name(B.type()), - } - ) - .launch(outcol_view.size(), - cudf::jit::get_data_ptr(outcol_view), - cudf::jit::get_data_ptr(A), - cudf::jit::get_data_ptr(B), - cudf::jit::get_data_ptr(outmsk_view), - A.null_mask(), - A.offset(), - B.null_mask(), - B.offset() - ); + cudf::jit::get_program_cache(*transform_jit_baked_udf_requirements_cu_jit) + .get_kernel( + kernel_name, {}, {{"transform/jit/operation-udf.hpp", udf}}, {"-arch=sm_."}) // + ->configure_1d_max_occupancy(0, 0, 0, stream.value()) // + ->launch(outcol_view.size(), + static_cast(7), // + cudf::jit::get_data_ptr(outcol_view)); } -*/ } // namespace jit } // namespace transformation @@ -197,6 +183,26 @@ std::unique_ptr masked_binary_op_inner(column_view const& A, return output; } +std::unique_ptr generalized_masked_op_inner( + table_view const& data_view, + std::string const& udf, + data_type output_type, + column_view const& outcol_view, + column_view const& outmsk_view, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +{ + rmm::cuda_stream_view stream = rmm::cuda_stream_default; + + transformation::jit::generalized_operation(data_view, udf, output_type, outcol_view, outmsk_view, mr); + + std::unique_ptr output; + + return output; + +} + + + } // namespace detail std::unique_ptr transform(column_view const& input, @@ -217,9 +223,19 @@ std::unique_ptr masked_binary_op(column_view const& A, column_view const& outmsk_view, rmm::mr::device_memory_resource* mr) { - std::cout << "HERE!!" 
<< std::endl; return detail::masked_binary_op_inner(A, B, binary_udf, output_type, outcol_view, outmsk_view, mr); } +std::unique_ptr generalized_masked_op( + table_view const& data_view, + std::string const& udf, + data_type output_type, + column_view const& outcol_view, + column_view const& outmsk_view, + rmm::mr::device_memory_resource* mr) +{ + return detail::generalized_masked_op_inner(data_view, udf, output_type, outcol_view, outmsk_view, mr); +} + } // namespace cudf diff --git a/python/cudf/cudf/_lib/cpp/transform.pxd b/python/cudf/cudf/_lib/cpp/transform.pxd index 217b3374579..00d0306dbca 100644 --- a/python/cudf/cudf/_lib/cpp/transform.pxd +++ b/python/cudf/cudf/_lib/cpp/transform.pxd @@ -47,6 +47,14 @@ cdef extern from "cudf/transform.hpp" namespace "cudf" nogil: column_view outmask_view, ) except + + cdef unique_ptr[column] generalized_masked_op( + table_view data_view, + string udf, + data_type output_type, + column_view outcol_view, + column_view outmask_view + ) except + + cdef pair[unique_ptr[table], unique_ptr[column]] encode( table_view input ) except + diff --git a/python/cudf/cudf/_lib/transform.pyx b/python/cudf/cudf/_lib/transform.pyx index 235dc3e12bd..1b90a5d3a3f 100644 --- a/python/cudf/cudf/_lib/transform.pyx +++ b/python/cudf/cudf/_lib/transform.pyx @@ -127,6 +127,38 @@ def transform(Column input, op): return Column.from_unique_ptr(move(c_output)) +def generalized_masked_op(Table data, op, Column output_column, Column output_mask): + cdef string c_str + cdef type_id c_tid + cdef data_type c_dtype + + cdef table_view data_view = data.view() + + from cudf.core.udf import compile_udf, demo_kernel + #c_str = compile_udf(op).encode('UTF-8') + c_str = demo_kernel.encode('UTF-8') + c_tid = ( + np_to_cudf_types[np.dtype('int64')] + ) + c_dtype = data_type(c_tid) + + cdef column_view outcol_view = output_column.view() + cdef column_view outmsk_view = output_mask.view() + + with nogil: + c_output = move(libcudf_transform.generalized_masked_op( + 
data_view, + c_str, + c_dtype, + outcol_view, + outmsk_view + )) + + output_mask_real = bools_to_mask(output_mask) + output_column = output_column.set_mask(output_mask_real) + + return output_column + def masked_binary_op(Column A, Column B, op, Column output_column, Column output_mask): cdef column_view A_view = A.view() cdef column_view B_view = B.view() diff --git a/python/cudf/cudf/core/udf.py b/python/cudf/cudf/core/udf.py index 564b2cd2cf9..1931ce7e0d0 100644 --- a/python/cudf/cudf/core/udf.py +++ b/python/cudf/cudf/core/udf.py @@ -10,7 +10,8 @@ type_callable, typeof_impl, ) -from numba.core.typing import signature +from numba.core.typing import signature as nb_signature +from inspect import signature as py_signature from numba.core.typing.templates import AbstractTemplate from numba.cuda.cudadecl import registry as cuda_registry from numba.cuda.cudaimpl import lower as cuda_lower, registry as cuda_lowering_registry @@ -84,14 +85,14 @@ class MaskedScalarAdd(AbstractTemplate): # abstracttemplate vs concretetemplate def generic(self, args, kws): if isinstance(args[0], MaskedType) and isinstance(args[1], MaskedType): - return signature(numba_masked, numba_masked, numba_masked) + return nb_signature(numba_masked, numba_masked, numba_masked) @cuda_registry.register_global(operator.add) class MaskedScalarAddNull(AbstractTemplate): def generic(self, args, kws): if isinstance(args[0], MaskedType) and isinstance(args[1], NAType): - return signature(numba_masked, numba_masked, numba_na) + return nb_signature(numba_masked, numba_masked, numba_na) @cuda_lower(operator.add, MaskedType, MaskedType) def masked_scalar_add_impl(context, builder, sig, args): @@ -140,23 +141,102 @@ def masked_add_py_2(m1, m2): return m1 + m2 def compile_udf(func): + n_params = len(py_signature(func).parameters) + to_compiler_sig = tuple(numba_masked for arg in range(n_params)) - signature = (numba_masked, numba_masked) - ptx, _ = cuda.compile_ptx_for_current_device(func, signature, device=True) 
- return ptx + # Get the inlineable PTX function + ptx, _ = cuda.compile_ptx_for_current_device(func, to_compiler_sig, device=True) + + # get the kernel that calls the inlineable function + kernel = make_kernel(n_params) + return kernel, ptx NA = _NAType() -@cuda.jit(numba_masked(numba_masked), device=True) -def test_scalar_null_add(masked): - result = masked + NA +def make_kernel(n_params): + ''' + create a string containing the right templated kernel + for `func` + ''' + + indent = ' '*18 + + # Hack together the template string + result = '' + + templates = 'template " + result += templates + + # Hack together the function signature + sig = '\n__global__\nvoid genop_kernel(cudf::size_type size,\n' + sig += indent + "TypeOut* out_data,\n" + sig += indent + 'bool* out_mask,\n' + for i in range(n_params): + sig += indent + f"Type{i}* data_{i},\n" + sig += indent + f"cudf::bitmask_type const* mask_{i},\n" + sig += indent + f"cudf::size_type offset_{i},\n" + sig = sig[:-2] + ') {' + + result += sig + result += '\n' + + # standard thread block + result += '\n' + result += '\tint tid = threadIdx.x;\n' + result += '\tint blkid = blockIdx.x;\n' + result += '\tint blksz = blockDim.x;\n' + result += '\tint gridsz = gridDim.x;\n' + result += '\tint start = tid + blkid * blksz;\n' + result += '\tint step = blksz * gridsz;\n' + result += '\n' + + result += '\tMasked output;\n' + + for i in range(n_params): + result += f"\tchar valid_{i};\n" + + # main loop + result += "\tfor (cudf::size_type i=start; i +__global__ +void genop_kernel(cudf::size_type size, cudf::size_type value, TypeOut* out_data) { + + int tid = threadIdx.x; + int blkid = blockIdx.x; + int blksz = blockDim.x; + int gridsz = gridDim.x; + int start = tid + blkid * blksz; + int step = blksz * gridsz; + + for (cudf::size_type i=start; i Date: Tue, 27 Apr 2021 14:07:24 -0700 Subject: [PATCH 022/132] variadic kernel up and running --- cpp/src/transform/jit/binop_kernel.cu | 58 ++++++++++++++++++++++++++- 
cpp/src/transform/transform.cpp | 45 ++++++++++++--------- python/cudf/cudf/_lib/transform.pyx | 2 +- 3 files changed, 83 insertions(+), 22 deletions(-) diff --git a/cpp/src/transform/jit/binop_kernel.cu b/cpp/src/transform/jit/binop_kernel.cu index 75c1e383d96..c81cd21dac9 100644 --- a/cpp/src/transform/jit/binop_kernel.cu +++ b/cpp/src/transform/jit/binop_kernel.cu @@ -31,6 +31,8 @@ #include #include +#include +#include namespace cudf { namespace transformation { @@ -40,7 +42,7 @@ struct Masked { int64_t value; bool valid; }; - +/* template __global__ void binop_kernel(cudf::size_type size, @@ -76,6 +78,60 @@ void binop_kernel(cudf::size_type size, out_mask[i] = output.valid; } } +*/ +template +__device__ auto make_args(cudf::size_type id, + TypeIn in_ptr, + MaskType in_mask, + OffsetType in_offset) +{ + bool valid = in_mask ? cudf::bit_is_set(in_mask, in_offset + id) : true; + return cuda::std::make_tuple(in_ptr[id], valid); +} + +template +__device__ auto make_args(cudf::size_type id, + InType in_ptr, + MaskType in_mask, // in practice, always cudf::bitmask_type const* + OffsetType in_offset, // in practice, always cudf::size_type + Arguments ... args) { + + bool valid = in_mask ? cudf::bit_is_set(in_mask, in_offset + id) : true; + return cuda::std::tuple_cat( + cuda::std::make_tuple(in_ptr[id], valid), + make_args(id, args...) + ); +} + + +template +__global__ +void generic_udf_kernel(cudf::size_type size, + TypeOut* out_data, + bool* out_mask, + Arguments ... 
args) +{ + + int tid = threadIdx.x; + int blkid = blockIdx.x; + int blksz = blockDim.x; + int gridsz = gridDim.x; + int start = tid + blkid * blksz; + int step = blksz * gridsz; + + Masked output; + for (cudf::size_type i=start; iconfigure_1d_max_occupancy(0, 0, 0, stream.value()) // + generic_kernel_name, {}, {{"transform/jit/operation-udf.hpp", generic_cuda_source}}, {"-arch=sm_."}) // + ->configure_1d_max_occupancy(0, 0, 0, generic_stream.value()) // ->launch(outcol_view.size(), - cudf::jit::get_data_ptr(outcol_view), - cudf::jit::get_data_ptr(A), - cudf::jit::get_data_ptr(B), - cudf::jit::get_data_ptr(outmsk_view), - A.null_mask(), - A.offset(), - B.null_mask(), - B.offset() - ); + cudf::jit::get_data_ptr(outcol_view), + cudf::jit::get_data_ptr(outmsk_view), + cudf::jit::get_data_ptr(A), + A.null_mask(), // cudf::bitmask_type * + A.offset(), + cudf::jit::get_data_ptr(B), + B.null_mask(), + B.offset()); + } void generalized_operation(table_view const& data_view, diff --git a/python/cudf/cudf/_lib/transform.pyx b/python/cudf/cudf/_lib/transform.pyx index 1b90a5d3a3f..68d8c9f6228 100644 --- a/python/cudf/cudf/_lib/transform.pyx +++ b/python/cudf/cudf/_lib/transform.pyx @@ -172,7 +172,7 @@ def masked_binary_op(Column A, Column B, op, Column output_column, Column output from cudf.core.udf import compile_udf - c_str = compile_udf(op).encode('UTF-8') + c_str = compile_udf(op)[1].encode('UTF-8') c_tid = ( np_to_cudf_types[np.dtype('int64')] From 591627c4333315352f432c898bd71a9f5b87f0f0 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 28 Apr 2021 06:52:59 -0700 Subject: [PATCH 023/132] big plays --- cpp/include/cudf/transform.hpp | 3 +-- cpp/src/transform/transform.cpp | 32 ++++++++++++------------- python/cudf/cudf/_lib/cpp/transform.pxd | 3 +-- python/cudf/cudf/_lib/transform.pyx | 19 ++++----------- python/cudf/cudf/core/dataframe.py | 6 +++++ python/cudf/cudf/core/frame.py | 19 ++++++++++++++- 6 files changed, 45 insertions(+), 37 deletions(-) diff --git 
a/cpp/include/cudf/transform.hpp b/cpp/include/cudf/transform.hpp index 8ed9b72c97d..7e5c1797d11 100644 --- a/cpp/include/cudf/transform.hpp +++ b/cpp/include/cudf/transform.hpp @@ -55,8 +55,7 @@ std::unique_ptr transform( std::unique_ptr masked_binary_op( - column_view const& A, - column_view const& B, + table_view data_view, std::string const& binary_udf, data_type output_type, column_view const& outcol_view, diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index 61c9bdd7fbb..b5c9b76ea6c 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -67,8 +67,7 @@ void unary_operation(mutable_column_view output, cudf::jit::get_data_ptr(input)); } -void binary_operation(column_view const& A, - column_view const& B, +void binary_operation(table_view data_view, std::string const& binary_udf, data_type output_type, column_view const& outcol_view, @@ -76,9 +75,11 @@ void binary_operation(column_view const& A, rmm::mr::device_memory_resource* mr) { + column_view A = data_view.column(0); + column_view B = data_view.column(1); - std::string generic_kernel_name = - jitify2::reflection::Template("cudf::transformation::jit::generic_udf_kernel") + std::string generic_kernel_name = + jitify2::reflection::Template("cudf::transformation::jit::generic_udf_kernel") .instantiate(cudf::jit::get_type_name(outcol_view.type()), "int64_t*", "uint32_t*", @@ -170,19 +171,17 @@ std::unique_ptr transform(column_view const& input, return output; } -std::unique_ptr masked_binary_op_inner(column_view const& A, - column_view const& B, - std::string const& binary_udf, - data_type output_type, - column_view const& outcol_view, - column_view const& outmsk_view, - rmm::mr::device_memory_resource* mr) +std::unique_ptr masked_binary_op_inner(table_view data_view, + std::string const& binary_udf, + data_type output_type, + column_view const& outcol_view, + column_view const& outmsk_view, + rmm::mr::device_memory_resource* mr) { 
rmm::cuda_stream_view stream = rmm::cuda_stream_default; - transformation::jit::binary_operation(A, B, binary_udf, output_type, outcol_view, outmsk_view, mr); + transformation::jit::binary_operation(data_view, binary_udf, output_type, outcol_view, outmsk_view, mr); - std::unique_ptr output = make_fixed_width_column( - output_type, A.size(), copy_bitmask(A), cudf::UNKNOWN_NULL_COUNT, stream, mr); + std::unique_ptr output; return output; @@ -220,15 +219,14 @@ std::unique_ptr transform(column_view const& input, return detail::transform(input, unary_udf, output_type, is_ptx, rmm::cuda_stream_default, mr); } -std::unique_ptr masked_binary_op(column_view const& A, - column_view const& B, +std::unique_ptr masked_binary_op(table_view data_view, std::string const& binary_udf, data_type output_type, column_view const& outcol_view, column_view const& outmsk_view, rmm::mr::device_memory_resource* mr) { - return detail::masked_binary_op_inner(A, B, binary_udf, output_type, outcol_view, outmsk_view, mr); + return detail::masked_binary_op_inner(data_view, binary_udf, output_type, outcol_view, outmsk_view, mr); } std::unique_ptr generalized_masked_op( diff --git a/python/cudf/cudf/_lib/cpp/transform.pxd b/python/cudf/cudf/_lib/cpp/transform.pxd index 00d0306dbca..d7efc5a2d8f 100644 --- a/python/cudf/cudf/_lib/cpp/transform.pxd +++ b/python/cudf/cudf/_lib/cpp/transform.pxd @@ -39,8 +39,7 @@ cdef extern from "cudf/transform.hpp" namespace "cudf" nogil: ) except + cdef unique_ptr[column] masked_binary_op( - column_view A, - column_view B, + table_view data_view, string binary_udf, data_type output_type, column_view outcol_view, diff --git a/python/cudf/cudf/_lib/transform.pyx b/python/cudf/cudf/_lib/transform.pyx index 68d8c9f6228..027ff65a13c 100644 --- a/python/cudf/cudf/_lib/transform.pyx +++ b/python/cudf/cudf/_lib/transform.pyx @@ -159,21 +159,12 @@ def generalized_masked_op(Table data, op, Column output_column, Column output_ma return output_column -def masked_binary_op(Column 
A, Column B, op, Column output_column, Column output_mask): - cdef column_view A_view = A.view() - cdef column_view B_view = B.view() - - cdef string c_str +def masked_udf(Table incols, op, Column output_column, Column output_mask): + cdef table_view data_view = incols.data_view() + cdef string c_str = op.encode("UTF-8") cdef type_id c_tid cdef data_type c_dtype - if A.dtype != np.dtype('int64') or B.dtype != np.dtype('int64'): - raise TypeError('int64 please') - - - from cudf.core.udf import compile_udf - c_str = compile_udf(op)[1].encode('UTF-8') - c_tid = ( np_to_cudf_types[np.dtype('int64')] ) @@ -184,15 +175,13 @@ def masked_binary_op(Column A, Column B, op, Column output_column, Column output with nogil: c_output = move(libcudf_transform.masked_binary_op( - A_view, - B_view, + data_view, c_str, c_dtype, outcol_view, outmsk_view )) - #return Column.from_unique_ptr(move(c_output)) output_mask_real = bools_to_mask(output_mask) output_column = output_column.set_mask(output_mask_real) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index d5393a724ec..1559a276029 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -35,6 +35,7 @@ from cudf.core.index import Index, RangeIndex, as_index from cudf.core.indexing import _DataFrameIlocIndexer, _DataFrameLocIndexer from cudf.core.series import Series +from cudf.core.udf import compile_udf from cudf.core.window import Rolling from cudf.utils import applyutils, docutils, ioutils, queryutils, utils from cudf.utils.docutils import copy_docstring @@ -4714,6 +4715,11 @@ def query(self, expr, local_dict=None): boolmask = queryutils.query_execute(self, expr, callenv) return self._apply_boolean_mask(boolmask) + def apply(self, func): + breakpoint() + return super()._apply(func) + + @applyutils.doc_apply() def apply_rows( self, diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index dcf5044ed2f..87380175f3d 100644 --- 
a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -1418,6 +1418,23 @@ def _quantiles( result._copy_type_metadata(self) return result + def _apply(self, func): + from cudf.core.udf import compile_udf + + if not all(np.dtype('int64') == dtype for dtype in self.dtypes): + raise TypeError("Currently only int64 is supported") + + kernel, ptx = compile_udf(func) + + output_column = cudf.core.column.column_empty(row_count=len(self), dtype='int64') + output_mask = cudf.core.column.column_empty(row_count=len(self), dtype='bool') + + breakpoint() + result = cudf._lib.transform.masked_udf(self, ptx, output_column, output_mask) + breakpoint() + return result + + def rank( self, axis=0, @@ -1456,7 +1473,7 @@ def rank( pct : bool, default False Whether or not to display the returned rankings in percentile form. - +f Returns ------- same type as caller From c07e18707ef42af5b4e242a37bc6905ef0f3205c Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 28 Apr 2021 07:59:27 -0700 Subject: [PATCH 024/132] general logic for building template instantiation arguments --- cpp/src/transform/transform.cpp | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index b5c9b76ea6c..3bef0b29063 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -75,18 +75,26 @@ void binary_operation(table_view data_view, rmm::mr::device_memory_resource* mr) { + std::vector template_types( + // A ptr, mask ptr, and offset for each column + // plus one for the type of the output column + (data_view.num_columns() * 3) + 1 + ); + template_types[0] = cudf::jit::get_type_name(outcol_view.type()); + for (int i = 0; i < data_view.num_columns(); i++) { + int offset = (i * 3) + 1; + template_types[offset] = cudf::jit::get_type_name(data_view.column(i).type()) + "*"; + template_types[offset + 1] = "uint32_t*"; + template_types[offset + 2] = "int64_t"; + } + + 
column_view A = data_view.column(0); column_view B = data_view.column(1); std::string generic_kernel_name = jitify2::reflection::Template("cudf::transformation::jit::generic_udf_kernel") - .instantiate(cudf::jit::get_type_name(outcol_view.type()), - "int64_t*", - "uint32_t*", - "int64_t", - "int64_t*", - "uint32_t*", - "int64_t"); + .instantiate(template_types); std::string generic_cuda_source = cudf::jit::parse_single_function_ptx( binary_udf, "GENERIC_OP", cudf::jit::get_type_name(output_type), {0}); From d21b858952bda5ad24bcaca99545361f63953f94 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 29 Apr 2021 08:03:28 -0700 Subject: [PATCH 025/132] cleanup --- python/cudf/cudf/core/frame.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 87380175f3d..e0d2746f22c 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -1429,9 +1429,7 @@ def _apply(self, func): output_column = cudf.core.column.column_empty(row_count=len(self), dtype='int64') output_mask = cudf.core.column.column_empty(row_count=len(self), dtype='bool') - breakpoint() result = cudf._lib.transform.masked_udf(self, ptx, output_column, output_mask) - breakpoint() return result From 6806968d0f29789451700c6c1cd8307fd58d1c79 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 29 Apr 2021 08:04:24 -0700 Subject: [PATCH 026/132] attempting to use vector overload in jitify --- cpp/src/transform/transform.cpp | 67 ++++++++++++++++++++++++++++----- 1 file changed, 57 insertions(+), 10 deletions(-) diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index 3bef0b29063..c8cde8dd8d6 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -67,6 +67,38 @@ void unary_operation(mutable_column_view output, cudf::jit::get_data_ptr(input)); } +std::vector make_launch_args(table_view data, column_view outcol_view, column_view outmsk_view) { + int n_cols 
= data.num_columns(); + std::vector results((n_cols * 3) + 3); + + int64_t size = outcol_view.size(); + void* outcol_ptr = (void*)cudf::jit::get_data_ptr(outcol_view); + void* outmsk_ptr = (void*)cudf::jit::get_data_ptr(outmsk_view); + + results[0] = (void*)&size; + results[1] = (void*)&outcol_ptr; + results[2] = (void*)&outmsk_ptr; + + for (int i = 0; i < n_cols; i++) { + int offset = 3 + (i * n_cols); + column_view col = data.column(i); + + void* data_ptr = (void*)cudf::jit::get_data_ptr(col); + results[offset] = (void*)&data_ptr; + + cudf::bitmask_type const* mask_ptr = col.null_mask(); + results[offset + 1] = (void*)&mask_ptr; + + int64_t col_offset = col.offset(); + results[offset + 2] = (void*)&col_offset; + } + return results; +} + +auto make_launch_args_variadic(table_view data) { + +} + void binary_operation(table_view data_view, std::string const& binary_udf, data_type output_type, @@ -88,6 +120,7 @@ void binary_operation(table_view data_view, template_types[offset + 2] = "int64_t"; } + auto launch_args = make_launch_args(data_view, outcol_view, outmsk_view); column_view A = data_view.column(0); column_view B = data_view.column(1); @@ -98,21 +131,35 @@ void binary_operation(table_view data_view, std::string generic_cuda_source = cudf::jit::parse_single_function_ptx( binary_udf, "GENERIC_OP", cudf::jit::get_type_name(output_type), {0}); - + + std::vector func_args(9); + + cudf::size_type arg_size = outcol_view.size(); + const void* arg_outcol_view = cudf::jit::get_data_ptr(outcol_view); + const void* arg_outmsk_view = cudf::jit::get_data_ptr(outcol_view); + const void* arg_A = cudf::jit::get_data_ptr(A); + cudf::bitmask_type const* arg_A_mask = A.null_mask(); + int64_t arg_A_offset = A.offset(); + const void* arg_B = cudf::jit::get_data_ptr(B); + cudf::bitmask_type const* arg_B_mask = B.null_mask(); + int64_t arg_B_offset = B.offset(); + + func_args[0] = &arg_size; + func_args[1] = &arg_outcol_view; + func_args[2] = &arg_outmsk_view; + func_args[3] = 
&arg_A; + func_args[4] = &arg_A_mask; + func_args[5] = &arg_A_offset; + func_args[6] = &arg_B; + func_args[7] = &arg_B_mask; + func_args[8] = &arg_B_offset; + rmm::cuda_stream_view generic_stream; cudf::jit::get_program_cache(*transform_jit_binop_kernel_cu_jit) .get_kernel( generic_kernel_name, {}, {{"transform/jit/operation-udf.hpp", generic_cuda_source}}, {"-arch=sm_."}) // ->configure_1d_max_occupancy(0, 0, 0, generic_stream.value()) // - ->launch(outcol_view.size(), - cudf::jit::get_data_ptr(outcol_view), - cudf::jit::get_data_ptr(outmsk_view), - cudf::jit::get_data_ptr(A), - A.null_mask(), // cudf::bitmask_type * - A.offset(), - cudf::jit::get_data_ptr(B), - B.null_mask(), - B.offset()); + ->launch(func_args); } From cef8b71c3c1dc865a5159f17ba79b6261c74a066 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 3 May 2021 06:21:33 -0700 Subject: [PATCH 027/132] loop kernel runs finally --- cpp/src/transform/jit/binop_kernel.cu | 1 - cpp/src/transform/transform.cpp | 99 ++++++++++++--------------- 2 files changed, 42 insertions(+), 58 deletions(-) diff --git a/cpp/src/transform/jit/binop_kernel.cu b/cpp/src/transform/jit/binop_kernel.cu index c81cd21dac9..1b3ea42fc22 100644 --- a/cpp/src/transform/jit/binop_kernel.cu +++ b/cpp/src/transform/jit/binop_kernel.cu @@ -103,7 +103,6 @@ __device__ auto make_args(cudf::size_type id, ); } - template __global__ void generic_udf_kernel(cudf::size_type size, diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index c8cde8dd8d6..dffae64c59e 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -67,38 +67,6 @@ void unary_operation(mutable_column_view output, cudf::jit::get_data_ptr(input)); } -std::vector make_launch_args(table_view data, column_view outcol_view, column_view outmsk_view) { - int n_cols = data.num_columns(); - std::vector results((n_cols * 3) + 3); - - int64_t size = outcol_view.size(); - void* outcol_ptr = 
(void*)cudf::jit::get_data_ptr(outcol_view); - void* outmsk_ptr = (void*)cudf::jit::get_data_ptr(outmsk_view); - - results[0] = (void*)&size; - results[1] = (void*)&outcol_ptr; - results[2] = (void*)&outmsk_ptr; - - for (int i = 0; i < n_cols; i++) { - int offset = 3 + (i * n_cols); - column_view col = data.column(i); - - void* data_ptr = (void*)cudf::jit::get_data_ptr(col); - results[offset] = (void*)&data_ptr; - - cudf::bitmask_type const* mask_ptr = col.null_mask(); - results[offset + 1] = (void*)&mask_ptr; - - int64_t col_offset = col.offset(); - results[offset + 2] = (void*)&col_offset; - } - return results; -} - -auto make_launch_args_variadic(table_view data) { - -} - void binary_operation(table_view data_view, std::string const& binary_udf, data_type output_type, @@ -120,46 +88,63 @@ void binary_operation(table_view data_view, template_types[offset + 2] = "int64_t"; } - auto launch_args = make_launch_args(data_view, outcol_view, outmsk_view); column_view A = data_view.column(0); column_view B = data_view.column(1); + + std::string generic_kernel_name = jitify2::reflection::Template("cudf::transformation::jit::generic_udf_kernel") - .instantiate(template_types); + .instantiate(cudf::jit::get_type_name(outcol_view.type()), + "int64_t*", + "uint32_t*", + "int64_t", + "int64_t*", + "uint32_t*", + "int64_t"); std::string generic_cuda_source = cudf::jit::parse_single_function_ptx( binary_udf, "GENERIC_OP", cudf::jit::get_type_name(output_type), {0}); - std::vector func_args(9); - - cudf::size_type arg_size = outcol_view.size(); - const void* arg_outcol_view = cudf::jit::get_data_ptr(outcol_view); - const void* arg_outmsk_view = cudf::jit::get_data_ptr(outcol_view); - const void* arg_A = cudf::jit::get_data_ptr(A); - cudf::bitmask_type const* arg_A_mask = A.null_mask(); - int64_t arg_A_offset = A.offset(); - const void* arg_B = cudf::jit::get_data_ptr(B); - cudf::bitmask_type const* arg_B_mask = B.null_mask(); - int64_t arg_B_offset = B.offset(); - - func_args[0] 
= &arg_size; - func_args[1] = &arg_outcol_view; - func_args[2] = &arg_outmsk_view; - func_args[3] = &arg_A; - func_args[4] = &arg_A_mask; - func_args[5] = &arg_A_offset; - func_args[6] = &arg_B; - func_args[7] = &arg_B_mask; - func_args[8] = &arg_B_offset; - + int n_cols = data_view.num_columns(); + std::vector results((n_cols * 3) + 3); + + cudf::size_type size = outcol_view.size(); + const void* outcol_ptr = cudf::jit::get_data_ptr(outcol_view); + const void* outmsk_ptr = cudf::jit::get_data_ptr(outmsk_view); + + results[0] = &size; + results[1] = &outcol_ptr; + results[2] = &outmsk_ptr; + column_view col; + + std::vector data_ptrs(n_cols); + std::vector mask_ptrs(n_cols); + std::vector offsets(n_cols); + + for (int i = 0; i < n_cols; i++) { + col = data_view.column(i); + data_ptrs[i] = cudf::jit::get_data_ptr(col); + mask_ptrs[i] = col.null_mask(); + offsets[i] = col.offset(); + } + + int idx = 3; + for (int i = 0; i < n_cols; i++) { + results[idx] = &data_ptrs[i]; + results[idx + 1] = &mask_ptrs[i]; + results[idx + 2] = &offsets[i]; + idx += 3; + } + + rmm::cuda_stream_view generic_stream; cudf::jit::get_program_cache(*transform_jit_binop_kernel_cu_jit) .get_kernel( generic_kernel_name, {}, {{"transform/jit/operation-udf.hpp", generic_cuda_source}}, {"-arch=sm_."}) // ->configure_1d_max_occupancy(0, 0, 0, generic_stream.value()) // - ->launch(func_args); + ->launch(results.data()); } From 19b88c54ef297e661429a8421d589d185c27d67d Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 3 May 2021 06:24:28 -0700 Subject: [PATCH 028/132] full pipeline works for a dynamic number of columns --- cpp/src/transform/transform.cpp | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index dffae64c59e..cdd422a407d 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -88,21 +88,9 @@ void binary_operation(table_view data_view, 
template_types[offset + 2] = "int64_t"; } - - column_view A = data_view.column(0); - column_view B = data_view.column(1); - - - std::string generic_kernel_name = jitify2::reflection::Template("cudf::transformation::jit::generic_udf_kernel") - .instantiate(cudf::jit::get_type_name(outcol_view.type()), - "int64_t*", - "uint32_t*", - "int64_t", - "int64_t*", - "uint32_t*", - "int64_t"); + .instantiate(template_types); std::string generic_cuda_source = cudf::jit::parse_single_function_ptx( binary_udf, "GENERIC_OP", cudf::jit::get_type_name(output_type), {0}); From 4845f27c2b6dc44561153ca04dc206297db52982 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 3 May 2021 14:24:36 -0700 Subject: [PATCH 029/132] code cleanup --- cpp/src/transform/transform.cpp | 67 ------------------------- python/cudf/cudf/_lib/cpp/transform.pxd | 8 --- python/cudf/cudf/_lib/transform.pyx | 32 ------------ 3 files changed, 107 deletions(-) diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index cdd422a407d..50abd347c42 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -136,43 +136,6 @@ void binary_operation(table_view data_view, } -void generalized_operation(table_view const& data_view, - std::string const& udf, - data_type output_type, - column_view const& outcol_view, - column_view const& outmsk_view, - rmm::mr::device_memory_resource* mr) -{ - rmm::cuda_stream_view stream; - //std::string cuda_source = cudf::jit::parse_single_function_ptx( - // udf, "GENERIC_OP", cudf::jit::get_type_name(output_type), {0}); - /* - size_t num_cols = data_view.num_columns(); - std::vector input_types(num_cols); - std::vector args(num_cols); - - - column_view this_view; - for (size_t i = 0; i < num_cols; i++) { - this_view = data_view.column(i); - input_types[i] = cudf::jit::get_type_name(this_view.type()); - } - */ - - std::string kernel_name = - jitify2::reflection::Template("genop_kernel") // - 
.instantiate(cudf::jit::get_type_name(outcol_view.type())); - - cudf::jit::get_program_cache(*transform_jit_baked_udf_requirements_cu_jit) - .get_kernel( - kernel_name, {}, {{"transform/jit/operation-udf.hpp", udf}}, {"-arch=sm_."}) // - ->configure_1d_max_occupancy(0, 0, 0, stream.value()) // - ->launch(outcol_view.size(), - static_cast(7), // - cudf::jit::get_data_ptr(outcol_view)); - -} - } // namespace jit } // namespace transformation @@ -215,24 +178,6 @@ std::unique_ptr masked_binary_op_inner(table_view data_view, return output; } -std::unique_ptr generalized_masked_op_inner( - table_view const& data_view, - std::string const& udf, - data_type output_type, - column_view const& outcol_view, - column_view const& outmsk_view, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) -{ - rmm::cuda_stream_view stream = rmm::cuda_stream_default; - - transformation::jit::generalized_operation(data_view, udf, output_type, outcol_view, outmsk_view, mr); - - std::unique_ptr output; - - return output; - -} - } // namespace detail @@ -257,16 +202,4 @@ std::unique_ptr masked_binary_op(table_view data_view, return detail::masked_binary_op_inner(data_view, binary_udf, output_type, outcol_view, outmsk_view, mr); } -std::unique_ptr generalized_masked_op( - table_view const& data_view, - std::string const& udf, - data_type output_type, - column_view const& outcol_view, - column_view const& outmsk_view, - rmm::mr::device_memory_resource* mr) -{ - return detail::generalized_masked_op_inner(data_view, udf, output_type, outcol_view, outmsk_view, mr); -} - - } // namespace cudf diff --git a/python/cudf/cudf/_lib/cpp/transform.pxd b/python/cudf/cudf/_lib/cpp/transform.pxd index d7efc5a2d8f..d222c461bba 100644 --- a/python/cudf/cudf/_lib/cpp/transform.pxd +++ b/python/cudf/cudf/_lib/cpp/transform.pxd @@ -46,14 +46,6 @@ cdef extern from "cudf/transform.hpp" namespace "cudf" nogil: column_view outmask_view, ) except + - cdef unique_ptr[column] 
generalized_masked_op( - table_view data_view, - string udf, - data_type output_type, - column_view outcol_view, - column_view outmask_view - ) except + - cdef pair[unique_ptr[table], unique_ptr[column]] encode( table_view input ) except + diff --git a/python/cudf/cudf/_lib/transform.pyx b/python/cudf/cudf/_lib/transform.pyx index 027ff65a13c..2c61be00131 100644 --- a/python/cudf/cudf/_lib/transform.pyx +++ b/python/cudf/cudf/_lib/transform.pyx @@ -127,38 +127,6 @@ def transform(Column input, op): return Column.from_unique_ptr(move(c_output)) -def generalized_masked_op(Table data, op, Column output_column, Column output_mask): - cdef string c_str - cdef type_id c_tid - cdef data_type c_dtype - - cdef table_view data_view = data.view() - - from cudf.core.udf import compile_udf, demo_kernel - #c_str = compile_udf(op).encode('UTF-8') - c_str = demo_kernel.encode('UTF-8') - c_tid = ( - np_to_cudf_types[np.dtype('int64')] - ) - c_dtype = data_type(c_tid) - - cdef column_view outcol_view = output_column.view() - cdef column_view outmsk_view = output_mask.view() - - with nogil: - c_output = move(libcudf_transform.generalized_masked_op( - data_view, - c_str, - c_dtype, - outcol_view, - outmsk_view - )) - - output_mask_real = bools_to_mask(output_mask) - output_column = output_column.set_mask(output_mask_real) - - return output_column - def masked_udf(Table incols, op, Column output_column, Column output_mask): cdef table_view data_view = incols.data_view() cdef string c_str = op.encode("UTF-8") From c796dc4360753925a19f97c76c3b571abf481343 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 3 May 2021 14:37:20 -0700 Subject: [PATCH 030/132] more code cleanup/renaming --- cpp/cmake/Modules/JitifyPreprocessKernels.cmake | 3 +-- cpp/include/cudf/transform.hpp | 7 ------- cpp/src/transform/jit/baked_udf_requirements.cu | 2 -- .../jit/{binop_kernel.cu => masked_udf_kernel.cu} | 0 cpp/src/transform/transform.cpp | 4 ++-- 5 files changed, 3 insertions(+), 13 deletions(-) 
delete mode 100644 cpp/src/transform/jit/baked_udf_requirements.cu rename cpp/src/transform/jit/{binop_kernel.cu => masked_udf_kernel.cu} (100%) diff --git a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake index c481301e335..7e2ec5254d3 100644 --- a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake +++ b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake @@ -56,10 +56,9 @@ endfunction() jit_preprocess_files(SOURCE_DIRECTORY ${CUDF_SOURCE_DIR}/src FILES binaryop/jit/kernel.cu - transform/jit/binop_kernel.cu + transform/jit/masked_udf_kernel.cu transform/jit/kernel.cu rolling/jit/kernel.cu - transform/jit/baked_udf_requirements.cu ) add_custom_target(jitify_preprocess_run DEPENDS ${JIT_PREPROCESSED_FILES}) diff --git a/cpp/include/cudf/transform.hpp b/cpp/include/cudf/transform.hpp index 7e5c1797d11..eb0f9251ce9 100644 --- a/cpp/include/cudf/transform.hpp +++ b/cpp/include/cudf/transform.hpp @@ -62,13 +62,6 @@ std::unique_ptr masked_binary_op( column_view const& outmsk_view, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -std::unique_ptr generalized_masked_op( - table_view const& data_view, - std::string const& udf, - data_type output_type, - column_view const& outcol_view, - column_view const& outmsk_view, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Creates a null_mask from `input` by converting `NaN` to null and diff --git a/cpp/src/transform/jit/baked_udf_requirements.cu b/cpp/src/transform/jit/baked_udf_requirements.cu deleted file mode 100644 index d31f1d9f899..00000000000 --- a/cpp/src/transform/jit/baked_udf_requirements.cu +++ /dev/null @@ -1,2 +0,0 @@ -#include -#include diff --git a/cpp/src/transform/jit/binop_kernel.cu b/cpp/src/transform/jit/masked_udf_kernel.cu similarity index 100% rename from cpp/src/transform/jit/binop_kernel.cu rename to cpp/src/transform/jit/masked_udf_kernel.cu diff --git a/cpp/src/transform/transform.cpp 
b/cpp/src/transform/transform.cpp index 50abd347c42..493233df0d6 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -15,7 +15,7 @@ */ #include -#include +#include #include @@ -128,7 +128,7 @@ void binary_operation(table_view data_view, rmm::cuda_stream_view generic_stream; - cudf::jit::get_program_cache(*transform_jit_binop_kernel_cu_jit) + cudf::jit::get_program_cache(*transform_jit_masked_udf_kernel_cu_jit) .get_kernel( generic_kernel_name, {}, {{"transform/jit/operation-udf.hpp", generic_cuda_source}}, {"-arch=sm_."}) // ->configure_1d_max_occupancy(0, 0, 0, generic_stream.value()) // From 4f0ab9bdb00c3f4a3f903d45eb5aafff2a1a8205 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 3 May 2021 14:48:14 -0700 Subject: [PATCH 031/132] even more renaming and cleanup --- cpp/include/cudf/transform.hpp | 2 +- cpp/src/transform/jit/masked_udf_kernel.cu | 36 ---------------------- cpp/src/transform/transform.cpp | 30 +++++++++--------- python/cudf/cudf/_lib/cpp/transform.pxd | 2 +- python/cudf/cudf/_lib/transform.pyx | 2 +- 5 files changed, 18 insertions(+), 54 deletions(-) diff --git a/cpp/include/cudf/transform.hpp b/cpp/include/cudf/transform.hpp index eb0f9251ce9..7af849357ec 100644 --- a/cpp/include/cudf/transform.hpp +++ b/cpp/include/cudf/transform.hpp @@ -54,7 +54,7 @@ std::unique_ptr transform( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -std::unique_ptr masked_binary_op( +std::unique_ptr generalized_masked_op( table_view data_view, std::string const& binary_udf, data_type output_type, diff --git a/cpp/src/transform/jit/masked_udf_kernel.cu b/cpp/src/transform/jit/masked_udf_kernel.cu index 1b3ea42fc22..b3bcb0d53fc 100644 --- a/cpp/src/transform/jit/masked_udf_kernel.cu +++ b/cpp/src/transform/jit/masked_udf_kernel.cu @@ -42,43 +42,7 @@ struct Masked { int64_t value; bool valid; }; -/* -template -__global__ -void binop_kernel(cudf::size_type size, - TypeOut* out_data, - TypeLhs* 
lhs_data, - TypeRhs* rhs_data, - bool* out_mask, - cudf::bitmask_type const* lhs_mask, - cudf::size_type lhs_offset, - cudf::bitmask_type const* rhs_mask, - cudf::size_type rhs_offset -) { - int tid = threadIdx.x; - int blkid = blockIdx.x; - int blksz = blockDim.x; - int gridsz = gridDim.x; - int start = tid + blkid * blksz; - int step = blksz * gridsz; - Masked output; - char l_valid; - char r_valid; - long int l_data; - long int r_data; - - for (cudf::size_type i=start; i __device__ auto make_args(cudf::size_type id, TypeIn in_ptr, diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index 493233df0d6..1d8f76d9e2c 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -67,12 +67,12 @@ void unary_operation(mutable_column_view output, cudf::jit::get_data_ptr(input)); } -void binary_operation(table_view data_view, - std::string const& binary_udf, - data_type output_type, - column_view const& outcol_view, - column_view const& outmsk_view, - rmm::mr::device_memory_resource* mr) +void generalized_operation(table_view data_view, + std::string const& binary_udf, + data_type output_type, + column_view const& outcol_view, + column_view const& outmsk_view, + rmm::mr::device_memory_resource* mr) { std::vector template_types( @@ -162,7 +162,7 @@ std::unique_ptr transform(column_view const& input, return output; } -std::unique_ptr masked_binary_op_inner(table_view data_view, +std::unique_ptr generalized_masked_op(table_view data_view, std::string const& binary_udf, data_type output_type, column_view const& outcol_view, @@ -170,7 +170,7 @@ std::unique_ptr masked_binary_op_inner(table_view data_view, rmm::mr::device_memory_resource* mr) { rmm::cuda_stream_view stream = rmm::cuda_stream_default; - transformation::jit::binary_operation(data_view, binary_udf, output_type, outcol_view, outmsk_view, mr); + transformation::jit::generalized_operation(data_view, binary_udf, output_type, outcol_view, outmsk_view, mr); 
std::unique_ptr output; @@ -192,14 +192,14 @@ std::unique_ptr transform(column_view const& input, return detail::transform(input, unary_udf, output_type, is_ptx, rmm::cuda_stream_default, mr); } -std::unique_ptr masked_binary_op(table_view data_view, - std::string const& binary_udf, - data_type output_type, - column_view const& outcol_view, - column_view const& outmsk_view, - rmm::mr::device_memory_resource* mr) +std::unique_ptr generalized_masked_op(table_view data_view, + std::string const& binary_udf, + data_type output_type, + column_view const& outcol_view, + column_view const& outmsk_view, + rmm::mr::device_memory_resource* mr) { - return detail::masked_binary_op_inner(data_view, binary_udf, output_type, outcol_view, outmsk_view, mr); + return detail::generalized_masked_op(data_view, binary_udf, output_type, outcol_view, outmsk_view, mr); } } // namespace cudf diff --git a/python/cudf/cudf/_lib/cpp/transform.pxd b/python/cudf/cudf/_lib/cpp/transform.pxd index d222c461bba..82d45811cb3 100644 --- a/python/cudf/cudf/_lib/cpp/transform.pxd +++ b/python/cudf/cudf/_lib/cpp/transform.pxd @@ -38,7 +38,7 @@ cdef extern from "cudf/transform.hpp" namespace "cudf" nogil: bool is_ptx ) except + - cdef unique_ptr[column] masked_binary_op( + cdef unique_ptr[column] generalized_masked_op( table_view data_view, string binary_udf, data_type output_type, diff --git a/python/cudf/cudf/_lib/transform.pyx b/python/cudf/cudf/_lib/transform.pyx index 2c61be00131..e1b028282f4 100644 --- a/python/cudf/cudf/_lib/transform.pyx +++ b/python/cudf/cudf/_lib/transform.pyx @@ -142,7 +142,7 @@ def masked_udf(Table incols, op, Column output_column, Column output_mask): cdef column_view outmsk_view = output_mask.view() with nogil: - c_output = move(libcudf_transform.masked_binary_op( + c_output = move(libcudf_transform.generalized_masked_op( data_view, c_str, c_dtype, From 3389198d389efb44d5a03368fe758fda3124222a Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 4 May 2021 05:49:34 
-0700 Subject: [PATCH 032/132] remove old code --- python/cudf/cudf/core/udf.py | 88 ------------------------------------ 1 file changed, 88 deletions(-) diff --git a/python/cudf/cudf/core/udf.py b/python/cudf/cudf/core/udf.py index 1931ce7e0d0..ddc89d9e2f2 100644 --- a/python/cudf/cudf/core/udf.py +++ b/python/cudf/cudf/core/udf.py @@ -152,91 +152,3 @@ def compile_udf(func): return kernel, ptx NA = _NAType() - -def make_kernel(n_params): - ''' - create a string containing the right templated kernel - for `func` - ''' - - indent = ' '*18 - - # Hack together the template string - result = '' - - templates = 'template " - result += templates - - # Hack together the function signature - sig = '\n__global__\nvoid genop_kernel(cudf::size_type size,\n' - sig += indent + "TypeOut* out_data,\n" - sig += indent + 'bool* out_mask,\n' - for i in range(n_params): - sig += indent + f"Type{i}* data_{i},\n" - sig += indent + f"cudf::bitmask_type const* mask_{i},\n" - sig += indent + f"cudf::size_type offset_{i},\n" - sig = sig[:-2] + ') {' - - result += sig - result += '\n' - - # standard thread block - result += '\n' - result += '\tint tid = threadIdx.x;\n' - result += '\tint blkid = blockIdx.x;\n' - result += '\tint blksz = blockDim.x;\n' - result += '\tint gridsz = gridDim.x;\n' - result += '\tint start = tid + blkid * blksz;\n' - result += '\tint step = blksz * gridsz;\n' - result += '\n' - - result += '\tMasked output;\n' - - for i in range(n_params): - result += f"\tchar valid_{i};\n" - - # main loop - result += "\tfor (cudf::size_type i=start; i -__global__ -void genop_kernel(cudf::size_type size, cudf::size_type value, TypeOut* out_data) { - - int tid = threadIdx.x; - int blkid = blockIdx.x; - int blksz = blockDim.x; - int gridsz = gridDim.x; - int start = tid + blkid * blksz; - int step = blksz * gridsz; - - for (cudf::size_type i=start; i Date: Tue, 4 May 2021 11:32:23 -0700 Subject: [PATCH 033/132] more cleanup --- python/cudf/cudf/core/dataframe.py | 1 - 
python/cudf/cudf/core/udf.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 1559a276029..a0d92caa829 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -4716,7 +4716,6 @@ def query(self, expr, local_dict=None): return self._apply_boolean_mask(boolmask) def apply(self, func): - breakpoint() return super()._apply(func) diff --git a/python/cudf/cudf/core/udf.py b/python/cudf/cudf/core/udf.py index ddc89d9e2f2..36a47455887 100644 --- a/python/cudf/cudf/core/udf.py +++ b/python/cudf/cudf/core/udf.py @@ -148,7 +148,7 @@ def compile_udf(func): ptx, _ = cuda.compile_ptx_for_current_device(func, to_compiler_sig, device=True) # get the kernel that calls the inlineable function - kernel = make_kernel(n_params) + kernel = None return kernel, ptx NA = _NAType() From f7845e518405a8d68c2cabf1adbb23a381a00eb3 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 4 May 2021 13:56:27 -0700 Subject: [PATCH 034/132] add a decorator to mimic the pandas api better --- python/cudf/cudf/core/dataframe.py | 3 ++- python/cudf/cudf/core/udf.py | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index a0d92caa829..69602d8499c 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -4716,7 +4716,8 @@ def query(self, expr, local_dict=None): return self._apply_boolean_mask(boolmask) def apply(self, func): - return super()._apply(func) + return func(self) + #return super()._apply(func) @applyutils.doc_apply() diff --git a/python/cudf/cudf/core/udf.py b/python/cudf/cudf/core/udf.py index 36a47455887..012cf94d65d 100644 --- a/python/cudf/cudf/core/udf.py +++ b/python/cudf/cudf/core/udf.py @@ -152,3 +152,10 @@ def compile_udf(func): return kernel, ptx NA = _NAType() + +def nulludf(func): + def wrapper(*args): + from cudf import 
DataFrame + to_udf_table = DataFrame({idx: arg for idx, arg in zip(range(len(args)), args)}) + return to_udf_table._apply(func) + return wrapper From 9e89ebde14c7510c66ab065adc0bb65b92b1b059 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 4 May 2021 14:11:46 -0700 Subject: [PATCH 035/132] starting to write tests --- python/cudf/cudf/core/dataframe.py | 2 +- python/cudf/cudf/tests/test_udf_masked_ops.py | 74 +++++++++++++++++++ 2 files changed, 75 insertions(+), 1 deletion(-) create mode 100644 python/cudf/cudf/tests/test_udf_masked_ops.py diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 69602d8499c..2907d541403 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -4715,7 +4715,7 @@ def query(self, expr, local_dict=None): boolmask = queryutils.query_execute(self, expr, callenv) return self._apply_boolean_mask(boolmask) - def apply(self, func): + def apply(self, func, axis=1): return func(self) #return super()._apply(func) diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py new file mode 100644 index 00000000000..b5b39aced20 --- /dev/null +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -0,0 +1,74 @@ +import cudf +from cudf.core.udf import nulludf +from cudf.tests.utils import assert_eq +import pandas as pd + + +def test_apply_basic(): + def func_pdf(x, y): + return x + y + + @nulludf + def func_gdf(x, y): + return x + y + + + gdf = cudf.DataFrame({ + 'a':[1,2,3], + 'b':[4,5,6] + }) + + pdf = gdf.to_pandas() + + expect = pdf.apply(lambda row: func_pdf(row['a'], row['b']), axis=1) + obtain = gdf.apply(lambda row: func_gdf(row['a'], row['b']), axis=1) + + assert_eq(expect, obtain) + +def test_apply_null(): + def func_pdf(x, y): + return x + y + + @nulludf + def func_gdf(x, y): + return x + y + + + gdf = cudf.DataFrame({ + 'a':[1,None,3, None], + 'b':[4,5,None, None] + }) + + pdf = gdf.to_pandas() + + expect = 
pdf.apply(lambda row: func_pdf(row['a'], row['b']), axis=1) + obtain = gdf.apply(lambda row: func_gdf(row['a'], row['b']), axis=1) + + assert_eq(expect, obtain) + +def test_apply_NA_conditional(): + def func_pdf(x, y): + if x is pd.NA: + return y + else: + return x + y + + @nulludf + def func_gdf(x, y): + if x is cudf.NA: + return y + else: + return x + y + + + gdf = cudf.DataFrame({ + 'a':[1,None,3, None], + 'b':[4,5,None, None] + }) + + pdf = gdf.to_pandas() + + expect = pdf.apply(lambda row: func_pdf(row['a'], row['b']), axis=1) + obtain = gdf.apply(lambda row: func_gdf(row['a'], row['b']), axis=1) + + assert_eq(expect, obtain) From e19c8ba96e0b0252f38818ee1633db0c0f6d140d Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 5 May 2021 12:47:58 -0700 Subject: [PATCH 036/132] add tests for constants --- python/cudf/cudf/core/udf.py | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/udf.py b/python/cudf/cudf/core/udf.py index 012cf94d65d..a3114461d22 100644 --- a/python/cudf/cudf/core/udf.py +++ b/python/cudf/cudf/core/udf.py @@ -30,7 +30,7 @@ def __init__(self, value, valid): class MaskedType(types.Type): - def __init__(self): + def __init__(self): # add `value` super().__init__(name="Masked") class NAType(types.Type): @@ -64,11 +64,15 @@ def typer(value, valid): @register_model(MaskedType) class MaskedModel(models.StructModel): def __init__(self, dmm, fe_type): + # fe_type is a Maskedtype instance + # will have a .value attr + # value -> fe_type.value members = [("value", types.int64), ("valid", types.bool_)] models.StructModel.__init__(self, dmm, fe_type, members) register_model(NAType)(models.OpaqueModel) +# types.int64 instance, change to typeclass (types.Integer, types.Number, etc) @lower_builtin(Masked, types.int64, types.bool_) def impl_masked_constructor(context, builder, sig, args): typ = sig.return_type @@ -85,6 +89,8 @@ class MaskedScalarAdd(AbstractTemplate): # abstracttemplate 
vs concretetemplate def generic(self, args, kws): if isinstance(args[0], MaskedType) and isinstance(args[1], MaskedType): + # result type: f(args[0], args[1], op) where f is numba's typing for self.key + # self.key -> operator being used return nb_signature(numba_masked, numba_masked, numba_masked) @@ -113,6 +119,7 @@ def masked_scalar_add_impl(context, builder, sig, args): valid = builder.and_(m1.valid, m2.valid) result.valid = valid with builder.if_then(valid): + # result.value = numba_op(m1.value, m2.value) result.value = builder.add(m1.value, m2.value) return result._getvalue() @@ -131,6 +138,28 @@ def constant_dummy(context, builder, ty, pyval): # This handles None, etc. return context.get_dummy_value() +@cuda_registry.register_global(operator.add) +class MaskedScalarAddConstant(AbstractTemplate): + def generic(self, args, kws): + if isinstance(args[0], MaskedType) and isinstance(args[1], types.Integer): + return nb_signature(numba_masked, numba_masked, types.int64) + +@cuda_lower(operator.add, MaskedType, types.Integer) +def masked_scalar_add_constant_impl(context, builder, sig, input_values): + masked_type, const_type = sig.args + + indata = cgutils.create_struct_proxy(masked_type)(context, builder, value=input_values[0]) + result = cgutils.create_struct_proxy(numba_masked)(context, builder) + #to_add_const = context.get_constant(const_type, input_values[1]) + + result.valid = context.get_constant(types.boolean, 0) + with builder.if_then(indata.valid): + result.value = builder.add(indata.value, input_values[1]) + result.valid = context.get_constant(types.boolean, 1) + + return result._getvalue() + + @cuda.jit(numba_masked(numba_masked, numba_masked), device=True) def masked_add_py(m1, m2): From 9880081495c9fa520f02a968f4f5c123044230db Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 5 May 2021 12:48:21 -0700 Subject: [PATCH 037/132] add failing tests for literal return --- python/cudf/cudf/tests/test_udf_masked_ops.py | 37 +++++++++++++++++++ 1 
file changed, 37 insertions(+) diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index b5b39aced20..a8475a1849f 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -72,3 +72,40 @@ def func_gdf(x, y): obtain = gdf.apply(lambda row: func_gdf(row['a'], row['b']), axis=1) assert_eq(expect, obtain) + + +def test_apply_return_literal(): + # 1. Casting rule literal -> Masked + # -> a) make it so numba knows that we can even promote literals to Masked () + # -> b) implement custom lowering to specify how this actually happens (python only) + + + # 2. Custom unfication code + + + # numba/core/type + def func_pdf(x, y): + if x is pd.NA: + return 5 + else: + return x + y + + @nulludf + def func_gdf(x, y): + if x is cudf.NA: + return 5 # Masked(5, True) + else: + return x + y + + + gdf = cudf.DataFrame({ + 'a':[1,None,3, None], + 'b':[4,5,None, None] + }) + + pdf = gdf.to_pandas() + + expect = pdf.apply(lambda row: func_pdf(row['a'], row['b']), axis=1) + obtain = gdf.apply(lambda row: func_gdf(row['a'], row['b']), axis=1) + + assert_eq(expect, obtain) From 3e6a28098eaf906d34c014651773e4c3e1a36e43 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Fri, 7 May 2021 13:40:33 -0700 Subject: [PATCH 038/132] add NA and add constant tests --- python/cudf/cudf/tests/test_udf_masked_ops.py | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index a8475a1849f..fc0de1a4292 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -43,7 +43,49 @@ def func_gdf(x, y): expect = pdf.apply(lambda row: func_pdf(row['a'], row['b']), axis=1) obtain = gdf.apply(lambda row: func_gdf(row['a'], row['b']), axis=1) + assert_eq(expect, obtain) + + +def test_apply_add_null(): + def func_pdf(x, y): + return x + y + pd.NA 
+ + @nulludf + def func_gdf(x, y): + return x + y + cudf.NA + + + gdf = cudf.DataFrame({ + 'a':[1,None,3, None], + 'b':[4,5,None, None] + }) + + pdf = gdf.to_pandas() + + expect = pdf.apply(lambda row: func_pdf(row['a'], row['b']), axis=1) + obtain = gdf.apply(lambda row: func_gdf(row['a'], row['b']), axis=1) + # TODO: dtype mismatch here + assert_eq(expect, obtain, check_dtype=False) + +def test_apply_add_constant(): + def func_pdf(x, y): + return x + y + 1 + + @nulludf + def func_gdf(x, y): + return x + y + 1 + + + gdf = cudf.DataFrame({ + 'a':[1,None,3, None], + 'b':[4,5,None, None] + }) + + pdf = gdf.to_pandas() + + expect = pdf.apply(lambda row: func_pdf(row['a'], row['b']), axis=1) + obtain = gdf.apply(lambda row: func_gdf(row['a'], row['b']), axis=1) assert_eq(expect, obtain) def test_apply_NA_conditional(): From 3028dbad10980ff039f2450ae7363c97e49e5d62 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Fri, 7 May 2021 13:40:47 -0700 Subject: [PATCH 039/132] MaskedType is parameterized --- python/cudf/cudf/core/udf.py | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/python/cudf/cudf/core/udf.py b/python/cudf/cudf/core/udf.py index a3114461d22..b185e25bdf9 100644 --- a/python/cudf/cudf/core/udf.py +++ b/python/cudf/cudf/core/udf.py @@ -30,19 +30,23 @@ def __init__(self, value, valid): class MaskedType(types.Type): - def __init__(self): # add `value` + def __init__(self, value): # add `value` + self.value = value super().__init__(name="Masked") + + def __repr__(self): + return f"MaskedType({self.value})" class NAType(types.Type): def __init__(self): super().__init__(name="NA") -numba_masked = MaskedType() # name this something more natural - GM +numba_masked = MaskedType(types.int64) # name this something more natural - GM numba_na = NAType() @typeof_impl.register(Masked) def typeof_masked(val, c): - return numba_masked + return Masked(val.value) @typeof_impl.register(_NAType) def typeof_na(val, c): @@ 
-54,7 +58,7 @@ def typer(value, valid): if isinstance(value, types.Integer) and isinstance( valid, types.Boolean ): - return numba_masked + return Masked(value) return typer @@ -91,14 +95,14 @@ def generic(self, args, kws): if isinstance(args[0], MaskedType) and isinstance(args[1], MaskedType): # result type: f(args[0], args[1], op) where f is numba's typing for self.key # self.key -> operator being used - return nb_signature(numba_masked, numba_masked, numba_masked) + return nb_signature(numba_masked, MaskedType(args[0].value), MaskedType(args[1].value)) @cuda_registry.register_global(operator.add) class MaskedScalarAddNull(AbstractTemplate): def generic(self, args, kws): if isinstance(args[0], MaskedType) and isinstance(args[1], NAType): - return nb_signature(numba_masked, numba_masked, numba_na) + return nb_signature(MaskedType(args[0].value), MaskedType(args[0].value), numba_na) @cuda_lower(operator.add, MaskedType, MaskedType) def masked_scalar_add_impl(context, builder, sig, args): @@ -127,9 +131,9 @@ def masked_scalar_add_impl(context, builder, sig, args): @cuda_lower(operator.add, MaskedType, NAType) def masked_scalar_add_na_impl(context, builder, sig, args): -# return_type = sig.return_type + return_type = sig.return_type # use context to get llvm type for a bool - result = cgutils.create_struct_proxy(numba_masked)(context, builder) + result = cgutils.create_struct_proxy(MaskedType(return_type.value))(context, builder) result.valid = context.get_constant(types.boolean, 0) return result._getvalue() @@ -142,14 +146,15 @@ def constant_dummy(context, builder, ty, pyval): class MaskedScalarAddConstant(AbstractTemplate): def generic(self, args, kws): if isinstance(args[0], MaskedType) and isinstance(args[1], types.Integer): - return nb_signature(numba_masked, numba_masked, types.int64) + # TODO - need to get the result type of args[0].value and args[1] + return nb_signature(MaskedType(args[0].value), MaskedType(args[0].value), types.int64) 
@cuda_lower(operator.add, MaskedType, types.Integer) def masked_scalar_add_constant_impl(context, builder, sig, input_values): masked_type, const_type = sig.args - indata = cgutils.create_struct_proxy(masked_type)(context, builder, value=input_values[0]) - result = cgutils.create_struct_proxy(numba_masked)(context, builder) + indata = cgutils.create_struct_proxy(MaskedType(masked_type.value))(context, builder, value=input_values[0]) + result = cgutils.create_struct_proxy(MaskedType(masked_type.value))(context, builder) #to_add_const = context.get_constant(const_type, input_values[1]) result.valid = context.get_constant(types.boolean, 0) @@ -160,15 +165,6 @@ def masked_scalar_add_constant_impl(context, builder, sig, input_values): return result._getvalue() - -@cuda.jit(numba_masked(numba_masked, numba_masked), device=True) -def masked_add_py(m1, m2): - return m1 + m2 - - -def masked_add_py_2(m1, m2): - return m1 + m2 - def compile_udf(func): n_params = len(py_signature(func).parameters) to_compiler_sig = tuple(numba_masked for arg in range(n_params)) From ecd8527815b2c12d959ea2e6e0ade937d11e4604 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Fri, 7 May 2021 14:03:39 -0700 Subject: [PATCH 040/132] forward progress on mixed typing --- python/cudf/cudf/core/udf.py | 38 ++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/python/cudf/cudf/core/udf.py b/python/cudf/cudf/core/udf.py index b185e25bdf9..6c8d7837d93 100644 --- a/python/cudf/cudf/core/udf.py +++ b/python/cudf/cudf/core/udf.py @@ -30,12 +30,14 @@ def __init__(self, value, valid): class MaskedType(types.Type): - def __init__(self, value): # add `value` - self.value = value + def __init__(self, value): + # MaskedType in numba shall be parameterized + # with a value type + self.value_type = value super().__init__(name="Masked") def __repr__(self): - return f"MaskedType({self.value})" + return f"MaskedType({self.value_type})" class NAType(types.Type): def 
__init__(self): @@ -68,10 +70,7 @@ def typer(value, valid): @register_model(MaskedType) class MaskedModel(models.StructModel): def __init__(self, dmm, fe_type): - # fe_type is a Maskedtype instance - # will have a .value attr - # value -> fe_type.value - members = [("value", types.int64), ("valid", types.bool_)] + members = [("value", fe_type.value_type), ("valid", types.bool_)] models.StructModel.__init__(self, dmm, fe_type, members) register_model(NAType)(models.OpaqueModel) @@ -93,16 +92,25 @@ class MaskedScalarAdd(AbstractTemplate): # abstracttemplate vs concretetemplate def generic(self, args, kws): if isinstance(args[0], MaskedType) and isinstance(args[1], MaskedType): - # result type: f(args[0], args[1], op) where f is numba's typing for self.key - # self.key -> operator being used - return nb_signature(numba_masked, MaskedType(args[0].value), MaskedType(args[1].value)) + # In the case of op(Masked, Masked), the return type is a Masked + # such that Masked.value + return_type = self.context.resolve_function_type( + self.key, + ( + args[0].value_type, + args[1].value_type + ), + kws + ).return_type + return nb_signature(MaskedType(return_type), MaskedType(args[0].value_type), MaskedType(args[1].value_type)) @cuda_registry.register_global(operator.add) class MaskedScalarAddNull(AbstractTemplate): def generic(self, args, kws): if isinstance(args[0], MaskedType) and isinstance(args[1], NAType): - return nb_signature(MaskedType(args[0].value), MaskedType(args[0].value), numba_na) + # in the case + return nb_signature(MaskedType(args[0].value_type), MaskedType(args[0].value_type), numba_na) @cuda_lower(operator.add, MaskedType, MaskedType) def masked_scalar_add_impl(context, builder, sig, args): @@ -133,7 +141,7 @@ def masked_scalar_add_impl(context, builder, sig, args): def masked_scalar_add_na_impl(context, builder, sig, args): return_type = sig.return_type # use context to get llvm type for a bool - result = 
cgutils.create_struct_proxy(MaskedType(return_type.value))(context, builder) + result = cgutils.create_struct_proxy(MaskedType(return_type.value_type))(context, builder) result.valid = context.get_constant(types.boolean, 0) return result._getvalue() @@ -147,14 +155,14 @@ class MaskedScalarAddConstant(AbstractTemplate): def generic(self, args, kws): if isinstance(args[0], MaskedType) and isinstance(args[1], types.Integer): # TODO - need to get the result type of args[0].value and args[1] - return nb_signature(MaskedType(args[0].value), MaskedType(args[0].value), types.int64) + return nb_signature(MaskedType(args[0].value_type), MaskedType(args[0].value_type), types.int64) @cuda_lower(operator.add, MaskedType, types.Integer) def masked_scalar_add_constant_impl(context, builder, sig, input_values): masked_type, const_type = sig.args - indata = cgutils.create_struct_proxy(MaskedType(masked_type.value))(context, builder, value=input_values[0]) - result = cgutils.create_struct_proxy(MaskedType(masked_type.value))(context, builder) + indata = cgutils.create_struct_proxy(MaskedType(masked_type.value_type))(context, builder, value=input_values[0]) + result = cgutils.create_struct_proxy(MaskedType(masked_type.value_type))(context, builder) #to_add_const = context.get_constant(const_type, input_values[1]) result.valid = context.get_constant(types.boolean, 0) From 5791413dda756e8c82380c72ca653a57bd1ff492 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Fri, 7 May 2021 14:07:39 -0700 Subject: [PATCH 041/132] generalize MaskedScalarAddConstant --- python/cudf/cudf/core/udf.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/core/udf.py b/python/cudf/cudf/core/udf.py index 6c8d7837d93..7261c36cf27 100644 --- a/python/cudf/cudf/core/udf.py +++ b/python/cudf/cudf/core/udf.py @@ -109,8 +109,10 @@ def generic(self, args, kws): class MaskedScalarAddNull(AbstractTemplate): def generic(self, args, kws): if isinstance(args[0], 
MaskedType) and isinstance(args[1], NAType): - # in the case - return nb_signature(MaskedType(args[0].value_type), MaskedType(args[0].value_type), numba_na) + # In the case of op(Masked, NA), the result has the same + # dtype as the original regardless of what it is + return_type = args[0].value_type + return nb_signature(MaskedType(return_type), MaskedType(args[0].value_type), numba_na) @cuda_lower(operator.add, MaskedType, MaskedType) def masked_scalar_add_impl(context, builder, sig, args): @@ -154,8 +156,17 @@ def constant_dummy(context, builder, ty, pyval): class MaskedScalarAddConstant(AbstractTemplate): def generic(self, args, kws): if isinstance(args[0], MaskedType) and isinstance(args[1], types.Integer): - # TODO - need to get the result type of args[0].value and args[1] - return nb_signature(MaskedType(args[0].value_type), MaskedType(args[0].value_type), types.int64) + # In the case of op(Masked, constant), we resolve the type between + # the Masked value_type and the constant's type directly + return_type = self.context.resolve_function_type( + self.key, + ( + args[0].value_type, + args[1] + ), + kws + ).return_type + return nb_signature(MaskedType(return_type), MaskedType(args[0].value_type), args[1]) @cuda_lower(operator.add, MaskedType, types.Integer) def masked_scalar_add_constant_impl(context, builder, sig, input_values): From 77c8ee4971fb00ddeede28415df55e3a8c57b3ec Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Fri, 7 May 2021 14:25:03 -0700 Subject: [PATCH 042/132] write a signature for any incoming types --- python/cudf/cudf/core/frame.py | 5 +---- python/cudf/cudf/core/udf.py | 10 +++++++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index e0d2746f22c..c419021e651 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -1421,10 +1421,7 @@ def _quantiles( def _apply(self, func): from cudf.core.udf import compile_udf - if not 
all(np.dtype('int64') == dtype for dtype in self.dtypes): - raise TypeError("Currently only int64 is supported") - - kernel, ptx = compile_udf(func) + kernel, ptx = compile_udf(func, self.dtypes) output_column = cudf.core.column.column_empty(row_count=len(self), dtype='int64') output_mask = cudf.core.column.column_empty(row_count=len(self), dtype='bool') diff --git a/python/cudf/cudf/core/udf.py b/python/cudf/cudf/core/udf.py index 7261c36cf27..92233854249 100644 --- a/python/cudf/cudf/core/udf.py +++ b/python/cudf/cudf/core/udf.py @@ -18,6 +18,8 @@ from numba.extending import types import inspect +from numba.np import numpy_support + from llvmlite import ir from cudf.core.scalar import _NAType @@ -76,6 +78,7 @@ def __init__(self, dmm, fe_type): register_model(NAType)(models.OpaqueModel) # types.int64 instance, change to typeclass (types.Integer, types.Number, etc) +''' @lower_builtin(Masked, types.int64, types.bool_) def impl_masked_constructor(context, builder, sig, args): typ = sig.return_type @@ -85,7 +88,7 @@ def impl_masked_constructor(context, builder, sig, args): masked.value = value masked.valid = valid return masked._getvalue() # return a pointer to the struct I created - +''' @cuda_registry.register_global(operator.add) class MaskedScalarAdd(AbstractTemplate): @@ -184,10 +187,11 @@ def masked_scalar_add_constant_impl(context, builder, sig, input_values): return result._getvalue() -def compile_udf(func): +def compile_udf(func, dtypes): n_params = len(py_signature(func).parameters) - to_compiler_sig = tuple(numba_masked for arg in range(n_params)) + + to_compiler_sig = tuple(MaskedType(arg) for arg in (numpy_support.from_dtype(np_type) for np_type in dtypes)) # Get the inlineable PTX function ptx, _ = cuda.compile_ptx_for_current_device(func, to_compiler_sig, device=True) From 85f1fbac525888f4acb397c92f515e6f724836d8 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Fri, 7 May 2021 14:25:49 -0700 Subject: [PATCH 043/132] reformat code --- 
python/cudf/cudf/core/udf.py | 99 +++++++++++++++++++++++++----------- 1 file changed, 68 insertions(+), 31 deletions(-) diff --git a/python/cudf/cudf/core/udf.py b/python/cudf/cudf/core/udf.py index 92233854249..cb71b905447 100644 --- a/python/cudf/cudf/core/udf.py +++ b/python/cudf/cudf/core/udf.py @@ -14,7 +14,10 @@ from inspect import signature as py_signature from numba.core.typing.templates import AbstractTemplate from numba.cuda.cudadecl import registry as cuda_registry -from numba.cuda.cudaimpl import lower as cuda_lower, registry as cuda_lowering_registry +from numba.cuda.cudaimpl import ( + lower as cuda_lower, + registry as cuda_lowering_registry, +) from numba.extending import types import inspect @@ -25,6 +28,7 @@ from numba.core.extending import make_attribute_wrapper + class Masked(object): def __init__(self, value, valid): self.value = value @@ -37,48 +41,56 @@ def __init__(self, value): # with a value type self.value_type = value super().__init__(name="Masked") - + def __repr__(self): return f"MaskedType({self.value_type})" + class NAType(types.Type): def __init__(self): super().__init__(name="NA") + numba_masked = MaskedType(types.int64) # name this something more natural - GM numba_na = NAType() + @typeof_impl.register(Masked) def typeof_masked(val, c): return Masked(val.value) + @typeof_impl.register(_NAType) def typeof_na(val, c): return numba_na + @type_callable(Masked) def type_masked(context): def typer(value, valid): if isinstance(value, types.Integer) and isinstance( valid, types.Boolean ): - return Masked(value) + return Masked(value) return typer + make_attribute_wrapper(MaskedType, "value", "value") make_attribute_wrapper(MaskedType, "valid", "valid") + @register_model(MaskedType) class MaskedModel(models.StructModel): def __init__(self, dmm, fe_type): members = [("value", fe_type.value_type), ("valid", types.bool_)] models.StructModel.__init__(self, dmm, fe_type, members) + register_model(NAType)(models.OpaqueModel) # types.int64 
instance, change to typeclass (types.Integer, types.Number, etc) -''' +""" @lower_builtin(Masked, types.int64, types.bool_) def impl_masked_constructor(context, builder, sig, args): typ = sig.return_type @@ -88,7 +100,8 @@ def impl_masked_constructor(context, builder, sig, args): masked.value = value masked.valid = valid return masked._getvalue() # return a pointer to the struct I created -''' +""" + @cuda_registry.register_global(operator.add) class MaskedScalarAdd(AbstractTemplate): @@ -96,26 +109,30 @@ class MaskedScalarAdd(AbstractTemplate): def generic(self, args, kws): if isinstance(args[0], MaskedType) and isinstance(args[1], MaskedType): # In the case of op(Masked, Masked), the return type is a Masked - # such that Masked.value + # such that Masked.value return_type = self.context.resolve_function_type( - self.key, - ( - args[0].value_type, - args[1].value_type - ), - kws + self.key, (args[0].value_type, args[1].value_type), kws ).return_type - return nb_signature(MaskedType(return_type), MaskedType(args[0].value_type), MaskedType(args[1].value_type)) + return nb_signature( + MaskedType(return_type), + MaskedType(args[0].value_type), + MaskedType(args[1].value_type), + ) @cuda_registry.register_global(operator.add) class MaskedScalarAddNull(AbstractTemplate): def generic(self, args, kws): if isinstance(args[0], MaskedType) and isinstance(args[1], NAType): - # In the case of op(Masked, NA), the result has the same + # In the case of op(Masked, NA), the result has the same # dtype as the original regardless of what it is return_type = args[0].value_type - return nb_signature(MaskedType(return_type), MaskedType(args[0].value_type), numba_na) + return nb_signature( + MaskedType(return_type), + MaskedType(args[0].value_type), + numba_na, + ) + @cuda_lower(operator.add, MaskedType, MaskedType) def masked_scalar_add_impl(context, builder, sig, args): @@ -146,38 +163,48 @@ def masked_scalar_add_impl(context, builder, sig, args): def 
masked_scalar_add_na_impl(context, builder, sig, args): return_type = sig.return_type # use context to get llvm type for a bool - result = cgutils.create_struct_proxy(MaskedType(return_type.value_type))(context, builder) + result = cgutils.create_struct_proxy(MaskedType(return_type.value_type))( + context, builder + ) result.valid = context.get_constant(types.boolean, 0) return result._getvalue() + @cuda_lowering_registry.lower_constant(NAType) def constant_dummy(context, builder, ty, pyval): # This handles None, etc. return context.get_dummy_value() + @cuda_registry.register_global(operator.add) class MaskedScalarAddConstant(AbstractTemplate): def generic(self, args, kws): - if isinstance(args[0], MaskedType) and isinstance(args[1], types.Integer): + if isinstance(args[0], MaskedType) and isinstance( + args[1], types.Integer + ): # In the case of op(Masked, constant), we resolve the type between # the Masked value_type and the constant's type directly return_type = self.context.resolve_function_type( - self.key, - ( - args[0].value_type, - args[1] - ), - kws + self.key, (args[0].value_type, args[1]), kws ).return_type - return nb_signature(MaskedType(return_type), MaskedType(args[0].value_type), args[1]) + return nb_signature( + MaskedType(return_type), + MaskedType(args[0].value_type), + args[1], + ) + @cuda_lower(operator.add, MaskedType, types.Integer) def masked_scalar_add_constant_impl(context, builder, sig, input_values): masked_type, const_type = sig.args - indata = cgutils.create_struct_proxy(MaskedType(masked_type.value_type))(context, builder, value=input_values[0]) - result = cgutils.create_struct_proxy(MaskedType(masked_type.value_type))(context, builder) - #to_add_const = context.get_constant(const_type, input_values[1]) + indata = cgutils.create_struct_proxy(MaskedType(masked_type.value_type))( + context, builder, value=input_values[0] + ) + result = cgutils.create_struct_proxy(MaskedType(masked_type.value_type))( + context, builder + ) + # 
to_add_const = context.get_constant(const_type, input_values[1]) result.valid = context.get_constant(types.boolean, 0) with builder.if_then(indata.valid): @@ -189,21 +216,31 @@ def masked_scalar_add_constant_impl(context, builder, sig, input_values): def compile_udf(func, dtypes): n_params = len(py_signature(func).parameters) - - to_compiler_sig = tuple(MaskedType(arg) for arg in (numpy_support.from_dtype(np_type) for np_type in dtypes)) + to_compiler_sig = tuple( + MaskedType(arg) + for arg in (numpy_support.from_dtype(np_type) for np_type in dtypes) + ) # Get the inlineable PTX function - ptx, _ = cuda.compile_ptx_for_current_device(func, to_compiler_sig, device=True) + ptx, _ = cuda.compile_ptx_for_current_device( + func, to_compiler_sig, device=True + ) # get the kernel that calls the inlineable function kernel = None return kernel, ptx + NA = _NAType() + def nulludf(func): def wrapper(*args): from cudf import DataFrame - to_udf_table = DataFrame({idx: arg for idx, arg in zip(range(len(args)), args)}) + + to_udf_table = DataFrame( + {idx: arg for idx, arg in zip(range(len(args)), args)} + ) return to_udf_table._apply(func) + return wrapper From 22c220c8c64eb46e6652d6ae0ed6e9c7391aa4a9 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Sat, 8 May 2021 14:24:12 -0700 Subject: [PATCH 044/132] need a separate __hash__ for different MaskedType --- python/cudf/cudf/core/udf.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/core/udf.py b/python/cudf/cudf/core/udf.py index cb71b905447..519ef43b31b 100644 --- a/python/cudf/cudf/core/udf.py +++ b/python/cudf/cudf/core/udf.py @@ -39,19 +39,20 @@ class MaskedType(types.Type): def __init__(self, value): # MaskedType in numba shall be parameterized # with a value type - self.value_type = value super().__init__(name="Masked") + self.value_type = value def __repr__(self): return f"MaskedType({self.value_type})" + def __hash__(self): + return self.__repr__().__hash__() class 
NAType(types.Type): def __init__(self): super().__init__(name="NA") -numba_masked = MaskedType(types.int64) # name this something more natural - GM numba_na = NAType() @@ -216,7 +217,7 @@ def masked_scalar_add_constant_impl(context, builder, sig, input_values): def compile_udf(func, dtypes): n_params = len(py_signature(func).parameters) - + breakpoint() to_compiler_sig = tuple( MaskedType(arg) for arg in (numpy_support.from_dtype(np_type) for np_type in dtypes) From 1ba3338a273cefcdd721c4621836d428bfa6389f Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 10 May 2021 11:21:06 -0700 Subject: [PATCH 045/132] first sign that mixed typing works end to end --- cpp/src/transform/jit/masked_udf_kernel.cu | 5 +++-- python/cudf/cudf/_lib/transform.pyx | 2 +- python/cudf/cudf/core/frame.py | 4 ++-- python/cudf/cudf/core/udf.py | 16 ++++++++++------ 4 files changed, 16 insertions(+), 11 deletions(-) diff --git a/cpp/src/transform/jit/masked_udf_kernel.cu b/cpp/src/transform/jit/masked_udf_kernel.cu index b3bcb0d53fc..d384ffc61b1 100644 --- a/cpp/src/transform/jit/masked_udf_kernel.cu +++ b/cpp/src/transform/jit/masked_udf_kernel.cu @@ -38,8 +38,9 @@ namespace cudf { namespace transformation { namespace jit { +template struct Masked { - int64_t value; + T value; bool valid; }; @@ -82,7 +83,7 @@ void generic_udf_kernel(cudf::size_type size, int start = tid + blkid * blksz; int step = blksz * gridsz; - Masked output; + Masked output; for (cudf::size_type i=start; i ( - np_to_cudf_types[np.dtype('int64')] + np_to_cudf_types[output_column.dtype] ) c_dtype = data_type(c_tid) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index c419021e651..b8151e804e9 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -1421,9 +1421,9 @@ def _quantiles( def _apply(self, func): from cudf.core.udf import compile_udf - kernel, ptx = compile_udf(func, self.dtypes) + output_dtype, ptx = compile_udf(func, self.dtypes) - output_column 
= cudf.core.column.column_empty(row_count=len(self), dtype='int64') + output_column = cudf.core.column.column_empty(row_count=len(self), dtype=output_dtype) output_mask = cudf.core.column.column_empty(row_count=len(self), dtype='bool') result = cudf._lib.transform.masked_udf(self, ptx, output_column, output_mask) diff --git a/python/cudf/cudf/core/udf.py b/python/cudf/cudf/core/udf.py index 519ef43b31b..82415645f24 100644 --- a/python/cudf/cudf/core/udf.py +++ b/python/cudf/cudf/core/udf.py @@ -155,7 +155,11 @@ def masked_scalar_add_impl(context, builder, sig, args): result.valid = valid with builder.if_then(valid): # result.value = numba_op(m1.value, m2.value) - result.value = builder.add(m1.value, m2.value) + result.value = context.compile_internal( + builder, + lambda x, y: x + y, + nb_signature(masked_return_type.value_type, masked_type_1.value_type, masked_type_2.value_type), (m1.value, m2.value) + ) return result._getvalue() @@ -217,19 +221,19 @@ def masked_scalar_add_constant_impl(context, builder, sig, input_values): def compile_udf(func, dtypes): n_params = len(py_signature(func).parameters) - breakpoint() to_compiler_sig = tuple( MaskedType(arg) for arg in (numpy_support.from_dtype(np_type) for np_type in dtypes) ) # Get the inlineable PTX function - ptx, _ = cuda.compile_ptx_for_current_device( + ptx, numba_output_type = cuda.compile_ptx_for_current_device( func, to_compiler_sig, device=True ) + numpy_output_type = numpy_support.as_dtype( + numba_output_type.value_type + ) - # get the kernel that calls the inlineable function - kernel = None - return kernel, ptx + return numpy_output_type, ptx NA = _NAType() From be062287af6e7978287ddd0a6b2f0b7e132c2aa1 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 10 May 2021 12:02:38 -0700 Subject: [PATCH 046/132] add tests for columns of mixed data type --- python/cudf/cudf/tests/test_udf_masked_ops.py | 32 +++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git 
a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index fc0de1a4292..252ddd24088 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -1,8 +1,9 @@ import cudf from cudf.core.udf import nulludf -from cudf.tests.utils import assert_eq +from cudf.tests.utils import assert_eq, NUMERIC_TYPES import pandas as pd - +import itertools +import pytest def test_apply_basic(): def func_pdf(x, y): @@ -116,6 +117,33 @@ def func_gdf(x, y): assert_eq(expect, obtain) +@pytest.mark.parametrize('dtype_a', list(NUMERIC_TYPES)) +@pytest.mark.parametrize('dtype_b', list(NUMERIC_TYPES)) +def test_apply_mixed_dtypes(dtype_a, dtype_b): + def func_pdf(x, y): + return x + y + + @nulludf + def func_gdf(x, y): + return x + y + + gdf = cudf.DataFrame({ + 'a':[1.5,None,3, None], + 'b':[4,5,None, None] + }) + gdf['a'] = gdf['a'].astype(dtype_a) + gdf['b'] = gdf['b'].astype(dtype_b) + + pdf = gdf.to_pandas() + + expect = pdf.apply(lambda row: func_pdf(row['a'], row['b']), axis=1) + obtain = gdf.apply(lambda row: func_gdf(row['a'], row['b']), axis=1) + + # currently, cases where one side is float32 fail, pandas doing some + # weird casting here and getting float64 always + assert_eq(expect, obtain) + + def test_apply_return_literal(): # 1. 
Casting rule literal -> Masked # -> a) make it so numba knows that we can even promote literals to Masked () From 029203b5fb34ab0070cc5e2f8b0ff0f5c4ba67b6 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 10 May 2021 12:25:39 -0700 Subject: [PATCH 047/132] incorporate grahams custom unification of extensiontypes with literals --- python/cudf/cudf/core/udf.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/core/udf.py b/python/cudf/cudf/core/udf.py index 82415645f24..5726df553c8 100644 --- a/python/cudf/cudf/core/udf.py +++ b/python/cudf/cudf/core/udf.py @@ -13,7 +13,9 @@ from numba.core.typing import signature as nb_signature from inspect import signature as py_signature from numba.core.typing.templates import AbstractTemplate -from numba.cuda.cudadecl import registry as cuda_registry +from numba.cuda.cudadecl import registry as cuda_decl_registry +from numba.cuda.cudaimpl import registry as cuda_impl_registry + from numba.cuda.cudaimpl import ( lower as cuda_lower, registry as cuda_lowering_registry, @@ -48,6 +50,14 @@ def __repr__(self): def __hash__(self): return self.__repr__().__hash__() + def unify(self, context, other): + breakpoint() + unified = context.unify_pairs(self.value_type, other) + if unified is None: + return None + + return MaskedType(unified) + class NAType(types.Type): def __init__(self): super().__init__(name="NA") @@ -104,7 +114,7 @@ def impl_masked_constructor(context, builder, sig, args): """ -@cuda_registry.register_global(operator.add) +@cuda_decl_registry.register_global(operator.add) class MaskedScalarAdd(AbstractTemplate): # abstracttemplate vs concretetemplate def generic(self, args, kws): @@ -121,7 +131,7 @@ def generic(self, args, kws): ) -@cuda_registry.register_global(operator.add) +@cuda_decl_registry.register_global(operator.add) class MaskedScalarAddNull(AbstractTemplate): def generic(self, args, kws): if isinstance(args[0], MaskedType) and 
isinstance(args[1], NAType): @@ -181,7 +191,7 @@ def constant_dummy(context, builder, ty, pyval): return context.get_dummy_value() -@cuda_registry.register_global(operator.add) +@cuda_decl_registry.register_global(operator.add) class MaskedScalarAddConstant(AbstractTemplate): def generic(self, args, kws): if isinstance(args[0], MaskedType) and isinstance( @@ -218,6 +228,15 @@ def masked_scalar_add_constant_impl(context, builder, sig, input_values): return result._getvalue() +# To handle the unification, we need to support casting from any type to an +# extension type. The cast implementation takes the value passed in and returns +# an extension struct wrapping that value. +@cuda_impl_registry.lower_cast(types.Any, MaskedType) +def cast_primitive_to_extension(context, builder, fromty, toty, val): + casted = context.cast(builder, val, fromty, toty.value_type) + ext = cgutils.create_struct_proxy(toty)(context, builder) + ext.value = casted + return ext._getvalue() def compile_udf(func, dtypes): n_params = len(py_signature(func).parameters) From f024bf76ea2895b159c4fedce5b5d35879d25635 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 10 May 2021 13:09:53 -0700 Subject: [PATCH 048/132] unify MaskedType and NAType and add a huge comment --- python/cudf/cudf/core/udf.py | 50 ++++++++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/udf.py b/python/cudf/cudf/core/udf.py index 5726df553c8..29f06744593 100644 --- a/python/cudf/cudf/core/udf.py +++ b/python/cudf/cudf/core/udf.py @@ -51,7 +51,53 @@ def __hash__(self): return self.__repr__().__hash__() def unify(self, context, other): - breakpoint() + ''' + Logic for sorting out what to do when the UDF conditionally + returns a `MaskedType`, an `NAType`, or a literal based off + the data at runtime. + + In this framework, every input column is treated as having + type `MaskedType`. 
Operations like `x + y` are understood + as translating to: + + `Masked(value=x, valid=True) + Masked(value=y, valid=True)` + + This means if the user writes a function such as + def f(x, y): + return x + y + + numba sees this function as: + f(x: MaskedType, y: MaskedType) -> MaskedType + + However if the user writes something like: + def f(x, y): + if x > 5: + return 42 + else: + return x + y + + numba now sees this as + f(x: MaskedType, y: MaskedType) -> MaskedType OR literal + + In general, when numba sees a function that could return + more than a single type, it invokes unification to find a + common type that can hold all possible results, eg given + a function that could return an int8 or an int16, numba + will resolve the type to int16. However it does not know + how to unify MaskedType with primitive types. We need to + actually define the common type between MaskedType and + literals, as well as between a Masked and an NA. For full + generality, we'd need to unify NA and literal as well. + ''' + + # If we have Masked and NA, the output should be a + # MaskedType with the original type as its value_type + if isinstance(other, NAType): + return MaskedType(self.value_type) + + # if we have MaskedType and Literal, the output should be + # determined from the MaskedType.value_type (which is a + # primitive type) and other unified = context.unify_pairs(self.value_type, other) if unified is None: return None @@ -232,7 +278,7 @@ def masked_scalar_add_constant_impl(context, builder, sig, input_values): # extension type. The cast implementation takes the value passed in and returns # an extension struct wrapping that value. 
@cuda_impl_registry.lower_cast(types.Any, MaskedType) -def cast_primitive_to_extension(context, builder, fromty, toty, val): +def cast_primitive_to_masked(context, builder, fromty, toty, val): casted = context.cast(builder, val, fromty, toty.value_type) ext = cgutils.create_struct_proxy(toty)(context, builder) ext.value = casted From 2ef4520bb46b1a54588377ae6936aa433f6fe1e3 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 10 May 2021 14:17:13 -0700 Subject: [PATCH 049/132] Questionable unification of Masked with Literal --- python/cudf/cudf/core/udf.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/python/cudf/cudf/core/udf.py b/python/cudf/cudf/core/udf.py index 29f06744593..1bd6e9bef12 100644 --- a/python/cudf/cudf/core/udf.py +++ b/python/cudf/cudf/core/udf.py @@ -108,6 +108,13 @@ class NAType(types.Type): def __init__(self): super().__init__(name="NA") + def unify(self, context, other): + ''' + Masked <-> NA works from above + Literal <-> NA -> Masked + ''' + if isinstance(other, types.abstract.Literal): + return MaskedType(other.literal_type) numba_na = NAType() @@ -284,6 +291,13 @@ def cast_primitive_to_masked(context, builder, fromty, toty, val): ext.value = casted return ext._getvalue() +@cuda_impl_registry.lower_cast(NAType, MaskedType) +def cast_na_to_masked(context, builder, fromty, toty, val): + result = cgutils.create_struct_proxy(toty)(context, builder) + result.valid = context.get_constant(types.boolean, 0) + + return result._getvalue() + def compile_udf(func, dtypes): n_params = len(py_signature(func).parameters) to_compiler_sig = tuple( From 90d5127c83e28050c4268ca33b4da3ecfb89e11c Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 11 May 2021 07:02:52 -0700 Subject: [PATCH 050/132] removed unused code --- python/cudf/cudf/core/udf.py | 47 ++---------------------------------- 1 file changed, 2 insertions(+), 45 deletions(-) diff --git a/python/cudf/cudf/core/udf.py b/python/cudf/cudf/core/udf.py index 
1bd6e9bef12..9f2cd0b412c 100644 --- a/python/cudf/cudf/core/udf.py +++ b/python/cudf/cudf/core/udf.py @@ -30,13 +30,6 @@ from numba.core.extending import make_attribute_wrapper - -class Masked(object): - def __init__(self, value, valid): - self.value = value - self.valid = valid - - class MaskedType(types.Type): def __init__(self, value): # MaskedType in numba shall be parameterized @@ -116,32 +109,9 @@ def unify(self, context, other): if isinstance(other, types.abstract.Literal): return MaskedType(other.literal_type) -numba_na = NAType() - - -@typeof_impl.register(Masked) -def typeof_masked(val, c): - return Masked(val.value) - - @typeof_impl.register(_NAType) def typeof_na(val, c): - return numba_na - - -@type_callable(Masked) -def type_masked(context): - def typer(value, valid): - if isinstance(value, types.Integer) and isinstance( - valid, types.Boolean - ): - return Masked(value) - - return typer - - -make_attribute_wrapper(MaskedType, "value", "value") -make_attribute_wrapper(MaskedType, "valid", "valid") + return NAType() @register_model(MaskedType) @@ -153,19 +123,6 @@ def __init__(self, dmm, fe_type): register_model(NAType)(models.OpaqueModel) -# types.int64 instance, change to typeclass (types.Integer, types.Number, etc) -""" -@lower_builtin(Masked, types.int64, types.bool_) -def impl_masked_constructor(context, builder, sig, args): - typ = sig.return_type - value, valid = args - - masked = cgutils.create_struct_proxy(typ)(context, builder) - masked.value = value - masked.valid = valid - return masked._getvalue() # return a pointer to the struct I created -""" - @cuda_decl_registry.register_global(operator.add) class MaskedScalarAdd(AbstractTemplate): @@ -194,7 +151,7 @@ def generic(self, args, kws): return nb_signature( MaskedType(return_type), MaskedType(args[0].value_type), - numba_na, + NAType(), ) From 0953bd17a50a75560f871f7122a40e4c40ee2a1f Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 11 May 2021 08:42:58 -0700 Subject: [PATCH 
051/132] move alot of code around and refactor, add comments --- python/cudf/cudf/core/dataframe.py | 1 - python/cudf/cudf/core/frame.py | 4 +- python/cudf/cudf/core/udf.py | 287 ------------------ python/cudf/cudf/core/udf/__init__.py | 1 + python/cudf/cudf/core/udf/lowering.py | 114 +++++++ python/cudf/cudf/core/udf/pipeline.py | 49 +++ python/cudf/cudf/core/udf/typing.py | 186 ++++++++++++ python/cudf/cudf/tests/test_udf_masked_ops.py | 2 +- 8 files changed, 352 insertions(+), 292 deletions(-) delete mode 100644 python/cudf/cudf/core/udf.py create mode 100644 python/cudf/cudf/core/udf/__init__.py create mode 100644 python/cudf/cudf/core/udf/lowering.py create mode 100644 python/cudf/cudf/core/udf/pipeline.py create mode 100644 python/cudf/cudf/core/udf/typing.py diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 2907d541403..91a97ee39e2 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -35,7 +35,6 @@ from cudf.core.index import Index, RangeIndex, as_index from cudf.core.indexing import _DataFrameIlocIndexer, _DataFrameLocIndexer from cudf.core.series import Series -from cudf.core.udf import compile_udf from cudf.core.window import Rolling from cudf.utils import applyutils, docutils, ioutils, queryutils, utils from cudf.utils.docutils import copy_docstring diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index b8151e804e9..fe632af6f27 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -1419,9 +1419,7 @@ def _quantiles( return result def _apply(self, func): - from cudf.core.udf import compile_udf - - output_dtype, ptx = compile_udf(func, self.dtypes) + output_dtype, ptx = cudf.core.udf.pipeline.compile_udf(func, self.dtypes) output_column = cudf.core.column.column_empty(row_count=len(self), dtype=output_dtype) output_mask = cudf.core.column.column_empty(row_count=len(self), dtype='bool') diff --git 
a/python/cudf/cudf/core/udf.py b/python/cudf/cudf/core/udf.py deleted file mode 100644 index 9f2cd0b412c..00000000000 --- a/python/cudf/cudf/core/udf.py +++ /dev/null @@ -1,287 +0,0 @@ -import operator -import numba -import numpy as np -from numba import cuda, njit -from numba.core import cgutils -from numba.core.extending import ( - lower_builtin, - models, - register_model, - type_callable, - typeof_impl, -) -from numba.core.typing import signature as nb_signature -from inspect import signature as py_signature -from numba.core.typing.templates import AbstractTemplate -from numba.cuda.cudadecl import registry as cuda_decl_registry -from numba.cuda.cudaimpl import registry as cuda_impl_registry - -from numba.cuda.cudaimpl import ( - lower as cuda_lower, - registry as cuda_lowering_registry, -) -from numba.extending import types -import inspect - -from numba.np import numpy_support - -from llvmlite import ir -from cudf.core.scalar import _NAType - -from numba.core.extending import make_attribute_wrapper - -class MaskedType(types.Type): - def __init__(self, value): - # MaskedType in numba shall be parameterized - # with a value type - super().__init__(name="Masked") - self.value_type = value - - def __repr__(self): - return f"MaskedType({self.value_type})" - - def __hash__(self): - return self.__repr__().__hash__() - - def unify(self, context, other): - ''' - Logic for sorting out what to do when the UDF conditionally - returns a `MaskedType`, an `NAType`, or a literal based off - the data at runtime. - - In this framework, every input column is treated as having - type `MaskedType`. 
Operations like `x + y` are understood - as translating to: - - `Masked(value=x, valid=True) + Masked(value=y, valid=True)` - - This means if the user writes a function such as - def f(x, y): - return x + y - - numba sees this function as: - f(x: MaskedType, y: MaskedType) -> MaskedType - - However if the user writes something like: - def f(x, y): - if x > 5: - return 42 - else: - return x + y - - numba now sees this as - f(x: MaskedType, y: MaskedType) -> MaskedType OR literal - - In general, when numba sees a function that could return - more than a single type, it invokes unification to find a - common type that can hold all possible results, eg given - a function that could return an int8 or an int16, numba - will resolve the type to int16. However it does not know - how to unify MaskedType with primitive types. We need to - actually define the common type between MaskedType and - literals, as well as between a Masked and an NA. For full - generality, we'd need to unify NA and literal as well. 
- ''' - - # If we have Masked and NA, the output should be a - # MaskedType with the original type as its value_type - if isinstance(other, NAType): - return MaskedType(self.value_type) - - # if we have MaskedType and Literal, the output should be - # determined from the MaskedType.value_type (which is a - # primitive type) and other - unified = context.unify_pairs(self.value_type, other) - if unified is None: - return None - - return MaskedType(unified) - -class NAType(types.Type): - def __init__(self): - super().__init__(name="NA") - - def unify(self, context, other): - ''' - Masked <-> NA works from above - Literal <-> NA -> Masked - ''' - if isinstance(other, types.abstract.Literal): - return MaskedType(other.literal_type) - -@typeof_impl.register(_NAType) -def typeof_na(val, c): - return NAType() - - -@register_model(MaskedType) -class MaskedModel(models.StructModel): - def __init__(self, dmm, fe_type): - members = [("value", fe_type.value_type), ("valid", types.bool_)] - models.StructModel.__init__(self, dmm, fe_type, members) - - -register_model(NAType)(models.OpaqueModel) - - -@cuda_decl_registry.register_global(operator.add) -class MaskedScalarAdd(AbstractTemplate): - # abstracttemplate vs concretetemplate - def generic(self, args, kws): - if isinstance(args[0], MaskedType) and isinstance(args[1], MaskedType): - # In the case of op(Masked, Masked), the return type is a Masked - # such that Masked.value - return_type = self.context.resolve_function_type( - self.key, (args[0].value_type, args[1].value_type), kws - ).return_type - return nb_signature( - MaskedType(return_type), - MaskedType(args[0].value_type), - MaskedType(args[1].value_type), - ) - - -@cuda_decl_registry.register_global(operator.add) -class MaskedScalarAddNull(AbstractTemplate): - def generic(self, args, kws): - if isinstance(args[0], MaskedType) and isinstance(args[1], NAType): - # In the case of op(Masked, NA), the result has the same - # dtype as the original regardless of what it is - 
return_type = args[0].value_type - return nb_signature( - MaskedType(return_type), - MaskedType(args[0].value_type), - NAType(), - ) - - -@cuda_lower(operator.add, MaskedType, MaskedType) -def masked_scalar_add_impl(context, builder, sig, args): - # get the types from the signature - - masked_type_1, masked_type_2 = sig.args - masked_return_type = sig.return_type - - # create LLVM IR structs - m1 = cgutils.create_struct_proxy(masked_type_1)( - context, builder, value=args[0] - ) - m2 = cgutils.create_struct_proxy(masked_type_2)( - context, builder, value=args[1] - ) - result = cgutils.create_struct_proxy(masked_return_type)(context, builder) - - valid = builder.and_(m1.valid, m2.valid) - result.valid = valid - with builder.if_then(valid): - # result.value = numba_op(m1.value, m2.value) - result.value = context.compile_internal( - builder, - lambda x, y: x + y, - nb_signature(masked_return_type.value_type, masked_type_1.value_type, masked_type_2.value_type), (m1.value, m2.value) - ) - - return result._getvalue() - - -@cuda_lower(operator.add, MaskedType, NAType) -def masked_scalar_add_na_impl(context, builder, sig, args): - return_type = sig.return_type - # use context to get llvm type for a bool - result = cgutils.create_struct_proxy(MaskedType(return_type.value_type))( - context, builder - ) - result.valid = context.get_constant(types.boolean, 0) - return result._getvalue() - - -@cuda_lowering_registry.lower_constant(NAType) -def constant_dummy(context, builder, ty, pyval): - # This handles None, etc. 
- return context.get_dummy_value() - - -@cuda_decl_registry.register_global(operator.add) -class MaskedScalarAddConstant(AbstractTemplate): - def generic(self, args, kws): - if isinstance(args[0], MaskedType) and isinstance( - args[1], types.Integer - ): - # In the case of op(Masked, constant), we resolve the type between - # the Masked value_type and the constant's type directly - return_type = self.context.resolve_function_type( - self.key, (args[0].value_type, args[1]), kws - ).return_type - return nb_signature( - MaskedType(return_type), - MaskedType(args[0].value_type), - args[1], - ) - - -@cuda_lower(operator.add, MaskedType, types.Integer) -def masked_scalar_add_constant_impl(context, builder, sig, input_values): - masked_type, const_type = sig.args - - indata = cgutils.create_struct_proxy(MaskedType(masked_type.value_type))( - context, builder, value=input_values[0] - ) - result = cgutils.create_struct_proxy(MaskedType(masked_type.value_type))( - context, builder - ) - # to_add_const = context.get_constant(const_type, input_values[1]) - - result.valid = context.get_constant(types.boolean, 0) - with builder.if_then(indata.valid): - result.value = builder.add(indata.value, input_values[1]) - result.valid = context.get_constant(types.boolean, 1) - - return result._getvalue() - -# To handle the unification, we need to support casting from any type to an -# extension type. The cast implementation takes the value passed in and returns -# an extension struct wrapping that value. 
-@cuda_impl_registry.lower_cast(types.Any, MaskedType) -def cast_primitive_to_masked(context, builder, fromty, toty, val): - casted = context.cast(builder, val, fromty, toty.value_type) - ext = cgutils.create_struct_proxy(toty)(context, builder) - ext.value = casted - return ext._getvalue() - -@cuda_impl_registry.lower_cast(NAType, MaskedType) -def cast_na_to_masked(context, builder, fromty, toty, val): - result = cgutils.create_struct_proxy(toty)(context, builder) - result.valid = context.get_constant(types.boolean, 0) - - return result._getvalue() - -def compile_udf(func, dtypes): - n_params = len(py_signature(func).parameters) - to_compiler_sig = tuple( - MaskedType(arg) - for arg in (numpy_support.from_dtype(np_type) for np_type in dtypes) - ) - # Get the inlineable PTX function - ptx, numba_output_type = cuda.compile_ptx_for_current_device( - func, to_compiler_sig, device=True - ) - numpy_output_type = numpy_support.as_dtype( - numba_output_type.value_type - ) - - return numpy_output_type, ptx - - -NA = _NAType() - - -def nulludf(func): - def wrapper(*args): - from cudf import DataFrame - - to_udf_table = DataFrame( - {idx: arg for idx, arg in zip(range(len(args)), args)} - ) - return to_udf_table._apply(func) - - return wrapper diff --git a/python/cudf/cudf/core/udf/__init__.py b/python/cudf/cudf/core/udf/__init__.py new file mode 100644 index 00000000000..4608cae3228 --- /dev/null +++ b/python/cudf/cudf/core/udf/__init__.py @@ -0,0 +1 @@ +from . 
import typing, lowering diff --git a/python/cudf/cudf/core/udf/lowering.py b/python/cudf/cudf/core/udf/lowering.py new file mode 100644 index 00000000000..7bff26993d9 --- /dev/null +++ b/python/cudf/cudf/core/udf/lowering.py @@ -0,0 +1,114 @@ +from numba.cuda.cudaimpl import ( + lower as cuda_lower, + registry as cuda_lowering_registry, +) +from numba.core.typing import signature as nb_signature +from cudf.core.udf.typing import MaskedType, NAType +from numba.core import cgutils +from numba.cuda.cudaimpl import registry as cuda_impl_registry +import operator +from numba.extending import types + +@cuda_lowering_registry.lower_constant(NAType) +def constant_dummy(context, builder, ty, pyval): + # This handles None, etc. + return context.get_dummy_value() + +# In the typing phase, we declared that a `MaskedType` can be +# added to another `MaskedType` and specified what kind of +# `MaskedType` would result. Now we have to actually fill in +# the implementation details of how to do that. This is where +# we can involve both validities in constructing the answer +@cuda_lower(operator.add, MaskedType, MaskedType) +def masked_scalar_add_impl(context, builder, sig, args): + ''' + Implement `MaskedType` + `MaskedType` + ''' + + masked_type_1, masked_type_2 = sig.args # MaskedType(...), MaskedType(...) + masked_return_type = sig.return_type # MaskedType(...) 
+ + # Let there be two actual LLVM structs backing the two inputs + # https://mapping-high-level-constructs-to-llvm-ir.readthedocs.io/en/latest/basic-constructs/structures.html + m1 = cgutils.create_struct_proxy(masked_type_1)( + context, builder, value=args[0] + ) + m2 = cgutils.create_struct_proxy(masked_type_2)( + context, builder, value=args[1] + ) + + # we will return an output struct + result = cgutils.create_struct_proxy(masked_return_type)(context, builder) + + # compute output validity + valid = builder.and_(m1.valid, m2.valid) + result.valid = valid + with builder.if_then(valid): + # Let numba handle generating the extra LLVM needed to perform + # operations on mixed types, by compiling the final core op between + # the two primitive values as a separate function and calling it + result.value = context.compile_internal( + builder, + lambda x, y: x + y, + nb_signature( + masked_return_type.value_type, + masked_type_1.value_type, + masked_type_2.value_type + ), + (m1.value, m2.value) + ) + return result._getvalue() + +@cuda_lower(operator.add, MaskedType, NAType) +def masked_scalar_add_na_impl(context, builder, sig, args): + ''' + Implement `MaskedType` + `NAType` + The answer to this is known up front so no actual addition + needs to take place + ''' + + return_type = sig.return_type # MaskedType(...) 
+ result = cgutils.create_struct_proxy(MaskedType(return_type.value_type))( + context, builder + ) + + # Invalidate the struct and leave `value` uninitialized + result.valid = context.get_constant(types.boolean, 0) + return result._getvalue() + +@cuda_lower(operator.add, MaskedType, types.Integer) +def masked_scalar_add_constant_impl(context, builder, sig, input_values): + ''' + Implement `MaskedType` + constant + ''' + masked_type, const_type = sig.args + + indata = cgutils.create_struct_proxy(MaskedType(masked_type.value_type))( + context, builder, value=input_values[0] + ) + result = cgutils.create_struct_proxy(MaskedType(masked_type.value_type))( + context, builder + ) + result.valid = context.get_constant(types.boolean, 0) + with builder.if_then(indata.valid): + result.value = builder.add(indata.value, input_values[1]) + result.valid = context.get_constant(types.boolean, 1) + + return result._getvalue() + +# To handle the unification, we need to support casting from any type to an +# extension type. The cast implementation takes the value passed in and returns +# an extension struct wrapping that value. 
+@cuda_impl_registry.lower_cast(types.Any, MaskedType) +def cast_primitive_to_masked(context, builder, fromty, toty, val): + casted = context.cast(builder, val, fromty, toty.value_type) + ext = cgutils.create_struct_proxy(toty)(context, builder) + ext.value = casted + return ext._getvalue() + +@cuda_impl_registry.lower_cast(NAType, MaskedType) +def cast_na_to_masked(context, builder, fromty, toty, val): + result = cgutils.create_struct_proxy(toty)(context, builder) + result.valid = context.get_constant(types.boolean, 0) + + return result._getvalue() diff --git a/python/cudf/cudf/core/udf/pipeline.py b/python/cudf/cudf/core/udf/pipeline.py new file mode 100644 index 00000000000..f85c1ffdd28 --- /dev/null +++ b/python/cudf/cudf/core/udf/pipeline.py @@ -0,0 +1,49 @@ +from inspect import signature as py_signature +from cudf.core.udf.typing import MaskedType +from numba.np import numpy_support +from numba import cuda + +def compile_udf(func, dtypes): + ''' + Generate an inlineable PTX function that will be injected into + a variadic kernel inside libcudf + + assume all input types are `MaskedType(input_col.dtype)` and then + compile the requestied PTX function as a function over those types + ''' + to_compiler_sig = tuple( + MaskedType(arg) + for arg in (numpy_support.from_dtype(np_type) for np_type in dtypes) + ) + # Get the inlineable PTX function + ptx, numba_output_type = cuda.compile_ptx_for_current_device( + func, to_compiler_sig, device=True + ) + numpy_output_type = numpy_support.as_dtype( + numba_output_type.value_type + ) + + return numpy_output_type, ptx + +def nulludf(func): + ''' + Mimic pandas API: + + def f(x, y): + return x + y + df.apply(lambda row: f(row['x'], row['y'])) + + in this scheme, `row` is actually the whole dataframe + `DataFrame` sends `self` in as `row` and subsequently + we end up calling `f` on the resulting columns since + the dataframe is dict-like + ''' + def wrapper(*args): + from cudf import DataFrame + # This probably creates 
copies but is fine for now + to_udf_table = DataFrame( + {idx: arg for idx, arg in zip(range(len(args)), args)} + ) + # Frame._apply + return to_udf_table._apply(func) + return wrapper diff --git a/python/cudf/cudf/core/udf/typing.py b/python/cudf/cudf/core/udf/typing.py new file mode 100644 index 00000000000..28aace52da9 --- /dev/null +++ b/python/cudf/cudf/core/udf/typing.py @@ -0,0 +1,186 @@ +from numba import types +from cudf.core.scalar import _NAType +from numba.core.extending import typeof_impl, register_model, models +from numba.cuda.cudadecl import registry as cuda_decl_registry +from numba.core.typing.templates import AbstractTemplate +from numba.core.typing import signature as nb_signature + +import operator + +class MaskedType(types.Type): + ''' + A numba type consiting of a value of some primitive type + and a validity boolean, over which we can define math ops + ''' + def __init__(self, value): + # MaskedType in numba shall be parameterized + # with a value type + super().__init__(name="Masked") + self.value_type = value + + def __repr__(self): + return f"MaskedType({self.value_type})" + + def __hash__(self): + ''' + Needed so that numba caches type instances with different + `value_type` separately. + ''' + return self.__repr__().__hash__() + + def unify(self, context, other): + ''' + Logic for sorting out what to do when the UDF conditionally + returns a `MaskedType`, an `NAType`, or a literal based off + the data at runtime. + + In this framework, every input column is treated as having + type `MaskedType`. 
Operations like `x + y` are understood + as translating to: + + `Masked(value=x, valid=True) + Masked(value=y, valid=True)` + + This means if the user writes a function such as + def f(x, y): + return x + y + + numba sees this function as: + f(x: MaskedType, y: MaskedType) -> MaskedType + + However if the user writes something like: + def f(x, y): + if x > 5: + return 42 + else: + return x + y + + numba now sees this as + f(x: MaskedType, y: MaskedType) -> MaskedType OR literal + ''' + + # If we have Masked and NA, the output should be a + # MaskedType with the original type as its value_type + if isinstance(other, NAType): + return MaskedType(self.value_type) + + # if we have MaskedType and Literal, the output should be + # determined from the MaskedType.value_type (which is a + # primitive type) and other + unified = context.unify_pairs(self.value_type, other) + if unified is None: + return None + + return MaskedType(unified) + +# Tell numba how `MaskedType` is constructed on the backend in terms +# of primitive things that exist at the LLVM level +@register_model(MaskedType) +class MaskedModel(models.StructModel): + def __init__(self, dmm, fe_type): + # This struct has two members, a value and a validity + # let the type of the `value` field be the same as the + # `value_type` and let `valid` be a boolean + members = [("value", fe_type.value_type), ("valid", types.bool_)] + models.StructModel.__init__(self, dmm, fe_type, members) + + +class NAType(types.Type): + ''' + A type for handling ops against nulls + Exists so we can: + 1. Teach numba that all occurances of `cudf.NA` are + to be read as instances of this type instead + 2. 
Define ops like `if x is cudf.NA` where `x` is of + type `Masked` to mean `if x.valid is False` + ''' + def __init__(self): + super().__init__(name="NA") + + def unify(self, context, other): + ''' + Masked <-> NA works from above + Literal <-> NA -> Masked + ''' + if isinstance(other, types.abstract.Literal): + return MaskedType(other.literal_type) + + +@typeof_impl.register(_NAType) +def typeof_na(val, c): + ''' + Tie instances of _NAType (cudf.NA) to our NAType. + Effectively make it so numba sees `cudf.NA` as an + instance of this NAType -> handle it accordingly. + ''' + return NAType() + +register_model(NAType)(models.OpaqueModel) + + +# Ultimately, we want numba to produce PTX code that specifies how to add +# two singular `Masked` structs together, which is defined as producing a +# new `Masked` with the right validity and if valid, the correct value. +# This happens in two phases: +# 1. Specify that `Masked` + `Masked` exists and what it should return +# 2. Implement how to actually do (1) at the LLVM level +# The following code accomplishes (1) - it is really just a way of specifying +# that the `+` operation has a CUDA overload that accepts two `Masked` that +# are parameterized with `value_type` and what flavor of `Masked` to return. +@cuda_decl_registry.register_global(operator.add) +class MaskedScalarAdd(AbstractTemplate): + def generic(self, args, kws): + ''' + Typing for `Masked` + `Masked` + Numba expects a valid numba type to be returned if typing is successful + else `None` signifies the error state (this is common across numba) + ''' + if isinstance(args[0], MaskedType) and isinstance(args[1], MaskedType): + # In the case of op(Masked, Masked), the return type is a Masked + # such that Masked.value is the primitive type that would have + # been resolved if we were just adding the `value_type`s. 
+ return_type = self.context.resolve_function_type( + self.key, (args[0].value_type, args[1].value_type), kws + ).return_type + return nb_signature( + MaskedType(return_type), + MaskedType(args[0].value_type), + MaskedType(args[1].value_type), + ) + +@cuda_decl_registry.register_global(operator.add) +class MaskedScalarAddNull(AbstractTemplate): + def generic(self, args, kws): + ''' + Typing for `Masked` + `NA` + Handles situations like `x + cudf.NA` + ''' + if isinstance(args[0], MaskedType) and isinstance(args[1], NAType): + # In the case of op(Masked, NA), the result has the same + # dtype as the original regardless of what it is + return_type = args[0].value_type + return nb_signature( + MaskedType(return_type), + MaskedType(args[0].value_type), + NAType(), + ) + +@cuda_decl_registry.register_global(operator.add) +class MaskedScalarAddConstant(AbstractTemplate): + def generic(self, args, kws): + ''' + Typing for `Masked` + a constant literal + handles situations like `x + 1` + ''' + if isinstance(args[0], MaskedType) and isinstance( + args[1], types.Integer + ): + # In the case of op(Masked, constant), we resolve the type between + # the Masked value_type and the constant's type directly + return_type = self.context.resolve_function_type( + self.key, (args[0].value_type, args[1]), kws + ).return_type + return nb_signature( + MaskedType(return_type), + MaskedType(args[0].value_type), + args[1], + ) diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index 252ddd24088..529f810994f 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -1,5 +1,5 @@ import cudf -from cudf.core.udf import nulludf +from cudf.core.udf.pipeline import nulludf from cudf.tests.utils import assert_eq, NUMERIC_TYPES import pandas as pd import itertools From 6287404e6cbcc7b2f6a24efd3d6a1c4679372c7e Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 12 May 2021 11:56:27 
-0700 Subject: [PATCH 052/132] remove erroneous header --- cpp/src/transform/transform.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index 1d8f76d9e2c..a6eb9407f03 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -16,7 +16,6 @@ #include #include -#include #include From aa38be229e36df95dc547da1ea36e44e5987dc1b Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 13 May 2021 06:28:45 -0700 Subject: [PATCH 053/132] typing and lowering for Masked is NA, currently not working --- python/cudf/cudf/core/udf/lowering.py | 18 ++++++++++++++++++ python/cudf/cudf/core/udf/typing.py | 13 +++++++++++++ 2 files changed, 31 insertions(+) diff --git a/python/cudf/cudf/core/udf/lowering.py b/python/cudf/cudf/core/udf/lowering.py index 7bff26993d9..35839164812 100644 --- a/python/cudf/cudf/core/udf/lowering.py +++ b/python/cudf/cudf/core/udf/lowering.py @@ -96,6 +96,24 @@ def masked_scalar_add_constant_impl(context, builder, sig, input_values): return result._getvalue() +@cuda_lower(operator.is_, MaskedType, NAType) +def masked_scalar_is_null_impl(context, builder, sig, args): + ''' + Implement `MaskedType` + constant + ''' + masked_type, na = sig.args + indata = cgutils.create_struct_proxy(masked_type)( + context, builder, value=args[0] + ) + result = context.get_constant(types.boolean, 1) + + with builder.if_else(indata.valid) as (then, otherwise): + with then: + result = context.get_constant(types.boolean, 0) + with otherwise: + result = context.get_constant(types.boolean, 1) + return result + # To handle the unification, we need to support casting from any type to an # extension type. The cast implementation takes the value passed in and returns # an extension struct wrapping that value. 
diff --git a/python/cudf/cudf/core/udf/typing.py b/python/cudf/cudf/core/udf/typing.py index 28aace52da9..0377e370349 100644 --- a/python/cudf/cudf/core/udf/typing.py +++ b/python/cudf/cudf/core/udf/typing.py @@ -147,6 +147,19 @@ def generic(self, args, kws): MaskedType(args[1].value_type), ) + +@cuda_decl_registry.register_global(operator.is_) +class MaskedScalarIsNull(AbstractTemplate): + ''' + Typing for `Masked is cudf.NA` + ''' + def generic(self, args, kws): + if isinstance(args[0], MaskedType) and isinstance(args[1], NAType): + return nb_signature( + types.boolean, + MaskedType(args[0].value_type), + NAType()) + @cuda_decl_registry.register_global(operator.add) class MaskedScalarAddNull(AbstractTemplate): def generic(self, args, kws): From b5dcd13452be941d9f2ce8cfcd096f8ae5e44b74 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 13 May 2021 06:32:04 -0700 Subject: [PATCH 054/132] remove erroneous logic --- python/cudf/cudf/core/udf/lowering.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/cudf/cudf/core/udf/lowering.py b/python/cudf/cudf/core/udf/lowering.py index 35839164812..63746edda31 100644 --- a/python/cudf/cudf/core/udf/lowering.py +++ b/python/cudf/cudf/core/udf/lowering.py @@ -105,7 +105,6 @@ def masked_scalar_is_null_impl(context, builder, sig, args): indata = cgutils.create_struct_proxy(masked_type)( context, builder, value=args[0] ) - result = context.get_constant(types.boolean, 1) with builder.if_else(indata.valid) as (then, otherwise): with then: From 4c29c23208e8322f33df19403815ee63fd3d6fae Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 13 May 2021 08:54:32 -0700 Subject: [PATCH 055/132] fix lowering for Masked is NA --- python/cudf/cudf/core/udf/lowering.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/core/udf/lowering.py b/python/cudf/cudf/core/udf/lowering.py index 63746edda31..a64722151d6 100644 --- a/python/cudf/cudf/core/udf/lowering.py +++ 
b/python/cudf/cudf/core/udf/lowering.py @@ -8,6 +8,7 @@ from numba.cuda.cudaimpl import registry as cuda_impl_registry import operator from numba.extending import types +from llvmlite import ir @cuda_lowering_registry.lower_constant(NAType) def constant_dummy(context, builder, ty, pyval): @@ -105,13 +106,14 @@ def masked_scalar_is_null_impl(context, builder, sig, args): indata = cgutils.create_struct_proxy(masked_type)( context, builder, value=args[0] ) - + result = cgutils.alloca_once(builder, ir.IntType(1)) with builder.if_else(indata.valid) as (then, otherwise): with then: - result = context.get_constant(types.boolean, 0) + builder.store(context.get_constant(types.boolean, 0), result) with otherwise: - result = context.get_constant(types.boolean, 1) - return result + builder.store(context.get_constant(types.boolean, 1), result) + + return builder.load(result) # To handle the unification, we need to support casting from any type to an # extension type. The cast implementation takes the value passed in and returns @@ -121,6 +123,7 @@ def cast_primitive_to_masked(context, builder, fromty, toty, val): casted = context.cast(builder, val, fromty, toty.value_type) ext = cgutils.create_struct_proxy(toty)(context, builder) ext.value = casted + ext.valid = context.get_constant(types.boolean, 1) return ext._getvalue() @cuda_impl_registry.lower_cast(NAType, MaskedType) From 837f2ef3a60c5c4ed55a5abe859ae5c4bc01c0be Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 13 May 2021 12:29:00 -0700 Subject: [PATCH 056/132] rougly fix test_apply_NA_conditional, which was passing by coincidence --- python/cudf/cudf/tests/test_udf_masked_ops.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index 529f810994f..d36d2c1616f 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -109,11 +109,14 @@ def 
func_gdf(x, y): 'b':[4,5,None, None] }) - pdf = gdf.to_pandas() + pdf = gdf.to_pandas(nullable=True) expect = pdf.apply(lambda row: func_pdf(row['a'], row['b']), axis=1) obtain = gdf.apply(lambda row: func_gdf(row['a'], row['b']), axis=1) + # using a UDF on a nullable dtype in pandas casts to object + expect = expect.astype(pd.Int64Dtype()) + obtain = obtain.to_pandas(nullable=True) assert_eq(expect, obtain) From 2d7104d1ed1e5fd47c3acec293c66ff577805907 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Fri, 14 May 2021 16:05:56 -0500 Subject: [PATCH 057/132] support and test all arithmetic operators --- python/cudf/cudf/core/udf/lowering.py | 105 ++++++++++++------ python/cudf/cudf/core/udf/typing.py | 18 ++- python/cudf/cudf/tests/test_udf_masked_ops.py | 48 ++++++++ 3 files changed, 135 insertions(+), 36 deletions(-) diff --git a/python/cudf/cudf/core/udf/lowering.py b/python/cudf/cudf/core/udf/lowering.py index a64722151d6..5021a32d138 100644 --- a/python/cudf/cudf/core/udf/lowering.py +++ b/python/cudf/cudf/core/udf/lowering.py @@ -10,6 +10,18 @@ from numba.extending import types from llvmlite import ir +import operator +arith_ops = [ + operator.add, + operator.sub, + operator.mul, + operator.truediv, + operator.floordiv, + operator.mod, + operator.pow + +] + @cuda_lowering_registry.lower_constant(NAType) def constant_dummy(context, builder, ty, pyval): # This handles None, etc. @@ -20,45 +32,70 @@ def constant_dummy(context, builder, ty, pyval): # `MaskedType` would result. Now we have to actually fill in # the implementation details of how to do that. This is where # we can involve both validities in constructing the answer -@cuda_lower(operator.add, MaskedType, MaskedType) -def masked_scalar_add_impl(context, builder, sig, args): + +def make_arithmetic_op(op): ''' - Implement `MaskedType` + `MaskedType` + Make closures that implement arithmetic operations. See + register_arithmetic_op for details. 
''' + def masked_scalar_op_impl(context, builder, sig, args): + ''' + Implement `MaskedType` + `MaskedType` + ''' + + masked_type_1, masked_type_2 = sig.args # MaskedType(...), MaskedType(...) + masked_return_type = sig.return_type # MaskedType(...) + + # Let there be two actual LLVM structs backing the two inputs + # https://mapping-high-level-constructs-to-llvm-ir.readthedocs.io/en/latest/basic-constructs/structures.html + m1 = cgutils.create_struct_proxy(masked_type_1)( + context, builder, value=args[0] + ) + m2 = cgutils.create_struct_proxy(masked_type_2)( + context, builder, value=args[1] + ) - masked_type_1, masked_type_2 = sig.args # MaskedType(...), MaskedType(...) - masked_return_type = sig.return_type # MaskedType(...) + # we will return an output struct + result = cgutils.create_struct_proxy(masked_return_type)(context, builder) + + # compute output validity + valid = builder.and_(m1.valid, m2.valid) + result.valid = valid + with builder.if_then(valid): + # Let numba handle generating the extra LLVM needed to perform + # operations on mixed types, by compiling the final core op between + # the two primitive values as a separate function and calling it + result.value = context.compile_internal( + builder, + lambda x, y: op(x, y), + nb_signature( + masked_return_type.value_type, + masked_type_1.value_type, + masked_type_2.value_type + ), + (m1.value, m2.value) + ) + return result._getvalue() + return masked_scalar_op_impl + +def register_arithmetic_op(op): + ''' + Register a lowering implementation for the + arithmetic op `op`. 
- # Let there be two actual LLVM structs backing the two inputs - # https://mapping-high-level-constructs-to-llvm-ir.readthedocs.io/en/latest/basic-constructs/structures.html - m1 = cgutils.create_struct_proxy(masked_type_1)( - context, builder, value=args[0] - ) - m2 = cgutils.create_struct_proxy(masked_type_2)( - context, builder, value=args[1] - ) + Because the lowering implementations compile the final + op separately using a lambda and compile_internal, `op` + needs to be tied to each lowering implementation using + a closure. - # we will return an output struct - result = cgutils.create_struct_proxy(masked_return_type)(context, builder) - - # compute output validity - valid = builder.and_(m1.valid, m2.valid) - result.valid = valid - with builder.if_then(valid): - # Let numba handle generating the extra LLVM needed to perform - # operations on mixed types, by compiling the final core op between - # the two primitive values as a separate function and calling it - result.value = context.compile_internal( - builder, - lambda x, y: x + y, - nb_signature( - masked_return_type.value_type, - masked_type_1.value_type, - masked_type_2.value_type - ), - (m1.value, m2.value) - ) - return result._getvalue() + This function makes and lowers a closure for one op. 
+ + ''' + to_lower_op = make_arithmetic_op(op) + cuda_lower(op, MaskedType, MaskedType)(to_lower_op) + +for op in arith_ops: + register_arithmetic_op(op) @cuda_lower(operator.add, MaskedType, NAType) def masked_scalar_add_na_impl(context, builder, sig, args): diff --git a/python/cudf/cudf/core/udf/typing.py b/python/cudf/cudf/core/udf/typing.py index 0377e370349..4ac145c511b 100644 --- a/python/cudf/cudf/core/udf/typing.py +++ b/python/cudf/cudf/core/udf/typing.py @@ -7,6 +7,18 @@ import operator +arith_ops = [ + operator.add, + operator.sub, + operator.mul, + operator.truediv, + operator.floordiv, + operator.mod, + operator.pow + +] + + class MaskedType(types.Type): ''' A numba type consiting of a value of some primitive type @@ -126,8 +138,7 @@ def typeof_na(val, c): # The following code accomplishes (1) - it is really just a way of specifying # that the `+` operation has a CUDA overload that accepts two `Masked` that # are parameterized with `value_type` and what flavor of `Masked` to return. 
-@cuda_decl_registry.register_global(operator.add) -class MaskedScalarAdd(AbstractTemplate): +class MaskedScalarArithOp(AbstractTemplate): def generic(self, args, kws): ''' Typing for `Masked` + `Masked` @@ -147,6 +158,9 @@ def generic(self, args, kws): MaskedType(args[1].value_type), ) +for op in arith_ops: + # Every op shares the same typing class + cuda_decl_registry.register_global(op)(MaskedScalarArithOp) @cuda_decl_registry.register_global(operator.is_) class MaskedScalarIsNull(AbstractTemplate): diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index 529f810994f..e8c395194e2 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -4,6 +4,54 @@ import pandas as pd import itertools import pytest +import operator + +arith_ops = [ + operator.add, + operator.sub, + operator.mul, + operator.truediv, + operator.floordiv, + operator.mod, + operator.pow + +] + +def run_masked_udf_test(func_pdf, func_gdf, data, **kwargs): + gdf = data + pdf = data.to_pandas(nullable=True) + + expect = pdf.apply( + lambda row: func_pdf( + *[row[i] for i in data.columns] + ), + axis=1 + ) + obtain = gdf.apply( + lambda row: func_gdf( + *[row[i] for i in data.columns] + ), + axis=1 + ) + assert_eq(expect, obtain, **kwargs) + +@pytest.mark.parametrize('op', arith_ops) +def test_arith_masked_vs_masked(op): + # This test should test all the typing + # and lowering for arithmetic ops between + # two columns + def func_pdf(x, y): + return op(x, y) + + @nulludf + def func_gdf(x, y): + return op(x, y) + + gdf = cudf.DataFrame({ + 'a':[1,None,3, None], + 'b':[4,5,None, None] + }) + run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) def test_apply_basic(): def func_pdf(x, y): From b63b435d5a291b8ca0048797deec2ba61b95df04 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Sun, 16 May 2021 10:14:57 -0500 Subject: [PATCH 058/132] typing, lowering, tests for 
masked+constant --- python/cudf/cudf/core/udf/lowering.py | 63 ++++++++++++------- python/cudf/cudf/core/udf/typing.py | 40 ++++++------ python/cudf/cudf/tests/test_udf_masked_ops.py | 24 ++++--- 3 files changed, 73 insertions(+), 54 deletions(-) diff --git a/python/cudf/cudf/core/udf/lowering.py b/python/cudf/cudf/core/udf/lowering.py index 5021a32d138..10631b4d84c 100644 --- a/python/cudf/cudf/core/udf/lowering.py +++ b/python/cudf/cudf/core/udf/lowering.py @@ -94,11 +94,8 @@ def register_arithmetic_op(op): to_lower_op = make_arithmetic_op(op) cuda_lower(op, MaskedType, MaskedType)(to_lower_op) -for op in arith_ops: - register_arithmetic_op(op) - @cuda_lower(operator.add, MaskedType, NAType) -def masked_scalar_add_na_impl(context, builder, sig, args): +def masked_scalar_null_op_impl(context, builder, sig, args): ''' Implement `MaskedType` + `NAType` The answer to this is known up front so no actual addition @@ -114,25 +111,49 @@ def masked_scalar_add_na_impl(context, builder, sig, args): result.valid = context.get_constant(types.boolean, 0) return result._getvalue() -@cuda_lower(operator.add, MaskedType, types.Integer) -def masked_scalar_add_constant_impl(context, builder, sig, input_values): - ''' - Implement `MaskedType` + constant - ''' - masked_type, const_type = sig.args +def make_const_op(op): + def masked_scalar_const_op_impl(context, builder, sig, input_values): + ''' + Implement `MaskedType` + constant + ''' + masked_type, const_type = sig.args - indata = cgutils.create_struct_proxy(MaskedType(masked_type.value_type))( - context, builder, value=input_values[0] - ) - result = cgutils.create_struct_proxy(MaskedType(masked_type.value_type))( - context, builder - ) - result.valid = context.get_constant(types.boolean, 0) - with builder.if_then(indata.valid): - result.value = builder.add(indata.value, input_values[1]) - result.valid = context.get_constant(types.boolean, 1) + indata = cgutils.create_struct_proxy(MaskedType(masked_type.value_type))( + context, 
builder, value=input_values[0] + ) + result = cgutils.create_struct_proxy(MaskedType(masked_type.value_type))( + context, builder + ) + result.valid = context.get_constant(types.boolean, 0) + with builder.if_then(indata.valid): + + result.value = context.compile_internal( + builder, + lambda x, y: op(x, y), + nb_signature( + masked_type.value_type, + masked_type.value_type, + const_type + ), + (indata.value, input_values[1]) + ) + result.valid = context.get_constant(types.boolean, 1) + + return result._getvalue() + return masked_scalar_const_op_impl + +def register_const_op(op): + to_lower_op = make_const_op(op) + cuda_lower(op, MaskedType, types.Number)(to_lower_op) + + +# register all lowering at init +for op in arith_ops: + register_arithmetic_op(op) + register_const_op(op) + # null op impl can be shared between all ops + cuda_lower(op, MaskedType, NAType)(masked_scalar_null_op_impl) - return result._getvalue() @cuda_lower(operator.is_, MaskedType, NAType) def masked_scalar_is_null_impl(context, builder, sig, args): diff --git a/python/cudf/cudf/core/udf/typing.py b/python/cudf/cudf/core/udf/typing.py index 4ac145c511b..2a32e8f2f3b 100644 --- a/python/cudf/cudf/core/udf/typing.py +++ b/python/cudf/cudf/core/udf/typing.py @@ -158,24 +158,7 @@ def generic(self, args, kws): MaskedType(args[1].value_type), ) -for op in arith_ops: - # Every op shares the same typing class - cuda_decl_registry.register_global(op)(MaskedScalarArithOp) - -@cuda_decl_registry.register_global(operator.is_) -class MaskedScalarIsNull(AbstractTemplate): - ''' - Typing for `Masked is cudf.NA` - ''' - def generic(self, args, kws): - if isinstance(args[0], MaskedType) and isinstance(args[1], NAType): - return nb_signature( - types.boolean, - MaskedType(args[0].value_type), - NAType()) - -@cuda_decl_registry.register_global(operator.add) -class MaskedScalarAddNull(AbstractTemplate): +class MaskedScalarNullOp(AbstractTemplate): def generic(self, args, kws): ''' Typing for `Masked` + `NA` @@ 
-191,8 +174,7 @@ def generic(self, args, kws): NAType(), ) -@cuda_decl_registry.register_global(operator.add) -class MaskedScalarAddConstant(AbstractTemplate): +class MaskedScalarConstOp(AbstractTemplate): def generic(self, args, kws): ''' Typing for `Masked` + a constant literal @@ -211,3 +193,21 @@ def generic(self, args, kws): MaskedType(args[0].value_type), args[1], ) + +@cuda_decl_registry.register_global(operator.is_) +class MaskedScalarIsNull(AbstractTemplate): + ''' + Typing for `Masked is cudf.NA` + ''' + def generic(self, args, kws): + if isinstance(args[0], MaskedType) and isinstance(args[1], NAType): + return nb_signature( + types.boolean, + MaskedType(args[0].value_type), + NAType()) + +for op in arith_ops: + # Every op shares the same typing class + cuda_decl_registry.register_global(op)(MaskedScalarArithOp) + cuda_decl_registry.register_global(op)(MaskedScalarNullOp) + cuda_decl_registry.register_global(op)(MaskedScalarConstOp) diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index e8c395194e2..23bd8ae4a9c 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -53,26 +53,24 @@ def func_gdf(x, y): }) run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) -def test_apply_basic(): - def func_pdf(x, y): - return x + y - +@pytest.mark.parametrize('op', arith_ops) +@pytest.mark.parametrize('constant', [1]) +def test_arith_masked_vs_constant(op, constant): + def func_pdf(x): + return op(x, constant) + @nulludf - def func_gdf(x, y): - return x + y - + def func_gdf(x): + return op(x, constant) + # Just a single column -> result will be all NA gdf = cudf.DataFrame({ - 'a':[1,2,3], - 'b':[4,5,6] + 'data': [1,2,3] }) - pdf = gdf.to_pandas() + run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) - expect = pdf.apply(lambda row: func_pdf(row['a'], row['b']), axis=1) - obtain = gdf.apply(lambda row: func_gdf(row['a'], row['b']), 
axis=1) - assert_eq(expect, obtain) def test_apply_null(): def func_pdf(x, y): From a3e1444dc0766a3bb33d1d3a7d2615667aa067ce Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Sun, 16 May 2021 10:52:12 -0500 Subject: [PATCH 059/132] try and type mixed return value, and fail to do so --- python/cudf/cudf/core/udf/typing.py | 2 +- python/cudf/cudf/tests/test_udf_masked_ops.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/udf/typing.py b/python/cudf/cudf/core/udf/typing.py index 2a32e8f2f3b..5b3895e55a6 100644 --- a/python/cudf/cudf/core/udf/typing.py +++ b/python/cudf/cudf/core/udf/typing.py @@ -181,7 +181,7 @@ def generic(self, args, kws): handles situations like `x + 1` ''' if isinstance(args[0], MaskedType) and isinstance( - args[1], types.Integer + args[1], types.Number ): # In the case of op(Masked, constant), we resolve the type between # the Masked value_type and the constant's type directly diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index 23bd8ae4a9c..c051b854ae8 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -54,7 +54,7 @@ def func_gdf(x, y): run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) @pytest.mark.parametrize('op', arith_ops) -@pytest.mark.parametrize('constant', [1]) +@pytest.mark.parametrize('constant', [1, 1.5]) def test_arith_masked_vs_constant(op, constant): def func_pdf(x): return op(x, constant) From ca79d722ea7732e113ce5c7d527302def2b4e0d5 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Sun, 16 May 2021 10:57:31 -0500 Subject: [PATCH 060/132] continued adding/refactoring of tests --- python/cudf/cudf/tests/test_udf_masked_ops.py | 77 +++---------------- 1 file changed, 10 insertions(+), 67 deletions(-) diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index c051b854ae8..b47af7ffc6e 100644 --- 
a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -70,72 +70,21 @@ def func_gdf(x): run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) - - -def test_apply_null(): - def func_pdf(x, y): - return x + y - - @nulludf - def func_gdf(x, y): - return x + y - - - gdf = cudf.DataFrame({ - 'a':[1,None,3, None], - 'b':[4,5,None, None] - }) - - pdf = gdf.to_pandas() - - expect = pdf.apply(lambda row: func_pdf(row['a'], row['b']), axis=1) - obtain = gdf.apply(lambda row: func_gdf(row['a'], row['b']), axis=1) - assert_eq(expect, obtain) - - -def test_apply_add_null(): - def func_pdf(x, y): - return x + y + pd.NA - - @nulludf - def func_gdf(x, y): - return x + y + cudf.NA - - - gdf = cudf.DataFrame({ - 'a':[1,None,3, None], - 'b':[4,5,None, None] - }) - - pdf = gdf.to_pandas() - - expect = pdf.apply(lambda row: func_pdf(row['a'], row['b']), axis=1) - obtain = gdf.apply(lambda row: func_gdf(row['a'], row['b']), axis=1) - # TODO: dtype mismatch here - assert_eq(expect, obtain, check_dtype=False) - - -def test_apply_add_constant(): - def func_pdf(x, y): - return x + y + 1 +@pytest.mark.parametrize('op', arith_ops) +def test_arith_masked_vs_null(op): + def func_pdf(x): + return op(x, pd.NA) @nulludf - def func_gdf(x, y): - return x + y + 1 - + def func_gdf(x): + return op(x, cudf.NA) gdf = cudf.DataFrame({ - 'a':[1,None,3, None], - 'b':[4,5,None, None] + 'data': [1, None, 3] }) + run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) - pdf = gdf.to_pandas() - - expect = pdf.apply(lambda row: func_pdf(row['a'], row['b']), axis=1) - obtain = gdf.apply(lambda row: func_gdf(row['a'], row['b']), axis=1) - assert_eq(expect, obtain) - -def test_apply_NA_conditional(): +def test_masked_is_null_conditional(): def func_pdf(x, y): if x is pd.NA: return y @@ -154,13 +103,7 @@ def func_gdf(x, y): 'a':[1,None,3, None], 'b':[4,5,None, None] }) - - pdf = gdf.to_pandas() - - expect = pdf.apply(lambda row: func_pdf(row['a'], 
row['b']), axis=1) - obtain = gdf.apply(lambda row: func_gdf(row['a'], row['b']), axis=1) - - assert_eq(expect, obtain) + run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) @pytest.mark.parametrize('dtype_a', list(NUMERIC_TYPES)) From e1defcbd3f0f50f04ed07f461583c97ce5ddb24a Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 18 May 2021 07:55:14 -0700 Subject: [PATCH 061/132] fix ops between masked and const of different dtype --- python/cudf/cudf/core/udf/lowering.py | 6 +++--- python/cudf/cudf/tests/test_udf_masked_ops.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/core/udf/lowering.py b/python/cudf/cudf/core/udf/lowering.py index 10631b4d84c..3e4b12b8c5c 100644 --- a/python/cudf/cudf/core/udf/lowering.py +++ b/python/cudf/cudf/core/udf/lowering.py @@ -117,11 +117,11 @@ def masked_scalar_const_op_impl(context, builder, sig, input_values): Implement `MaskedType` + constant ''' masked_type, const_type = sig.args - + return_type = sig.return_type indata = cgutils.create_struct_proxy(MaskedType(masked_type.value_type))( context, builder, value=input_values[0] ) - result = cgutils.create_struct_proxy(MaskedType(masked_type.value_type))( + result = cgutils.create_struct_proxy(MaskedType(return_type.value_type))( context, builder ) result.valid = context.get_constant(types.boolean, 0) @@ -131,7 +131,7 @@ def masked_scalar_const_op_impl(context, builder, sig, input_values): builder, lambda x, y: op(x, y), nb_signature( - masked_type.value_type, + return_type.value_type, masked_type.value_type, const_type ), diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index b47af7ffc6e..b55c779d29f 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -65,7 +65,7 @@ def func_gdf(x): # Just a single column -> result will be all NA gdf = cudf.DataFrame({ - 'data': [1,2,3] + 'data': [1,2,None] }) 
run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) From 33d3dcb474fcccd8e754451c5c0d1c8216efff0b Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 18 May 2021 08:59:45 -0700 Subject: [PATCH 062/132] update tests --- python/cudf/cudf/tests/test_udf_masked_ops.py | 34 +++++-------------- 1 file changed, 8 insertions(+), 26 deletions(-) diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index b55c779d29f..fc2eb3c8f29 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -123,36 +123,23 @@ def func_gdf(x, y): gdf['a'] = gdf['a'].astype(dtype_a) gdf['b'] = gdf['b'].astype(dtype_b) - pdf = gdf.to_pandas() + run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) - expect = pdf.apply(lambda row: func_pdf(row['a'], row['b']), axis=1) - obtain = gdf.apply(lambda row: func_gdf(row['a'], row['b']), axis=1) - # currently, cases where one side is float32 fail, pandas doing some - # weird casting here and getting float64 always - assert_eq(expect, obtain) - - -def test_apply_return_literal(): - # 1. Casting rule literal -> Masked - # -> a) make it so numba knows that we can even promote literals to Masked () - # -> b) implement custom lowering to specify how this actually happens (python only) - - - # 2. 
Custom unfication code - - - # numba/core/type +@pytest.mark.parametrize('val', [ + 5, 5.5 +]) +def test_apply_return_literal(val): def func_pdf(x, y): if x is pd.NA: - return 5 + return val else: return x + y @nulludf def func_gdf(x, y): if x is cudf.NA: - return 5 # Masked(5, True) + return val # Masked(5, True) else: return x + y @@ -162,9 +149,4 @@ def func_gdf(x, y): 'b':[4,5,None, None] }) - pdf = gdf.to_pandas() - - expect = pdf.apply(lambda row: func_pdf(row['a'], row['b']), axis=1) - obtain = gdf.apply(lambda row: func_gdf(row['a'], row['b']), axis=1) - - assert_eq(expect, obtain) + run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) From 5769ded6eba0101cf4ad5b67177442efb1bd2b73 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 18 May 2021 09:37:22 -0700 Subject: [PATCH 063/132] add test for returning NA --- python/cudf/cudf/tests/test_udf_masked_ops.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index fc2eb3c8f29..30ad61ff742 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -150,3 +150,21 @@ def func_gdf(x, y): }) run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) + +def test_apply_return_null(): + def func_pdf(x): + if x is pd.NA: + return pd.NA + else: + return x + + @nulludf + def func_gdf(x): + if x is cudf.NA: + return cudf.NA + else: + return x + + gdf = cudf.DataFrame({'a': [1, None, 3]}) + breakpoint() + run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) From 1579b399689e3ae65f3c3ba85bbf768d7237ed28 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 18 May 2021 12:10:03 -0700 Subject: [PATCH 064/132] add masked v masked comparison ops --- python/cudf/cudf/core/udf/lowering.py | 26 +++++++++++++- python/cudf/cudf/core/udf/typing.py | 26 +++++++++++++- python/cudf/cudf/tests/test_udf_masked_ops.py | 34 
++++++++++++++++++- 3 files changed, 83 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/core/udf/lowering.py b/python/cudf/cudf/core/udf/lowering.py index 3e4b12b8c5c..8c53065cd71 100644 --- a/python/cudf/cudf/core/udf/lowering.py +++ b/python/cudf/cudf/core/udf/lowering.py @@ -22,6 +22,15 @@ ] +comparison_ops = [ + operator.eq, + operator.ne, + operator.lt, + operator.le, + operator.gt, + operator.ge +] + @cuda_lowering_registry.lower_constant(NAType) def constant_dummy(context, builder, ty, pyval): # This handles None, etc. @@ -78,6 +87,7 @@ def masked_scalar_op_impl(context, builder, sig, args): return result._getvalue() return masked_scalar_op_impl + def register_arithmetic_op(op): ''' Register a lowering implementation for the @@ -148,7 +158,7 @@ def register_const_op(op): # register all lowering at init -for op in arith_ops: +for op in arith_ops + comparison_ops: register_arithmetic_op(op) register_const_op(op) # null op impl can be shared between all ops @@ -173,6 +183,20 @@ def masked_scalar_is_null_impl(context, builder, sig, args): return builder.load(result) +@cuda_lower(operator.truth, MaskedType) +def masked_scalar_truth_impl(context, builder, sig, args): + indata = cgutils.create_struct_proxy(MaskedType(types.boolean))( + context, builder, value=args[0] + ) + return indata.value + +@cuda_lower(bool, MaskedType) +def masked_scalar_truth_impl(context, builder, sig, args): + indata = cgutils.create_struct_proxy(MaskedType(types.boolean))( + context, builder, value=args[0] + ) + return indata.value + # To handle the unification, we need to support casting from any type to an # extension type. The cast implementation takes the value passed in and returns # an extension struct wrapping that value. 
diff --git a/python/cudf/cudf/core/udf/typing.py b/python/cudf/cudf/core/udf/typing.py index 5b3895e55a6..2fa09fb23a7 100644 --- a/python/cudf/cudf/core/udf/typing.py +++ b/python/cudf/cudf/core/udf/typing.py @@ -19,6 +19,15 @@ ] +comparison_ops = [ + operator.eq, + operator.ne, + operator.lt, + operator.le, + operator.gt, + operator.ge +] + class MaskedType(types.Type): ''' A numba type consiting of a value of some primitive type @@ -206,7 +215,22 @@ def generic(self, args, kws): MaskedType(args[0].value_type), NAType()) -for op in arith_ops: +@cuda_decl_registry.register_global(operator.truth) +class MaskedScalarTruth(AbstractTemplate): + ''' + Typing for `if Masked` + Used for `if x > y` + The truthiness of a MaskedType shall be the truthiness + of the `value` stored therein + ''' + def generic(self, args, kws): + if isinstance(args[0], MaskedType): + return nb_signature( + types.boolean, + MaskedType(types.boolean) + ) + +for op in arith_ops + comparison_ops: # Every op shares the same typing class cuda_decl_registry.register_global(op)(MaskedScalarArithOp) cuda_decl_registry.register_global(op)(MaskedScalarNullOp) diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index 30ad61ff742..d81d3855dd0 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -17,6 +17,15 @@ ] +comparison_ops = [ + operator.eq, + operator.ne, + operator.lt, + operator.le, + operator.gt, + operator.ge +] + def run_masked_udf_test(func_pdf, func_gdf, data, **kwargs): gdf = data pdf = data.to_pandas(nullable=True) @@ -53,6 +62,27 @@ def func_gdf(x, y): }) run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) +@pytest.mark.parametrize('op', comparison_ops) +def test_compare_masked_vs_masked(op): + # this test should test all the + # typing and lowering for comparisons + # between columns + + def func_pdf(x, y): + return op(x, y) + + @nulludf + def func_gdf(x, y): + 
return op(x, y) + + # we should get: + # [?, ?, , , ] + gdf = cudf.DataFrame({ + 'a': [1, 0, None, 1, None], + 'b': [0, 1, 0, None, None] + }) + run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) + @pytest.mark.parametrize('op', arith_ops) @pytest.mark.parametrize('constant', [1, 1.5]) def test_arith_masked_vs_constant(op, constant): @@ -152,6 +182,9 @@ def func_gdf(x, y): run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) def test_apply_return_null(): + ''' + Tests casting / unification of Masked and NA + ''' def func_pdf(x): if x is pd.NA: return pd.NA @@ -166,5 +199,4 @@ def func_gdf(x): return x gdf = cudf.DataFrame({'a': [1, None, 3]}) - breakpoint() run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) From df28144019737d6a0034acdc867d5ceebe409498 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 18 May 2021 12:37:25 -0700 Subject: [PATCH 065/132] add tests for comparing masked to constant --- python/cudf/cudf/tests/test_udf_masked_ops.py | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index d81d3855dd0..cd20da693e4 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -100,6 +100,27 @@ def func_gdf(x): run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) +@pytest.mark.parametrize('op', comparison_ops) +@pytest.mark.parametrize('constant', [1, 1.5]) +def test_compare_masked_vs_constant(op, constant): + ''' + technically the exact same test as above + ''' + def func_pdf(x): + return op(x, constant) + + @nulludf + def func_gdf(x): + return op(x, constant) + + # Just a single column -> result will be all NA + gdf = cudf.DataFrame({ + 'data': [1,2,None] + }) + run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) + + + @pytest.mark.parametrize('op', arith_ops) def test_arith_masked_vs_null(op): def func_pdf(x): From 
8bab8909313019c1d3d2283612b9266ae164da12 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 19 May 2021 12:37:21 -0700 Subject: [PATCH 066/132] NA <-> Unmasked unification --- python/cudf/cudf/tests/test_udf_masked_ops.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index cd20da693e4..81665c08d9c 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -221,3 +221,20 @@ def func_gdf(x): gdf = cudf.DataFrame({'a': [1, None, 3]}) run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) + +def test_apply_return_either_null_or_literal(): + def func_pdf(x): + if x > 5: + return 2 + else: + return pd.NA + + @nulludf + def func_gdf(x): + if x > 5: + return 2 + else: + return cudf.NA + + gdf = cudf.DataFrame({'a': [1, 3, 6]}) + run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) From 6e7ac8dcaf7c30793f449b8374f07846bd6e7fab Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 19 May 2021 12:39:01 -0700 Subject: [PATCH 067/132] partially address reviews --- python/cudf/cudf/core/udf/lowering.py | 2 +- python/cudf/cudf/core/udf/typing.py | 23 +++++++++++++++-------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/python/cudf/cudf/core/udf/lowering.py b/python/cudf/cudf/core/udf/lowering.py index 8c53065cd71..443a98c6fec 100644 --- a/python/cudf/cudf/core/udf/lowering.py +++ b/python/cudf/cudf/core/udf/lowering.py @@ -32,7 +32,7 @@ ] @cuda_lowering_registry.lower_constant(NAType) -def constant_dummy(context, builder, ty, pyval): +def constant_na(context, builder, ty, pyval): # This handles None, etc. 
return context.get_dummy_value() diff --git a/python/cudf/cudf/core/udf/typing.py b/python/cudf/cudf/core/udf/typing.py index 2fa09fb23a7..15f304339dc 100644 --- a/python/cudf/cudf/core/udf/typing.py +++ b/python/cudf/cudf/core/udf/typing.py @@ -76,17 +76,18 @@ def f(x, y): return x + y numba now sees this as - f(x: MaskedType, y: MaskedType) -> MaskedType OR literal + f(x: MaskedType(dtype_1), y: MaskedType(dtype_2)) + -> MaskedType(dtype_unified) ''' # If we have Masked and NA, the output should be a # MaskedType with the original type as its value_type if isinstance(other, NAType): - return MaskedType(self.value_type) + return self - # if we have MaskedType and Literal, the output should be - # determined from the MaskedType.value_type (which is a - # primitive type) and other + # if we have MaskedType and something that results in a + # scalar, unify between the MaskedType's value_type + # and that other thing unified = context.unify_pairs(self.value_type, other) if unified is None: return None @@ -122,9 +123,15 @@ def unify(self, context, other): Masked <-> NA works from above Literal <-> NA -> Masked ''' - if isinstance(other, types.abstract.Literal): - return MaskedType(other.literal_type) - + breakpoint() + if isinstance(other, MaskedType): + # bounce to MaskedType.unify + return None + elif isinstance(other, NAType): + # unify {NA, NA} -> NA + return self + else: + return MaskedType(other) @typeof_impl.register(_NAType) def typeof_na(val, c): From d58234ed5b1d0d9ae0dd07013978709a3bd18533 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 19 May 2021 12:48:06 -0700 Subject: [PATCH 068/132] just use args --- python/cudf/cudf/core/udf/typing.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/python/cudf/cudf/core/udf/typing.py b/python/cudf/cudf/core/udf/typing.py index 15f304339dc..fbbb104239b 100644 --- a/python/cudf/cudf/core/udf/typing.py +++ b/python/cudf/cudf/core/udf/typing.py @@ -123,7 +123,6 @@ def unify(self, 
context, other): Masked <-> NA works from above Literal <-> NA -> Masked ''' - breakpoint() if isinstance(other, MaskedType): # bounce to MaskedType.unify return None @@ -170,8 +169,8 @@ def generic(self, args, kws): ).return_type return nb_signature( MaskedType(return_type), - MaskedType(args[0].value_type), - MaskedType(args[1].value_type), + args[0], + args[1], ) class MaskedScalarNullOp(AbstractTemplate): @@ -183,10 +182,9 @@ def generic(self, args, kws): if isinstance(args[0], MaskedType) and isinstance(args[1], NAType): # In the case of op(Masked, NA), the result has the same # dtype as the original regardless of what it is - return_type = args[0].value_type return nb_signature( - MaskedType(return_type), - MaskedType(args[0].value_type), + args[0], + args[0], NAType(), ) @@ -206,7 +204,7 @@ def generic(self, args, kws): ).return_type return nb_signature( MaskedType(return_type), - MaskedType(args[0].value_type), + args[0], args[1], ) @@ -219,7 +217,7 @@ def generic(self, args, kws): if isinstance(args[0], MaskedType) and isinstance(args[1], NAType): return nb_signature( types.boolean, - MaskedType(args[0].value_type), + args[0], NAType()) @cuda_decl_registry.register_global(operator.truth) From 739f6fc8f52eed543c75e8a2346a1b71d364cdac Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 19 May 2021 13:33:40 -0700 Subject: [PATCH 069/132] add reflected ops vs NA --- python/cudf/cudf/core/udf/lowering.py | 3 +-- python/cudf/cudf/core/udf/typing.py | 6 ++++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/udf/lowering.py b/python/cudf/cudf/core/udf/lowering.py index 443a98c6fec..0e5cdf831d9 100644 --- a/python/cudf/cudf/core/udf/lowering.py +++ b/python/cudf/cudf/core/udf/lowering.py @@ -104,7 +104,6 @@ def register_arithmetic_op(op): to_lower_op = make_arithmetic_op(op) cuda_lower(op, MaskedType, MaskedType)(to_lower_op) -@cuda_lower(operator.add, MaskedType, NAType) def masked_scalar_null_op_impl(context, builder, 
sig, args): ''' Implement `MaskedType` + `NAType` @@ -163,7 +162,7 @@ def register_const_op(op): register_const_op(op) # null op impl can be shared between all ops cuda_lower(op, MaskedType, NAType)(masked_scalar_null_op_impl) - + cuda_lower(op, NAType, MaskedType)(masked_scalar_null_op_impl) @cuda_lower(operator.is_, MaskedType, NAType) def masked_scalar_is_null_impl(context, builder, sig, args): diff --git a/python/cudf/cudf/core/udf/typing.py b/python/cudf/cudf/core/udf/typing.py index fbbb104239b..ea5c90f8c62 100644 --- a/python/cudf/cudf/core/udf/typing.py +++ b/python/cudf/cudf/core/udf/typing.py @@ -187,6 +187,12 @@ def generic(self, args, kws): args[0], NAType(), ) + elif isinstance(args[0], NAType) and isinstance(args[1], MaskedType): + return nb_signature( + args[1], + NAType(), + args[1] + ) class MaskedScalarConstOp(AbstractTemplate): def generic(self, args, kws): From 14e3ab8547ee7ab79ff7c99cc72376cbcfdbad8f Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 19 May 2021 13:46:17 -0700 Subject: [PATCH 070/132] add tests for reflected masked/na ops --- python/cudf/cudf/tests/test_udf_masked_ops.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index 81665c08d9c..700c57cdc6c 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -135,6 +135,21 @@ def func_gdf(x): }) run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) +@pytest.mark.parametrize('op', arith_ops) +def test_arith_masked_vs_null_reflected(op): + def func_pdf(x): + return op(pd.NA, x) + + @nulludf + def func_gdf(x): + return op(cudf.NA, x) + + gdf = cudf.DataFrame({ + 'data': [1, None, 3] + }) + run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) + + def test_masked_is_null_conditional(): def func_pdf(x, y): if x is pd.NA: From 417c1303adeab9f3ee573e6ac53ffbf0c72170ad Mon Sep 17 00:00:00 2001 
From: brandon-b-miller Date: Wed, 19 May 2021 14:09:25 -0700 Subject: [PATCH 071/132] typing for const + masked, lowering can wait for now --- python/cudf/cudf/core/udf/typing.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/udf/typing.py b/python/cudf/cudf/core/udf/typing.py index ea5c90f8c62..357139ff4aa 100644 --- a/python/cudf/cudf/core/udf/typing.py +++ b/python/cudf/cudf/core/udf/typing.py @@ -213,7 +213,19 @@ def generic(self, args, kws): args[0], args[1], ) - + elif isinstance(args[0], types.Number) and isinstance( + args[1], MaskedType + ): + breakpoint() + return_type = self.context.resolve_function_type( + self.key, (args[1].value_type, args[0]), kws + ).return_type + return nb_signature( + MaskedType(return_type), + args[0], + args[1], + ) + @cuda_decl_registry.register_global(operator.is_) class MaskedScalarIsNull(AbstractTemplate): ''' From 195e9b8a85f8bcd75802a8693bc53f7ac33b4212 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 19 May 2021 14:19:58 -0700 Subject: [PATCH 072/132] add grahams fix for Masked + const of a different type --- python/cudf/cudf/core/udf/lowering.py | 10 ++++++++++ python/cudf/cudf/core/udf/typing.py | 6 ++++++ 2 files changed, 16 insertions(+) diff --git a/python/cudf/cudf/core/udf/lowering.py b/python/cudf/cudf/core/udf/lowering.py index 0e5cdf831d9..6753351b2d1 100644 --- a/python/cudf/cudf/core/udf/lowering.py +++ b/python/cudf/cudf/core/udf/lowering.py @@ -213,3 +213,13 @@ def cast_na_to_masked(context, builder, fromty, toty, val): result.valid = context.get_constant(types.boolean, 0) return result._getvalue() + +@cuda_impl_registry.lower_cast(MaskedType, MaskedType) +def cast_masked_to_masked(context, builder, fromty, toty, val): + operand = cgutils.create_struct_proxy(fromty)(context, builder, value=val) + casted = context.cast(builder, operand.value, fromty.value_type, + toty.value_type) + ext = cgutils.create_struct_proxy(toty)(context, builder) + 
ext.value = casted + ext.valid = operand.valid + return ext._getvalue() diff --git a/python/cudf/cudf/core/udf/typing.py b/python/cudf/cudf/core/udf/typing.py index 357139ff4aa..872afc35cde 100644 --- a/python/cudf/cudf/core/udf/typing.py +++ b/python/cudf/cudf/core/udf/typing.py @@ -93,6 +93,12 @@ def f(x, y): return None return MaskedType(unified) + + def __eq__(self, other): + if not isinstance(other, MaskedType): + return False + + return self.value_type == other.value_type # Tell numba how `MaskedType` is constructed on the backend in terms # of primitive things that exist at the LLVM level From 32f54d4a3c1dd0061d0e03baa892dedd4025e155 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Fri, 28 May 2021 07:41:33 -0700 Subject: [PATCH 073/132] refactor a little c++ --- cpp/src/transform/transform.cpp | 35 ++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index a6eb9407f03..d730ebf5488 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -66,26 +66,37 @@ void unary_operation(mutable_column_view output, cudf::jit::get_data_ptr(input)); } -void generalized_operation(table_view data_view, - std::string const& binary_udf, - data_type output_type, - column_view const& outcol_view, - column_view const& outmsk_view, - rmm::mr::device_memory_resource* mr) +std::vector make_template_types(column_view outcol_view, table_view data_view) { + int entries_per_col = 3; // type, mask, offset + std::string mask_type = "uint32_t*"; + std::string offset_type = "int64_t"; std::vector template_types( - // A ptr, mask ptr, and offset for each column - // plus one for the type of the output column - (data_view.num_columns() * 3) + 1 + // output type comes first and is one extra + (data_view.num_columns() * entries_per_col) + 1 ); template_types[0] = cudf::jit::get_type_name(outcol_view.type()); + for (int i = 0; i < 
data_view.num_columns(); i++) { - int offset = (i * 3) + 1; + int offset = (i * entries_per_col) + 1; template_types[offset] = cudf::jit::get_type_name(data_view.column(i).type()) + "*"; - template_types[offset + 1] = "uint32_t*"; - template_types[offset + 2] = "int64_t"; + template_types[offset + 1] = mask_type; + template_types[offset + 2] = offset_type; } + return template_types; +} + + +void generalized_operation(table_view data_view, + std::string const& binary_udf, + data_type output_type, + column_view const& outcol_view, + column_view const& outmsk_view, + rmm::mr::device_memory_resource* mr) +{ + + std::vector template_types = make_template_types(outcol_view, data_view); std::string generic_kernel_name = jitify2::reflection::Template("cudf::transformation::jit::generic_udf_kernel") From 37a925747db8a74e85d677926114b5e9bf8b518b Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Fri, 28 May 2021 09:17:29 -0700 Subject: [PATCH 074/132] minor docstring updates --- python/cudf/cudf/core/dataframe.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index ee28640dc49..3b487f96a57 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -4712,8 +4712,33 @@ def query(self, expr, local_dict=None): return self._apply_boolean_mask(boolmask) def apply(self, func, axis=1): + """ + Apply a function along an axis of the DataFrame. + + Designed to mimic `pandas.DataFrame.apply`. Applies a user + defined function row wise over a dataframe, with true null + handling. Works with UDFs using `core.udf.pipeline.nulludf` + and returns a single series. Uses numba to jit compile the + function to PTX via LLVM. + + Parameters + ---------- + func : function + Function to apply to each row. 
+ + axis : {0 or 'index', 1 or 'columns'}, default 0 + Axis along which the function is applied: + * 0 or 'index': apply function to each column. + Note: axis=0 is not yet supported. + * 1 or 'columns': apply function to each row. + + """ + if axis != 1: + raise ValueError( + "DataFrame.apply currently only supports row wise ops" + ) + return func(self) - #return super()._apply(func) @applyutils.doc_apply() From 22d610f17226c4544e3297c3146c9abd80f2d2a9 Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Mon, 24 May 2021 11:26:41 +0100 Subject: [PATCH 075/132] Add compilation tests for masked extensions + fix This fixes the lowering of + Masked. All tests in test_extension_compilation are currently passing. --- python/cudf/cudf/core/udf/lowering.py | 29 +++-- python/cudf/cudf/core/udf/typing.py | 1 - .../cudf/tests/test_extension_compilation.py | 117 ++++++++++++++++++ 3 files changed, 137 insertions(+), 10 deletions(-) create mode 100644 python/cudf/cudf/tests/test_extension_compilation.py diff --git a/python/cudf/cudf/core/udf/lowering.py b/python/cudf/cudf/core/udf/lowering.py index 6753351b2d1..50476ebf795 100644 --- a/python/cudf/cudf/core/udf/lowering.py +++ b/python/cudf/cudf/core/udf/lowering.py @@ -51,7 +51,6 @@ def masked_scalar_op_impl(context, builder, sig, args): ''' Implement `MaskedType` + `MaskedType` ''' - masked_type_1, masked_type_2 = sig.args # MaskedType(...), MaskedType(...) masked_return_type = sig.return_type # MaskedType(...) @@ -120,15 +119,23 @@ def masked_scalar_null_op_impl(context, builder, sig, args): result.valid = context.get_constant(types.boolean, 0) return result._getvalue() + def make_const_op(op): def masked_scalar_const_op_impl(context, builder, sig, input_values): ''' Implement `MaskedType` + constant ''' - masked_type, const_type = sig.args + # Which way round are the argument types? 
+ if isinstance(sig.args[0], MaskedType): + masked_type, const_type = sig.args + masked_value, numeric_value = input_values + else: + const_type, masked_type = sig.args + numeric_value, masked_value = input_values + return_type = sig.return_type indata = cgutils.create_struct_proxy(MaskedType(masked_type.value_type))( - context, builder, value=input_values[0] + context, builder, value=masked_value ) result = cgutils.create_struct_proxy(MaskedType(return_type.value_type))( context, builder @@ -137,23 +144,25 @@ def masked_scalar_const_op_impl(context, builder, sig, input_values): with builder.if_then(indata.valid): result.value = context.compile_internal( - builder, - lambda x, y: op(x, y), + builder, + lambda x, y: op(x, y), nb_signature( - return_type.value_type, - masked_type.value_type, + return_type.value_type, + masked_type.value_type, const_type ), - (indata.value, input_values[1]) + (indata.value, numeric_value) ) result.valid = context.get_constant(types.boolean, 1) return result._getvalue() return masked_scalar_const_op_impl + def register_const_op(op): to_lower_op = make_const_op(op) cuda_lower(op, MaskedType, types.Number)(to_lower_op) + cuda_lower(op, types.Number, MaskedType)(to_lower_op) # register all lowering at init @@ -190,7 +199,7 @@ def masked_scalar_truth_impl(context, builder, sig, args): return indata.value @cuda_lower(bool, MaskedType) -def masked_scalar_truth_impl(context, builder, sig, args): +def masked_scalar_bool_impl(context, builder, sig, args): indata = cgutils.create_struct_proxy(MaskedType(types.boolean))( context, builder, value=args[0] ) @@ -207,6 +216,7 @@ def cast_primitive_to_masked(context, builder, fromty, toty, val): ext.valid = context.get_constant(types.boolean, 1) return ext._getvalue() + @cuda_impl_registry.lower_cast(NAType, MaskedType) def cast_na_to_masked(context, builder, fromty, toty, val): result = cgutils.create_struct_proxy(toty)(context, builder) @@ -214,6 +224,7 @@ def cast_na_to_masked(context, builder, 
fromty, toty, val): return result._getvalue() + @cuda_impl_registry.lower_cast(MaskedType, MaskedType) def cast_masked_to_masked(context, builder, fromty, toty, val): operand = cgutils.create_struct_proxy(fromty)(context, builder, value=val) diff --git a/python/cudf/cudf/core/udf/typing.py b/python/cudf/cudf/core/udf/typing.py index 872afc35cde..8f967bb4cd1 100644 --- a/python/cudf/cudf/core/udf/typing.py +++ b/python/cudf/cudf/core/udf/typing.py @@ -222,7 +222,6 @@ def generic(self, args, kws): elif isinstance(args[0], types.Number) and isinstance( args[1], MaskedType ): - breakpoint() return_type = self.context.resolve_function_type( self.key, (args[1].value_type, args[0]), kws ).return_type diff --git a/python/cudf/cudf/tests/test_extension_compilation.py b/python/cudf/cudf/tests/test_extension_compilation.py new file mode 100644 index 00000000000..10b60c5ca81 --- /dev/null +++ b/python/cudf/cudf/tests/test_extension_compilation.py @@ -0,0 +1,117 @@ +import operator +import pytest + +from numba import types +from numba.cuda import compile_ptx + +from cudf import NA +from cudf.core.udf.typing import MaskedType + +arith_ops = ( + operator.add, + operator.sub, + operator.mul, + operator.truediv, + operator.floordiv, + operator.mod, + operator.pow, +) + +number_types = ( + types.float32, + types.float64, + types.int8, + types.int16, + types.int32, + types.int64, + types.uint8, + types.uint16, + types.uint32, + types.uint64, +) + +QUICK = False + +if QUICK: + arith_ops = (operator.add, operator.truediv, operator.pow) + number_types = (types.int32, types.float32) + + +number_ids = tuple(str(t) for t in number_types) + + +@pytest.mark.parametrize('op', arith_ops) +@pytest.mark.parametrize('ty', number_types, ids=number_ids) +@pytest.mark.parametrize('constant', [1, 1.5]) +def test_compile_arith_masked_vs_constant(op, ty, constant): + + def func(x): + return op(x, constant) + + cc = (7, 5) + ptx, resty = compile_ptx(func, (MaskedType(ty),), cc=cc, device=True) + + 
assert isinstance(resty, MaskedType) + + # Check that the masked typing matches that of the unmasked typing + um_ptx, um_resty = compile_ptx(func, (ty,), cc=cc, device=True) + assert resty.value_type == um_resty + + +@pytest.mark.parametrize('op', arith_ops) +@pytest.mark.parametrize('ty', number_types, ids=number_ids) +@pytest.mark.parametrize('constant', [1, 1.5]) +def test_compile_arith_constant_vs_masked(op, ty, constant): + + def func(x): + return op(constant, x) + + cc = (7, 5) + ptx, resty = compile_ptx(func, (MaskedType(ty),), cc=cc, device=True) + + assert isinstance(resty, MaskedType) + + +@pytest.mark.parametrize('op', arith_ops) +@pytest.mark.parametrize('ty', number_types, ids=number_ids) +def test_compile_arith_masked_vs_na(op, ty): + + def func(x): + return op(x, NA) + + cc = (7, 5) + ptx, resty = compile_ptx(func, (MaskedType(ty),), cc=cc, device=True) + + assert isinstance(resty, MaskedType) + + +@pytest.mark.parametrize('op', arith_ops) +@pytest.mark.parametrize('ty', number_types, ids=number_ids) +def test_compile_arith_na_vs_masked(op, ty): + + def func(x): + return op(x, NA) + + cc = (7, 5) + ptx, resty = compile_ptx(func, (MaskedType(ty),), cc=cc, device=True) + + +@pytest.mark.parametrize('op', arith_ops) +@pytest.mark.parametrize('ty1', number_types, ids=number_ids) +@pytest.mark.parametrize('ty2', number_types, ids=number_ids) +@pytest.mark.parametrize('masked', ((False, True), (True, False), + (True, True)), + ids=('um', 'mu', 'mm')) +def test_compile_arith_masked_ops(op, ty1, ty2, masked): + + def func(x, y): + return op(x, y) + + cc = (7, 5) + + if masked[0]: + ty1 = MaskedType(ty1) + if masked[1]: + ty2 = MaskedType(ty2) + + ptx, resty = compile_ptx(func, (ty1, ty2), cc=cc, device=True) From 8a1b053b809504af113cb273cd31a77cc44dfea2 Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Mon, 24 May 2021 11:51:53 +0100 Subject: [PATCH 076/132] Fix flake8 in masked ops code --- python/cudf/cudf/core/udf/lowering.py | 45 +++++++++----- 
python/cudf/cudf/core/udf/pipeline.py | 3 +- python/cudf/cudf/core/udf/typing.py | 55 +++++++++------- python/cudf/cudf/tests/test_udf_masked_ops.py | 62 ++++++++++--------- 4 files changed, 95 insertions(+), 70 deletions(-) diff --git a/python/cudf/cudf/core/udf/lowering.py b/python/cudf/cudf/core/udf/lowering.py index 50476ebf795..fcb865be5dd 100644 --- a/python/cudf/cudf/core/udf/lowering.py +++ b/python/cudf/cudf/core/udf/lowering.py @@ -10,7 +10,6 @@ from numba.extending import types from llvmlite import ir -import operator arith_ops = [ operator.add, operator.sub, @@ -23,7 +22,7 @@ ] comparison_ops = [ - operator.eq, + operator.eq, operator.ne, operator.lt, operator.le, @@ -31,14 +30,16 @@ operator.ge ] + @cuda_lowering_registry.lower_constant(NAType) def constant_na(context, builder, ty, pyval): # This handles None, etc. return context.get_dummy_value() -# In the typing phase, we declared that a `MaskedType` can be -# added to another `MaskedType` and specified what kind of -# `MaskedType` would result. Now we have to actually fill in + +# In the typing phase, we declared that a `MaskedType` can be +# added to another `MaskedType` and specified what kind of a +# `MaskedType` would result. Now we have to actually fill in # the implementation details of how to do that. This is where # we can involve both validities in constructing the answer @@ -51,8 +52,10 @@ def masked_scalar_op_impl(context, builder, sig, args): ''' Implement `MaskedType` + `MaskedType` ''' - masked_type_1, masked_type_2 = sig.args # MaskedType(...), MaskedType(...) - masked_return_type = sig.return_type # MaskedType(...) + # MaskedType(...), MaskedType(...) + masked_type_1, masked_type_2 = sig.args + # MaskedType(...) 
+ masked_return_type = sig.return_type # Let there be two actual LLVM structs backing the two inputs # https://mapping-high-level-constructs-to-llvm-ir.readthedocs.io/en/latest/basic-constructs/structures.html @@ -64,7 +67,8 @@ def masked_scalar_op_impl(context, builder, sig, args): ) # we will return an output struct - result = cgutils.create_struct_proxy(masked_return_type)(context, builder) + result = cgutils.create_struct_proxy(masked_return_type)(context, + builder) # compute output validity valid = builder.and_(m1.valid, m2.valid) @@ -74,11 +78,11 @@ def masked_scalar_op_impl(context, builder, sig, args): # operations on mixed types, by compiling the final core op between # the two primitive values as a separate function and calling it result.value = context.compile_internal( - builder, - lambda x, y: op(x, y), + builder, + lambda x, y: op(x, y), nb_signature( - masked_return_type.value_type, - masked_type_1.value_type, + masked_return_type.value_type, + masked_type_1.value_type, masked_type_2.value_type ), (m1.value, m2.value) @@ -89,13 +93,13 @@ def masked_scalar_op_impl(context, builder, sig, args): def register_arithmetic_op(op): ''' - Register a lowering implementation for the + Register a lowering implementation for the arithmetic op `op`. Because the lowering implementations compile the final op separately using a lambda and compile_internal, `op` needs to be tied to each lowering implementation using - a closure. + a closure. This function makes and lowers a closure for one op. @@ -103,6 +107,7 @@ def register_arithmetic_op(op): to_lower_op = make_arithmetic_op(op) cuda_lower(op, MaskedType, MaskedType)(to_lower_op) + def masked_scalar_null_op_impl(context, builder, sig, args): ''' Implement `MaskedType` + `NAType` @@ -110,7 +115,7 @@ def masked_scalar_null_op_impl(context, builder, sig, args): needs to take place ''' - return_type = sig.return_type # MaskedType(...) + return_type = sig.return_type # MaskedType(...) 
result = cgutils.create_struct_proxy(MaskedType(return_type.value_type))( context, builder ) @@ -134,10 +139,12 @@ def masked_scalar_const_op_impl(context, builder, sig, input_values): numeric_value, masked_value = input_values return_type = sig.return_type - indata = cgutils.create_struct_proxy(MaskedType(masked_type.value_type))( + masked_input_type = MaskedType(masked_type.value_type) + indata = cgutils.create_struct_proxy(masked_input_type)( context, builder, value=masked_value ) - result = cgutils.create_struct_proxy(MaskedType(return_type.value_type))( + masked_return_type = MaskedType(return_type.value_type) + result = cgutils.create_struct_proxy(masked_return_type)( context, builder ) result.valid = context.get_constant(types.boolean, 0) @@ -173,6 +180,7 @@ def register_const_op(op): cuda_lower(op, MaskedType, NAType)(masked_scalar_null_op_impl) cuda_lower(op, NAType, MaskedType)(masked_scalar_null_op_impl) + @cuda_lower(operator.is_, MaskedType, NAType) def masked_scalar_is_null_impl(context, builder, sig, args): ''' @@ -191,6 +199,7 @@ def masked_scalar_is_null_impl(context, builder, sig, args): return builder.load(result) + @cuda_lower(operator.truth, MaskedType) def masked_scalar_truth_impl(context, builder, sig, args): indata = cgutils.create_struct_proxy(MaskedType(types.boolean))( @@ -198,6 +207,7 @@ def masked_scalar_truth_impl(context, builder, sig, args): ) return indata.value + @cuda_lower(bool, MaskedType) def masked_scalar_bool_impl(context, builder, sig, args): indata = cgutils.create_struct_proxy(MaskedType(types.boolean))( @@ -205,6 +215,7 @@ def masked_scalar_bool_impl(context, builder, sig, args): ) return indata.value + # To handle the unification, we need to support casting from any type to an # extension type. The cast implementation takes the value passed in and returns # an extension struct wrapping that value. 
diff --git a/python/cudf/cudf/core/udf/pipeline.py b/python/cudf/cudf/core/udf/pipeline.py index f85c1ffdd28..ab80400eb29 100644 --- a/python/cudf/cudf/core/udf/pipeline.py +++ b/python/cudf/cudf/core/udf/pipeline.py @@ -1,8 +1,8 @@ -from inspect import signature as py_signature from cudf.core.udf.typing import MaskedType from numba.np import numpy_support from numba import cuda + def compile_udf(func, dtypes): ''' Generate an inlineable PTX function that will be injected into @@ -25,6 +25,7 @@ def compile_udf(func, dtypes): return numpy_output_type, ptx + def nulludf(func): ''' Mimic pandas API: diff --git a/python/cudf/cudf/core/udf/typing.py b/python/cudf/cudf/core/udf/typing.py index 8f967bb4cd1..713ccb26486 100644 --- a/python/cudf/cudf/core/udf/typing.py +++ b/python/cudf/cudf/core/udf/typing.py @@ -20,7 +20,7 @@ comparison_ops = [ - operator.eq, + operator.eq, operator.ne, operator.lt, operator.le, @@ -28,9 +28,10 @@ operator.ge ] + class MaskedType(types.Type): ''' - A numba type consiting of a value of some primitive type + A numba type consiting of a value of some primitive type and a validity boolean, over which we can define math ops ''' def __init__(self, value): @@ -45,42 +46,42 @@ def __repr__(self): def __hash__(self): ''' Needed so that numba caches type instances with different - `value_type` separately. + `value_type` separately. ''' return self.__repr__().__hash__() def unify(self, context, other): ''' Logic for sorting out what to do when the UDF conditionally - returns a `MaskedType`, an `NAType`, or a literal based off + returns a `MaskedType`, an `NAType`, or a literal based off the data at runtime. In this framework, every input column is treated as having - type `MaskedType`. Operations like `x + y` are understood + type `MaskedType`. 
Operations like `x + y` are understood as translating to: `Masked(value=x, valid=True) + Masked(value=y, valid=True)` - This means if the user writes a function such as + This means if the user writes a function such as def f(x, y): return x + y - + numba sees this function as: f(x: MaskedType, y: MaskedType) -> MaskedType - + However if the user writes something like: def f(x, y): if x > 5: return 42 else: return x + y - + numba now sees this as f(x: MaskedType(dtype_1), y: MaskedType(dtype_2)) - -> MaskedType(dtype_unified) + -> MaskedType(dtype_unified) ''' - - # If we have Masked and NA, the output should be a + + # If we have Masked and NA, the output should be a # MaskedType with the original type as its value_type if isinstance(other, NAType): return self @@ -93,21 +94,22 @@ def f(x, y): return None return MaskedType(unified) - + def __eq__(self, other): if not isinstance(other, MaskedType): return False return self.value_type == other.value_type + # Tell numba how `MaskedType` is constructed on the backend in terms # of primitive things that exist at the LLVM level @register_model(MaskedType) class MaskedModel(models.StructModel): def __init__(self, dmm, fe_type): # This struct has two members, a value and a validity - # let the type of the `value` field be the same as the - # `value_type` and let `valid` be a boolean + # let the type of the `value` field be the same as the + # `value_type` and let `valid` be a boolean members = [("value", fe_type.value_type), ("valid", types.bool_)] models.StructModel.__init__(self, dmm, fe_type, members) @@ -116,9 +118,9 @@ class NAType(types.Type): ''' A type for handling ops against nulls Exists so we can: - 1. Teach numba that all occurances of `cudf.NA` are + 1. Teach numba that all occurances of `cudf.NA` are to be read as instances of this type instead - 2. Define ops like `if x is cudf.NA` where `x` is of + 2. 
Define ops like `if x is cudf.NA` where `x` is of type `Masked` to mean `if x.valid is False` ''' def __init__(self): @@ -138,6 +140,7 @@ def unify(self, context, other): else: return MaskedType(other) + @typeof_impl.register(_NAType) def typeof_na(val, c): ''' @@ -147,12 +150,13 @@ def typeof_na(val, c): ''' return NAType() + register_model(NAType)(models.OpaqueModel) # Ultimately, we want numba to produce PTX code that specifies how to add # two singular `Masked` structs together, which is defined as producing a -# new `Masked` with the right validity and if valid, the correct value. +# new `Masked` with the right validity and if valid, the correct value. # This happens in two phases: # 1. Specify that `Masked` + `Masked` exists and what it should return # 2. Implement how to actually do (1) at the LLVM level @@ -168,8 +172,8 @@ def generic(self, args, kws): ''' if isinstance(args[0], MaskedType) and isinstance(args[1], MaskedType): # In the case of op(Masked, Masked), the return type is a Masked - # such that Masked.value is the primitive type that would have - # been resolved if we were just adding the `value_type`s. + # such that Masked.value is the primitive type that would have + # been resolved if we were just adding the `value_type`s. 
return_type = self.context.resolve_function_type( self.key, (args[0].value_type, args[1].value_type), kws ).return_type @@ -179,6 +183,7 @@ def generic(self, args, kws): args[1], ) + class MaskedScalarNullOp(AbstractTemplate): def generic(self, args, kws): ''' @@ -200,6 +205,7 @@ def generic(self, args, kws): args[1] ) + class MaskedScalarConstOp(AbstractTemplate): def generic(self, args, kws): ''' @@ -230,7 +236,8 @@ def generic(self, args, kws): args[0], args[1], ) - + + @cuda_decl_registry.register_global(operator.is_) class MaskedScalarIsNull(AbstractTemplate): ''' @@ -239,10 +246,11 @@ class MaskedScalarIsNull(AbstractTemplate): def generic(self, args, kws): if isinstance(args[0], MaskedType) and isinstance(args[1], NAType): return nb_signature( - types.boolean, - args[0], + types.boolean, + args[0], NAType()) + @cuda_decl_registry.register_global(operator.truth) class MaskedScalarTruth(AbstractTemplate): ''' @@ -258,6 +266,7 @@ def generic(self, args, kws): MaskedType(types.boolean) ) + for op in arith_ops + comparison_ops: # Every op shares the same typing class cuda_decl_registry.register_global(op)(MaskedScalarArithOp) diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index 700c57cdc6c..90c93532cb0 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -2,7 +2,6 @@ from cudf.core.udf.pipeline import nulludf from cudf.tests.utils import assert_eq, NUMERIC_TYPES import pandas as pd -import itertools import pytest import operator @@ -18,7 +17,7 @@ ] comparison_ops = [ - operator.eq, + operator.eq, operator.ne, operator.lt, operator.le, @@ -26,6 +25,7 @@ operator.ge ] + def run_masked_udf_test(func_pdf, func_gdf, data, **kwargs): gdf = data pdf = data.to_pandas(nullable=True) @@ -39,16 +39,17 @@ def run_masked_udf_test(func_pdf, func_gdf, data, **kwargs): obtain = gdf.apply( lambda row: func_gdf( *[row[i] for i in data.columns] - ), - axis=1 + ), 
+ axis=1 ) assert_eq(expect, obtain, **kwargs) + @pytest.mark.parametrize('op', arith_ops) def test_arith_masked_vs_masked(op): # This test should test all the typing # and lowering for arithmetic ops between - # two columns + # two columns def func_pdf(x, y): return op(x, y) @@ -57,15 +58,16 @@ def func_gdf(x, y): return op(x, y) gdf = cudf.DataFrame({ - 'a':[1,None,3, None], - 'b':[4,5,None, None] + 'a': [1, None, 3, None], + 'b': [4, 5, None, None] }) run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) + @pytest.mark.parametrize('op', comparison_ops) def test_compare_masked_vs_masked(op): - # this test should test all the - # typing and lowering for comparisons + # this test should test all the + # typing and lowering for comparisons # between columns def func_pdf(x, y): @@ -83,23 +85,25 @@ def func_gdf(x, y): }) run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) + @pytest.mark.parametrize('op', arith_ops) @pytest.mark.parametrize('constant', [1, 1.5]) def test_arith_masked_vs_constant(op, constant): def func_pdf(x): return op(x, constant) - + @nulludf def func_gdf(x): return op(x, constant) # Just a single column -> result will be all NA gdf = cudf.DataFrame({ - 'data': [1,2,None] + 'data': [1, 2, None] }) run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) + @pytest.mark.parametrize('op', comparison_ops) @pytest.mark.parametrize('constant', [1, 1.5]) def test_compare_masked_vs_constant(op, constant): @@ -108,19 +112,18 @@ def test_compare_masked_vs_constant(op, constant): ''' def func_pdf(x): return op(x, constant) - + @nulludf def func_gdf(x): return op(x, constant) # Just a single column -> result will be all NA gdf = cudf.DataFrame({ - 'data': [1,2,None] + 'data': [1, 2, None] }) run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) - @pytest.mark.parametrize('op', arith_ops) def test_arith_masked_vs_null(op): def func_pdf(x): @@ -135,6 +138,7 @@ def func_gdf(x): }) run_masked_udf_test(func_pdf, func_gdf, gdf, 
check_dtype=False) + @pytest.mark.parametrize('op', arith_ops) def test_arith_masked_vs_null_reflected(op): def func_pdf(x): @@ -164,10 +168,9 @@ def func_gdf(x, y): else: return x + y - gdf = cudf.DataFrame({ - 'a':[1,None,3, None], - 'b':[4,5,None, None] + 'a': [1, None, 3, None], + 'b': [4, 5, None, None] }) run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) @@ -177,19 +180,19 @@ def func_gdf(x, y): def test_apply_mixed_dtypes(dtype_a, dtype_b): def func_pdf(x, y): return x + y - + @nulludf def func_gdf(x, y): return x + y gdf = cudf.DataFrame({ - 'a':[1.5,None,3, None], - 'b':[4,5,None, None] + 'a': [1.5, None, 3, None], + 'b': [4, 5, None, None] }) gdf['a'] = gdf['a'].astype(dtype_a) gdf['b'] = gdf['b'].astype(dtype_b) - run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) + run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) @pytest.mark.parametrize('val', [ @@ -205,17 +208,17 @@ def func_pdf(x, y): @nulludf def func_gdf(x, y): if x is cudf.NA: - return val # Masked(5, True) + return val # Masked(5, True) else: return x + y - gdf = cudf.DataFrame({ - 'a':[1,None,3, None], - 'b':[4,5,None, None] + 'a': [1, None, 3, None], + 'b': [4, 5, None, None] }) - run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) + run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) + def test_apply_return_null(): ''' @@ -225,7 +228,7 @@ def func_pdf(x): if x is pd.NA: return pd.NA else: - return x + return x @nulludf def func_gdf(x): @@ -235,7 +238,8 @@ def func_gdf(x): return x gdf = cudf.DataFrame({'a': [1, None, 3]}) - run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) + run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) + def test_apply_return_either_null_or_literal(): def func_pdf(x): @@ -252,4 +256,4 @@ def func_gdf(x): return cudf.NA gdf = cudf.DataFrame({'a': [1, 3, 6]}) - run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) + run_masked_udf_test(func_pdf, func_gdf, gdf, 
check_dtype=False) From 58dab996fc42f3a82a3b4228565114b49314c187 Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Mon, 24 May 2021 12:14:20 +0100 Subject: [PATCH 077/132] Add tests of comparisons, start testing unary ops --- .../cudf/tests/test_extension_compilation.py | 37 ++++++++++++++++--- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/python/cudf/cudf/tests/test_extension_compilation.py b/python/cudf/cudf/tests/test_extension_compilation.py index 10b60c5ca81..978315fe7b8 100644 --- a/python/cudf/cudf/tests/test_extension_compilation.py +++ b/python/cudf/cudf/tests/test_extension_compilation.py @@ -17,6 +17,22 @@ operator.pow, ) +comparison_ops = ( + operator.lt, + operator.le, + operator.eq, + operator.ne, + operator.ge, + operator.gt +) + +unary_ops = ( + operator.not_, + operator.truth +) + +ops = arith_ops + comparison_ops + number_types = ( types.float32, types.float64, @@ -40,7 +56,18 @@ number_ids = tuple(str(t) for t in number_types) -@pytest.mark.parametrize('op', arith_ops) +@pytest.mark.parametrize('op', unary_ops) +@pytest.mark.parametrize('ty', number_types, ids=number_ids) +def test_compile_masked_unary(op, ty): + + def func(x): + return op(x) + + cc = (7, 5) + ptx, resty = compile_ptx(func, (MaskedType(ty),), cc=cc, device=True) + + +@pytest.mark.parametrize('op', ops) @pytest.mark.parametrize('ty', number_types, ids=number_ids) @pytest.mark.parametrize('constant', [1, 1.5]) def test_compile_arith_masked_vs_constant(op, ty, constant): @@ -58,7 +85,7 @@ def func(x): assert resty.value_type == um_resty -@pytest.mark.parametrize('op', arith_ops) +@pytest.mark.parametrize('op', ops) @pytest.mark.parametrize('ty', number_types, ids=number_ids) @pytest.mark.parametrize('constant', [1, 1.5]) def test_compile_arith_constant_vs_masked(op, ty, constant): @@ -72,7 +99,7 @@ def func(x): assert isinstance(resty, MaskedType) -@pytest.mark.parametrize('op', arith_ops) +@pytest.mark.parametrize('op', ops) @pytest.mark.parametrize('ty', 
number_types, ids=number_ids) def test_compile_arith_masked_vs_na(op, ty): @@ -85,7 +112,7 @@ def func(x): assert isinstance(resty, MaskedType) -@pytest.mark.parametrize('op', arith_ops) +@pytest.mark.parametrize('op', ops) @pytest.mark.parametrize('ty', number_types, ids=number_ids) def test_compile_arith_na_vs_masked(op, ty): @@ -96,7 +123,7 @@ def func(x): ptx, resty = compile_ptx(func, (MaskedType(ty),), cc=cc, device=True) -@pytest.mark.parametrize('op', arith_ops) +@pytest.mark.parametrize('op', ops) @pytest.mark.parametrize('ty1', number_types, ids=number_ids) @pytest.mark.parametrize('ty2', number_types, ids=number_ids) @pytest.mark.parametrize('masked', ((False, True), (True, False), From 4f064978826b2c6592c0700fa794320852e2d54a Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Mon, 24 May 2021 13:41:24 +0100 Subject: [PATCH 078/132] Don't test as-yet unimplemented not --- python/cudf/cudf/tests/test_extension_compilation.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/tests/test_extension_compilation.py b/python/cudf/cudf/tests/test_extension_compilation.py index 978315fe7b8..1a8367e5532 100644 --- a/python/cudf/cudf/tests/test_extension_compilation.py +++ b/python/cudf/cudf/tests/test_extension_compilation.py @@ -27,8 +27,7 @@ ) unary_ops = ( - operator.not_, - operator.truth + operator.truth, ) ops = arith_ops + comparison_ops @@ -117,7 +116,7 @@ def func(x): def test_compile_arith_na_vs_masked(op, ty): def func(x): - return op(x, NA) + return op(NA, x) cc = (7, 5) ptx, resty = compile_ptx(func, (MaskedType(ty),), cc=cc, device=True) From a59b2409ef3551c9e278d15afcddeee0e559f57d Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Mon, 24 May 2021 17:03:03 +0100 Subject: [PATCH 079/132] Add execution test for masked ops This test executes an operation with and without masking and compares the masked result to the unmasked result.
Some additional typing and lowering is added to support the construction of test data in a kernel, via the `Masked` constructor. --- python/cudf/cudf/core/udf/classes.py | 4 ++ python/cudf/cudf/core/udf/lowering.py | 14 +++++- python/cudf/cudf/core/udf/typing.py | 43 ++++++++++++++++++- .../cudf/tests/test_extension_compilation.py | 38 +++++++++++++++- 4 files changed, 95 insertions(+), 4 deletions(-) create mode 100644 python/cudf/cudf/core/udf/classes.py diff --git a/python/cudf/cudf/core/udf/classes.py b/python/cudf/cudf/core/udf/classes.py new file mode 100644 index 00000000000..1e979253fba --- /dev/null +++ b/python/cudf/cudf/core/udf/classes.py @@ -0,0 +1,4 @@ +class Masked: + def __init__(self, value, valid): + self.value = value + self.valid = valid diff --git a/python/cudf/cudf/core/udf/lowering.py b/python/cudf/cudf/core/udf/lowering.py index fcb865be5dd..3ebec1258dd 100644 --- a/python/cudf/cudf/core/udf/lowering.py +++ b/python/cudf/cudf/core/udf/lowering.py @@ -1,3 +1,4 @@ +from . 
import classes from numba.cuda.cudaimpl import ( lower as cuda_lower, registry as cuda_lowering_registry, @@ -7,7 +8,7 @@ from numba.core import cgutils from numba.cuda.cudaimpl import registry as cuda_impl_registry import operator -from numba.extending import types +from numba.extending import lower_builtin, types from llvmlite import ir arith_ops = [ @@ -245,3 +246,14 @@ def cast_masked_to_masked(context, builder, fromty, toty, val): ext.value = casted ext.valid = operand.valid return ext._getvalue() + + +# Masked constructor for use in a kernel for testing +@lower_builtin(classes.Masked, types.Number, types.boolean) +def masked_constructor(context, builder, sig, args): + ty = sig.return_type + value, valid = args + masked = cgutils.create_struct_proxy(ty)(context, builder) + masked.value = value + masked.valid = valid + return masked._getvalue() diff --git a/python/cudf/cudf/core/udf/typing.py b/python/cudf/cudf/core/udf/typing.py index 713ccb26486..54733a07aae 100644 --- a/python/cudf/cudf/core/udf/typing.py +++ b/python/cudf/cudf/core/udf/typing.py @@ -1,9 +1,14 @@ +from . import classes from numba import types from cudf.core.scalar import _NAType -from numba.core.extending import typeof_impl, register_model, models +from numba.core.extending import (typeof_impl, register_model, models, + make_attribute_wrapper) from numba.cuda.cudadecl import registry as cuda_decl_registry -from numba.core.typing.templates import AbstractTemplate +from numba.core.typing.templates import (AbstractTemplate, AttributeTemplate, + ConcreteTemplate) from numba.core.typing import signature as nb_signature +from numba.core.typing.typeof import typeof + import operator @@ -102,6 +107,40 @@ def __eq__(self, other): return self.value_type == other.value_type +# For typing a Masked constant value defined outside a kernel (e.g. captured in +# a closure). 
+@typeof_impl.register(classes.Masked) +def typeof_interval(val, c): + return MaskedType(typeof(val)) + + +@cuda_decl_registry.register +class MaskedConstructor(ConcreteTemplate): + key = classes.Masked + + cases = [nb_signature(MaskedType(t), t, types.boolean) + for t in (types.integer_domain | types.real_domain)] + + +make_attribute_wrapper(MaskedType, 'value', 'value') +make_attribute_wrapper(MaskedType, 'valid', 'valid') + + +@cuda_decl_registry.register_attr +class ClassesTemplate(AttributeTemplate): + key = types.Module(classes) + + def resolve_Masked(self, mod): + breakpoint() + return types.Function(MaskedConstructor) + + +# For typing classes.Masked +cuda_decl_registry.register_global(classes, types.Module(classes)) +# For typing bare Masked +cuda_decl_registry.register_global(classes.Masked, types.Function(MaskedConstructor)) + + # Tell numba how `MaskedType` is constructed on the backend in terms # of primitive things that exist at the LLVM level @register_model(MaskedType) diff --git a/python/cudf/cudf/tests/test_extension_compilation.py b/python/cudf/cudf/tests/test_extension_compilation.py index 1a8367e5532..e0cc0239001 100644 --- a/python/cudf/cudf/tests/test_extension_compilation.py +++ b/python/cudf/cudf/tests/test_extension_compilation.py @@ -1,11 +1,13 @@ import operator import pytest -from numba import types +from numba import cuda, types from numba.cuda import compile_ptx from cudf import NA from cudf.core.udf.typing import MaskedType +from cudf.core.udf.classes import Masked + arith_ops = ( operator.add, @@ -66,6 +68,40 @@ def func(x): ptx, resty = compile_ptx(func, (MaskedType(ty),), cc=cc, device=True) +@pytest.mark.parametrize('op', arith_ops) +@pytest.mark.parametrize('ty', number_types, ids=number_ids) +def test_execute_masked_binary(op, ty): + + @cuda.jit(device=True) + def func(x, y): + return op(x, y) + + @cuda.jit(debug=True) + def test_kernel(x, y): + # Reference result with unmasked value + u = func(x, y) + + # Construct masked 
values to test with + x0, y0 = Masked(x, False), Masked(y, False) + x1, y1 = Masked(x, True), Masked(y, True) + + # Call with masked types + r0 = func(x0, y0) + r1 = func(x1, y1) + + # Check masks are as expected, and unmasked result matches masked + # result + if r0.valid: + raise RuntimeError('Expected r0 to be invalid') + if not r1.valid: + raise RuntimeError('Expected r1 to be valid') + if u != r1.value: + print('Values: ', u, r1.value) + raise RuntimeError('u != r1.value') + + test_kernel[1, 1](1, 2) + + @pytest.mark.parametrize('op', ops) @pytest.mark.parametrize('ty', number_types, ids=number_ids) @pytest.mark.parametrize('constant', [1, 1.5]) From e440770335a3ff19c185a6f4739e2905f1557b74 Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Tue, 25 May 2021 12:33:46 +0100 Subject: [PATCH 080/132] Begin adding tests for operator.is_ with NA --- .../cudf/tests/test_extension_compilation.py | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/python/cudf/cudf/tests/test_extension_compilation.py b/python/cudf/cudf/tests/test_extension_compilation.py index e0cc0239001..00a924f926a 100644 --- a/python/cudf/cudf/tests/test_extension_compilation.py +++ b/python/cudf/cudf/tests/test_extension_compilation.py @@ -177,3 +177,33 @@ def func(x, y): ty2 = MaskedType(ty2) ptx, resty = compile_ptx(func, (ty1, ty2), cc=cc, device=True) + + +def func_x_is_na(x): + return x is NA + + +def func_na_is_x(x): + return NA is x + + +@pytest.mark.parametrize('fn', (func_x_is_na, func_na_is_x)) +def test_is_na(fn): + + valid = Masked(1, True) + invalid = Masked(1, False) + + device_fn = cuda.jit(device=True)(fn) + + @cuda.jit(debug=True) + def test_kernel(): + valid_result = device_fn(valid) + invalid_result = device_fn(invalid) + + if not valid_result: + raise RuntimeError('Valid masked value is NA and should not be') + + if invalid_result: + raise RuntimeError('Invalid masked value is not NA and should be') + + test_kernel[1, 1]() From 
a6f67fa3d51de76054516370f2e7e25a4f502a5e Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Tue, 25 May 2021 12:42:32 +0100 Subject: [PATCH 081/132] Some tidy-ups in typing - Edit some comments - No need to keep constructing a new `NAType()`, as it is not parameterized. - Rename `MaskedScalarConstOp` to `MaskedScalarScalarOp` to more closely reflect what it handles - it deals with all scalars because it types things with `types.Number`. (if it would only accept `types.Literal` this would be more like "const"). --- python/cudf/cudf/core/udf/typing.py | 41 +++++++++++++++++++---------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/python/cudf/cudf/core/udf/typing.py b/python/cudf/cudf/core/udf/typing.py index 54733a07aae..d3526354d65 100644 --- a/python/cudf/cudf/core/udf/typing.py +++ b/python/cudf/cudf/core/udf/typing.py @@ -36,11 +36,11 @@ class MaskedType(types.Type): ''' - A numba type consiting of a value of some primitive type + A Numba type consisting of a value of some primitive type and a validity boolean, over which we can define math ops ''' def __init__(self, value): - # MaskedType in numba shall be parameterized + # MaskedType in Numba shall be parameterized # with a value type super().__init__(name="Masked") self.value_type = value @@ -96,14 +96,19 @@ def f(x, y): # and that other thing unified = context.unify_pairs(self.value_type, other) if unified is None: + # The value types don't unify, so there is no unified masked type return None return MaskedType(unified) def __eq__(self, other): + # Equality is required for determining whether a cast is required + # between two different types. 
if not isinstance(other, MaskedType): + # Require a cast when the other type is not masked return False + # Require a cast for another masked with a different value type return self.value_type == other.value_type @@ -114,6 +119,8 @@ def typeof_interval(val, c): return MaskedType(typeof(val)) +# Implemented typing for Masked(value, valid) - the construction of a Masked +# type in a kernel. @cuda_decl_registry.register class MaskedConstructor(ConcreteTemplate): key = classes.Masked @@ -122,10 +129,12 @@ class MaskedConstructor(ConcreteTemplate): for t in (types.integer_domain | types.real_domain)] +# Provide access to `m.value` and `m.valid` in a kernel for a Masked `m`. make_attribute_wrapper(MaskedType, 'value', 'value') make_attribute_wrapper(MaskedType, 'valid', 'valid') +# Typing for `classes.Masked` @cuda_decl_registry.register_attr class ClassesTemplate(AttributeTemplate): key = types.Module(classes) @@ -135,10 +144,11 @@ def resolve_Masked(self, mod): return types.Function(MaskedConstructor) -# For typing classes.Masked +# Registration of the global is also needed for Numba to type classes.Masked cuda_decl_registry.register_global(classes, types.Module(classes)) -# For typing bare Masked -cuda_decl_registry.register_global(classes.Masked, types.Function(MaskedConstructor)) +# For typing bare Masked (as in `from .classes import Masked` +cuda_decl_registry.register_global(classes.Masked, + types.Function(MaskedConstructor)) # Tell numba how `MaskedType` is constructed on the backend in terms @@ -180,6 +190,9 @@ def unify(self, context, other): return MaskedType(other) +na_type = NAType() + + @typeof_impl.register(_NAType) def typeof_na(val, c): ''' @@ -187,7 +200,7 @@ def typeof_na(val, c): Effectively make it so numba sees `cudf.NA` as an instance of this NAType -> handle it accordingly. 
''' - return NAType() + return na_type register_model(NAType)(models.OpaqueModel) @@ -235,27 +248,27 @@ def generic(self, args, kws): return nb_signature( args[0], args[0], - NAType(), + na_type, ) elif isinstance(args[0], NAType) and isinstance(args[1], MaskedType): return nb_signature( args[1], - NAType(), + na_type, args[1] ) -class MaskedScalarConstOp(AbstractTemplate): +class MaskedScalarScalarOp(AbstractTemplate): def generic(self, args, kws): ''' - Typing for `Masked` + a constant literal + Typing for `Masked` + a scalar. handles situations like `x + 1` ''' if isinstance(args[0], MaskedType) and isinstance( args[1], types.Number ): - # In the case of op(Masked, constant), we resolve the type between - # the Masked value_type and the constant's type directly + # In the case of op(Masked, scalar), we resolve the type between + # the Masked value_type and the scalar's type directly return_type = self.context.resolve_function_type( self.key, (args[0].value_type, args[1]), kws ).return_type @@ -287,7 +300,7 @@ def generic(self, args, kws): return nb_signature( types.boolean, args[0], - NAType()) + na_type) @cuda_decl_registry.register_global(operator.truth) @@ -310,4 +323,4 @@ def generic(self, args, kws): # Every op shares the same typing class cuda_decl_registry.register_global(op)(MaskedScalarArithOp) cuda_decl_registry.register_global(op)(MaskedScalarNullOp) - cuda_decl_registry.register_global(op)(MaskedScalarConstOp) + cuda_decl_registry.register_global(op)(MaskedScalarScalarOp) From d9e8fdb96303d4f32ed5d8eeb7aaccbffb887a11 Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Tue, 25 May 2021 21:55:50 +0100 Subject: [PATCH 082/132] Fix test and implementation of `is NA` --- python/cudf/cudf/core/udf/lowering.py | 21 ++++++++++++++++--- python/cudf/cudf/core/udf/typing.py | 9 ++++++-- .../cudf/tests/test_extension_compilation.py | 8 +++---- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/python/cudf/cudf/core/udf/lowering.py 
b/python/cudf/cudf/core/udf/lowering.py index 3ebec1258dd..35492130e26 100644 --- a/python/cudf/cudf/core/udf/lowering.py +++ b/python/cudf/cudf/core/udf/lowering.py @@ -183,13 +183,20 @@ def register_const_op(op): @cuda_lower(operator.is_, MaskedType, NAType) +@cuda_lower(operator.is_, NAType, MaskedType) def masked_scalar_is_null_impl(context, builder, sig, args): ''' - Implement `MaskedType` + constant + Implement `MaskedType` is `NA` ''' - masked_type, na = sig.args + if isinstance(sig.args[1], NAType): + masked_type, na = sig.args + value = args[0] + else: + na, masked_type = sig.args + value = args[1] + indata = cgutils.create_struct_proxy(masked_type)( - context, builder, value=args[0] + context, builder, value=value ) result = cgutils.alloca_once(builder, ir.IntType(1)) with builder.if_else(indata.valid) as (then, otherwise): @@ -257,3 +264,11 @@ def masked_constructor(context, builder, sig, args): masked.value = value masked.valid = valid return masked._getvalue() + + +@cuda_impl_registry.lower_constant(MaskedType) +def lower_constant_masked(context, builder, ty, val): + masked = cgutils.create_struct_proxy(ty)(context, builder) + masked.value = context.get_constant(ty.value_type, val.value) + masked.valid = context.get_constant(types.boolean, val.valid) + return masked._getvalue() diff --git a/python/cudf/cudf/core/udf/typing.py b/python/cudf/cudf/core/udf/typing.py index d3526354d65..afc3547c17f 100644 --- a/python/cudf/cudf/core/udf/typing.py +++ b/python/cudf/cudf/core/udf/typing.py @@ -115,8 +115,8 @@ def __eq__(self, other): # For typing a Masked constant value defined outside a kernel (e.g. captured in # a closure). 
@typeof_impl.register(classes.Masked) -def typeof_interval(val, c): - return MaskedType(typeof(val)) +def typeof_masked(val, c): + return MaskedType(typeof(val.value)) # Implemented typing for Masked(value, valid) - the construction of a Masked @@ -301,6 +301,11 @@ def generic(self, args, kws): types.boolean, args[0], na_type) + elif isinstance(args[1], MaskedType) and isinstance(args[0], NAType): + return nb_signature( + types.boolean, + na_type, + args[1]) @cuda_decl_registry.register_global(operator.truth) diff --git a/python/cudf/cudf/tests/test_extension_compilation.py b/python/cudf/cudf/tests/test_extension_compilation.py index 00a924f926a..6aa5a6d5c86 100644 --- a/python/cudf/cudf/tests/test_extension_compilation.py +++ b/python/cudf/cudf/tests/test_extension_compilation.py @@ -197,13 +197,13 @@ def test_is_na(fn): @cuda.jit(debug=True) def test_kernel(): - valid_result = device_fn(valid) - invalid_result = device_fn(invalid) + valid_is_na = device_fn(valid) + invalid_is_na = device_fn(invalid) - if not valid_result: + if valid_is_na: raise RuntimeError('Valid masked value is NA and should not be') - if invalid_result: + if not invalid_is_na: raise RuntimeError('Invalid masked value is not NA and should be') test_kernel[1, 1]() From 1d6755a86a29de178e2fe3b20b12691074eccdcf Mon Sep 17 00:00:00 2001 From: Graham Markall Date: Wed, 26 May 2021 21:12:22 +0100 Subject: [PATCH 083/132] Add tests of comparison with NA behaviour In a slight deviation from pandas (and Python for comparisons with None) semantics, any comparison with NA returns False. Comparisons between NA and non-masked values require some additional implementation (ideally a typing + lowering for a correct cast). 
--- .../cudf/tests/test_extension_compilation.py | 108 ++++++++++++++++++ 1 file changed, 108 insertions(+) diff --git a/python/cudf/cudf/tests/test_extension_compilation.py b/python/cudf/cudf/tests/test_extension_compilation.py index 6aa5a6d5c86..efd0509f475 100644 --- a/python/cudf/cudf/tests/test_extension_compilation.py +++ b/python/cudf/cudf/tests/test_extension_compilation.py @@ -207,3 +207,111 @@ def test_kernel(): raise RuntimeError('Invalid masked value is not NA and should be') test_kernel[1, 1]() + + +def func_lt_na(x): + return x < NA + + +def func_gt_na(x): + return x > NA + + +def func_eq_na(x): + return x == NA + + +def func_ne_na(x): + return x != NA + + +def func_ge_na(x): + return x >= NA + + +def func_le_na(x): + return x <= NA + + +def func_na_lt(x): + return x < NA + + +def func_na_gt(x): + return x > NA + + +def func_na_eq(x): + return x == NA + + +def func_na_ne(x): + return x != NA + + +def func_na_ge(x): + return x >= NA + + +def func_na_le(x): + return x <= NA + + +na_comparison_funcs = ( + func_lt_na, + func_gt_na, + func_eq_na, + func_ne_na, + func_ge_na, + func_le_na, + func_na_lt, + func_na_gt, + func_na_eq, + func_na_ne, + func_na_ge, + func_na_le, +) + + +@pytest.mark.parametrize('fn', na_comparison_funcs) +@pytest.mark.parametrize('ty', number_types, ids=number_ids) +def test_na_masked_comparisons(fn, ty): + + device_fn = cuda.jit(device=True)(fn) + + @cuda.jit(debug=True) + def test_kernel(): + unmasked = ty(1) + valid_masked = Masked(unmasked, True) + invalid_masked = Masked(unmasked, False) + + valid_cmp_na = device_fn(valid_masked) + invalid_cmp_na = device_fn(invalid_masked) + + if valid_cmp_na: + raise RuntimeError('Valid masked value compared True with NA') + + if invalid_cmp_na: + raise RuntimeError('Invalid masked value compared True with NA') + + test_kernel[1, 1]() + + +# xfail because scalars do not yet cast for a comparison to NA +@pytest.mark.xfail +@pytest.mark.parametrize('fn', na_comparison_funcs) 
+@pytest.mark.parametrize('ty', number_types, ids=number_ids) +def test_na_scalar_comparisons(fn, ty): + + device_fn = cuda.jit(device=True)(fn) + + @cuda.jit(debug=True) + def test_kernel(): + unmasked = ty(1) + + unmasked_cmp_na = device_fn(unmasked) + + if unmasked_cmp_na: + raise RuntimeError('Unmasked value compared True with NA') + + test_kernel[1, 1]() From 671792c1dd1c96c36223608583645b50ed30df01 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Fri, 28 May 2021 11:58:32 -0700 Subject: [PATCH 084/132] test reflected const/masked ops - separate lowering to account for non-commutative ops --- python/cudf/cudf/core/udf/lowering.py | 47 +++++++++++++++---- python/cudf/cudf/tests/test_udf_masked_ops.py | 16 +++++++ 2 files changed, 53 insertions(+), 10 deletions(-) diff --git a/python/cudf/cudf/core/udf/lowering.py b/python/cudf/cudf/core/udf/lowering.py index 35492130e26..e617b7cc5a7 100644 --- a/python/cudf/cudf/core/udf/lowering.py +++ b/python/cudf/cudf/core/udf/lowering.py @@ -127,17 +127,12 @@ def masked_scalar_null_op_impl(context, builder, sig, args): def make_const_op(op): - def masked_scalar_const_op_impl(context, builder, sig, input_values): + def masked_scalar_const_op_impl(context, builder, sig, args): ''' Implement `MaskedType` + constant ''' - # Which way round are the argument types? 
- if isinstance(sig.args[0], MaskedType): - masked_type, const_type = sig.args - masked_value, numeric_value = input_values - else: - const_type, masked_type = sig.args - numeric_value, masked_value = input_values + masked_type, const_type = sig.args + masked_value, numeric_value = args return_type = sig.return_type masked_input_type = MaskedType(masked_type.value_type) @@ -150,7 +145,6 @@ def masked_scalar_const_op_impl(context, builder, sig, input_values): ) result.valid = context.get_constant(types.boolean, 0) with builder.if_then(indata.valid): - result.value = context.compile_internal( builder, lambda x, y: op(x, y), @@ -166,11 +160,44 @@ def masked_scalar_const_op_impl(context, builder, sig, input_values): return result._getvalue() return masked_scalar_const_op_impl +def make_reflected_const_op(op): + def masked_scalar_reflected_const_op_impl(context, builder, sig, args): + const_type, masked_type = sig.args + numeric_value, masked_value = args + + return_type = sig.return_type + masked_input_type = MaskedType(masked_type.value_type) + indata = cgutils.create_struct_proxy(masked_input_type)( + context, builder, value=masked_value + ) + masked_return_type = MaskedType(return_type.value_type) + result = cgutils.create_struct_proxy(masked_return_type)( + context, builder + ) + result.valid = context.get_constant(types.boolean, 0) + with builder.if_then(indata.valid): + result.value = context.compile_internal( + builder, + lambda x, y: op(x, y), + nb_signature( + return_type.value_type, + const_type, + masked_type.value_type + ), + (numeric_value, indata.value) + ) + result.valid = context.get_constant(types.boolean, 1) + + return result._getvalue() + return masked_scalar_reflected_const_op_impl + def register_const_op(op): to_lower_op = make_const_op(op) cuda_lower(op, MaskedType, types.Number)(to_lower_op) - cuda_lower(op, types.Number, MaskedType)(to_lower_op) + + to_lower_op_reflected = make_reflected_const_op(op) + cuda_lower(op, types.Number, 
MaskedType)(to_lower_op_reflected) # register all lowering at init diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index 90c93532cb0..77c082ba9a6 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -103,6 +103,22 @@ def func_gdf(x): run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) +@pytest.mark.parametrize('op', arith_ops) +@pytest.mark.parametrize('constant', [1, 1.5]) +def test_arith_masked_vs_constant_reflected(op, constant): + def func_pdf(x): + return op(constant, x) + + @nulludf + def func_gdf(x): + return op(constant, x) + + # Just a single column -> result will be all NA + gdf = cudf.DataFrame({ + 'data': [1, 2, None] + }) + + run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) @pytest.mark.parametrize('op', comparison_ops) @pytest.mark.parametrize('constant', [1, 1.5]) From c3007dec0f550bf6346879e277a3b0749a0198cc Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Fri, 28 May 2021 13:08:46 -0700 Subject: [PATCH 085/132] unify masked with masked --- python/cudf/cudf/core/udf/typing.py | 5 +- python/cudf/cudf/tests/test_udf_masked_ops.py | 73 +++++++++++++------ 2 files changed, 55 insertions(+), 23 deletions(-) diff --git a/python/cudf/cudf/core/udf/typing.py b/python/cudf/cudf/core/udf/typing.py index afc3547c17f..49668869eff 100644 --- a/python/cudf/cudf/core/udf/typing.py +++ b/python/cudf/cudf/core/udf/typing.py @@ -43,6 +43,8 @@ def __init__(self, value): # MaskedType in Numba shall be parameterized # with a value type super().__init__(name="Masked") + if isinstance(value, MaskedType): + breakpoint() self.value_type = value def __repr__(self): @@ -90,6 +92,8 @@ def f(x, y): # MaskedType with the original type as its value_type if isinstance(other, NAType): return self + elif isinstance(other, MaskedType): + return MaskedType(context.unify_pairs(self.value_type, other.value_type)) # if we have MaskedType 
and something that results in a # scalar, unify between the MaskedType's value_type @@ -140,7 +144,6 @@ class ClassesTemplate(AttributeTemplate): key = types.Module(classes) def resolve_Masked(self, mod): - breakpoint() return types.Function(MaskedConstructor) diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index 77c082ba9a6..6b50ca8dd7e 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -120,25 +120,6 @@ def func_gdf(x): run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) -@pytest.mark.parametrize('op', comparison_ops) -@pytest.mark.parametrize('constant', [1, 1.5]) -def test_compare_masked_vs_constant(op, constant): - ''' - technically the exact same test as above - ''' - def func_pdf(x): - return op(x, constant) - - @nulludf - def func_gdf(x): - return op(x, constant) - - # Just a single column -> result will be all NA - gdf = cudf.DataFrame({ - 'data': [1, 2, None] - }) - run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) - @pytest.mark.parametrize('op', arith_ops) def test_arith_masked_vs_null(op): @@ -194,6 +175,12 @@ def func_gdf(x, y): @pytest.mark.parametrize('dtype_a', list(NUMERIC_TYPES)) @pytest.mark.parametrize('dtype_b', list(NUMERIC_TYPES)) def test_apply_mixed_dtypes(dtype_a, dtype_b): + ''' + Test that operations can be performed between columns + of different dtypes and return a column with the correct + values and nulls + ''' + # TODO: Parameterize over the op here def func_pdf(x, y): return x + y @@ -215,16 +202,21 @@ def func_gdf(x, y): 5, 5.5 ]) def test_apply_return_literal(val): + ''' + Test unification codepath for scalars and MaskedType + makes sure that numba knows how to cast a scalar value + to a MaskedType + ''' def func_pdf(x, y): - if x is pd.NA: + if x is not pd.NA and x < 2: return val else: return x + y @nulludf def func_gdf(x, y): - if x is cudf.NA: - return val # Masked(5, True) + if 
x is not cudf.NA and x < 2: + return val else: return x + y @@ -273,3 +265,40 @@ def func_gdf(x): gdf = cudf.DataFrame({'a': [1, 3, 6]}) run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) + + +def test_apply_everything(): + def func_pdf(w, x, y, z): + if x is pd.NA: + return w + y - z + elif ((z > y) is not pd.NA) and z > y: + return x + elif ((x + y) is not pd.NA) and x + y == 0: + return z / x + elif x + y is pd.NA: + return 2.5 + else: + return (y > 2) + + @nulludf + def func_gdf(w, x, y, z): + if x is cudf.NA: + return w + y - z + elif ((z > y) is not cudf.NA) and z > y: + return x + elif ((x + y) is not cudf.NA) and x + y == 0: + return z / x + elif x + y is cudf.NA: + return 2.5 + else: + return (y > 2) + + gdf = cudf.DataFrame( + { + 'a': [1, 3, 6, 0, None, 5, None], + 'b': [3.0, 2.5, None, 5.0, 1.0, 5.0, 11.0], + 'c': [2, 3, 6, 0, None, 5, None], + 'd': [4, None, 6, 0, None, 5, None], + } + ) + run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) From 100ac44645fa5b5d364b751f858ccb242cca5306 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Fri, 28 May 2021 14:41:48 -0700 Subject: [PATCH 086/132] allocate and build the final column in libcudf rather than cython --- cpp/include/cudf/transform.hpp | 2 -- cpp/src/transform/transform.cpp | 21 +++++++++++++-------- python/cudf/cudf/_lib/cpp/transform.pxd | 4 +--- python/cudf/cudf/_lib/transform.pyx | 15 +++------------ python/cudf/cudf/core/frame.py | 9 ++++----- 5 files changed, 21 insertions(+), 30 deletions(-) diff --git a/cpp/include/cudf/transform.hpp b/cpp/include/cudf/transform.hpp index 7af849357ec..35ba7fd53f6 100644 --- a/cpp/include/cudf/transform.hpp +++ b/cpp/include/cudf/transform.hpp @@ -58,8 +58,6 @@ std::unique_ptr generalized_masked_op( table_view data_view, std::string const& binary_udf, data_type output_type, - column_view const& outcol_view, - column_view const& outmsk_view, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git 
a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index d730ebf5488..b48a3058b31 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -173,18 +173,25 @@ std::unique_ptr transform(column_view const& input, } std::unique_ptr generalized_masked_op(table_view data_view, - std::string const& binary_udf, + std::string const& udf, data_type output_type, - column_view const& outcol_view, - column_view const& outmsk_view, rmm::mr::device_memory_resource* mr) { rmm::cuda_stream_view stream = rmm::cuda_stream_default; - transformation::jit::generalized_operation(data_view, binary_udf, output_type, outcol_view, outmsk_view, mr); + std::unique_ptr output = make_fixed_width_column( + output_type, data_view.num_rows() + ); + std::unique_ptr output_mask = make_fixed_width_column( + cudf::data_type{cudf::type_id::BOOL8}, data_view.num_rows() + ); - std::unique_ptr output; + mutable_column_view output_view = *output; + mutable_column_view output_mask_view = *output_mask; + transformation::jit::generalized_operation(data_view, udf, output_type, output_view, output_mask_view, mr); + auto final_output_mask = cudf::bools_to_mask(output_mask_view); + output.get()->set_null_mask(std::move(*(final_output_mask.first))); return output; } @@ -205,11 +212,9 @@ std::unique_ptr transform(column_view const& input, std::unique_ptr generalized_masked_op(table_view data_view, std::string const& binary_udf, data_type output_type, - column_view const& outcol_view, - column_view const& outmsk_view, rmm::mr::device_memory_resource* mr) { - return detail::generalized_masked_op(data_view, binary_udf, output_type, outcol_view, outmsk_view, mr); + return detail::generalized_masked_op(data_view, binary_udf, output_type, mr); } } // namespace cudf diff --git a/python/cudf/cudf/_lib/cpp/transform.pxd b/python/cudf/cudf/_lib/cpp/transform.pxd index 82d45811cb3..c8e84a3a2e9 100644 --- a/python/cudf/cudf/_lib/cpp/transform.pxd +++ 
b/python/cudf/cudf/_lib/cpp/transform.pxd @@ -40,10 +40,8 @@ cdef extern from "cudf/transform.hpp" namespace "cudf" nogil: cdef unique_ptr[column] generalized_masked_op( table_view data_view, - string binary_udf, + string udf, data_type output_type, - column_view outcol_view, - column_view outmask_view, ) except + cdef pair[unique_ptr[table], unique_ptr[column]] encode( diff --git a/python/cudf/cudf/_lib/transform.pyx b/python/cudf/cudf/_lib/transform.pyx index 63cd3b30a81..23854cd3e98 100644 --- a/python/cudf/cudf/_lib/transform.pyx +++ b/python/cudf/cudf/_lib/transform.pyx @@ -122,34 +122,25 @@ def transform(Column input, op): return Column.from_unique_ptr(move(c_output)) -def masked_udf(Table incols, op, Column output_column, Column output_mask): +def masked_udf(Table incols, op, output_type): cdef table_view data_view = incols.data_view() cdef string c_str = op.encode("UTF-8") cdef type_id c_tid cdef data_type c_dtype c_tid = ( - np_to_cudf_types[output_column.dtype] + np_to_cudf_types[output_type] ) c_dtype = data_type(c_tid) - cdef column_view outcol_view = output_column.view() - cdef column_view outmsk_view = output_mask.view() - with nogil: c_output = move(libcudf_transform.generalized_masked_op( data_view, c_str, c_dtype, - outcol_view, - outmsk_view )) - - output_mask_real = bools_to_mask(output_mask) - output_column = output_column.set_mask(output_mask_real) - - return output_column + return Column.from_unique_ptr(move(c_output)) def table_encode(Table input): diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index fc354708f4c..86abfffa697 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -1455,12 +1455,11 @@ def _quantiles( return result def _apply(self, func): + ''' + Apply `func` across the rows of the frame. 
+ ''' output_dtype, ptx = cudf.core.udf.pipeline.compile_udf(func, self.dtypes) - - output_column = cudf.core.column.column_empty(row_count=len(self), dtype=output_dtype) - output_mask = cudf.core.column.column_empty(row_count=len(self), dtype='bool') - - result = cudf._lib.transform.masked_udf(self, ptx, output_column, output_mask) + result = cudf._lib.transform.masked_udf(self, ptx, output_dtype) return result From 91c91eb0779a99aff8bb90c44add024b83c4a400 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 1 Jun 2021 07:24:04 -0700 Subject: [PATCH 087/132] refactor c++ a bit --- cpp/src/transform/transform.cpp | 37 +++++++++++++-------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index b48a3058b31..777be859180 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -105,44 +105,37 @@ void generalized_operation(table_view data_view, std::string generic_cuda_source = cudf::jit::parse_single_function_ptx( binary_udf, "GENERIC_OP", cudf::jit::get_type_name(output_type), {0}); - int n_cols = data_view.num_columns(); - std::vector results((n_cols * 3) + 3); + // {size, out_ptr, out_mask_ptr, col0_ptr, col0_mask_ptr, col0_offset, col1_ptr...} + std::vector kernel_args((data_view.num_columns() * 3) + 3); cudf::size_type size = outcol_view.size(); const void* outcol_ptr = cudf::jit::get_data_ptr(outcol_view); const void* outmsk_ptr = cudf::jit::get_data_ptr(outmsk_view); + kernel_args.insert(kernel_args.begin(), {&size, &outcol_ptr, &outmsk_ptr}); + + std::vector data_ptrs(data_view.num_columns()); + std::vector mask_ptrs(data_view.num_columns()); + std::vector offsets(data_view.num_columns()); - results[0] = &size; - results[1] = &outcol_ptr; - results[2] = &outmsk_ptr; column_view col; + for (int col_idx = 0; col_idx < data_view.num_columns(); col_idx++) { + col = data_view.column(col_idx); - std::vector data_ptrs(n_cols); - std::vector 
mask_ptrs(n_cols); - std::vector offsets(n_cols); + data_ptrs[col_idx] = cudf::jit::get_data_ptr(col); + mask_ptrs[col_idx] = col.null_mask(); + offsets[col_idx] = col.offset(); - for (int i = 0; i < n_cols; i++) { - col = data_view.column(i); - data_ptrs[i] = cudf::jit::get_data_ptr(col); - mask_ptrs[i] = col.null_mask(); - offsets[i] = col.offset(); - } + kernel_args.insert(kernel_args.begin() + 3 * (col_idx + 1), {&data_ptrs[col_idx], &mask_ptrs[col_idx], &offsets[col_idx]}); - int idx = 3; - for (int i = 0; i < n_cols; i++) { - results[idx] = &data_ptrs[i]; - results[idx + 1] = &mask_ptrs[i]; - results[idx + 2] = &offsets[i]; - idx += 3; } - + rmm::cuda_stream_view generic_stream; cudf::jit::get_program_cache(*transform_jit_masked_udf_kernel_cu_jit) .get_kernel( generic_kernel_name, {}, {{"transform/jit/operation-udf.hpp", generic_cuda_source}}, {"-arch=sm_."}) // ->configure_1d_max_occupancy(0, 0, 0, generic_stream.value()) // - ->launch(results.data()); + ->launch(kernel_args.data()); } From 1fa3cab50e2755ae53fa772998cf761f7e9ce8e7 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 1 Jun 2021 07:54:02 -0700 Subject: [PATCH 088/132] use offset_type rather than hardcoding int64_t incorrectly --- cpp/src/transform/transform.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index 777be859180..10bbe6dc45a 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -70,7 +70,9 @@ std::vector make_template_types(column_view outcol_view, table_view { int entries_per_col = 3; // type, mask, offset std::string mask_type = "uint32_t*"; - std::string offset_type = "int64_t"; + + // int32_t + std::string offset_type = cudf::jit::get_type_name(cudf::data_type(cudf::type_to_id())); std::vector template_types( // output type comes first and is one extra @@ -115,7 +117,7 @@ void generalized_operation(table_view data_view, std::vector 
data_ptrs(data_view.num_columns()); std::vector mask_ptrs(data_view.num_columns()); - std::vector offsets(data_view.num_columns()); + std::vector offsets(data_view.num_columns()); column_view col; for (int col_idx = 0; col_idx < data_view.num_columns(); col_idx++) { @@ -129,7 +131,6 @@ void generalized_operation(table_view data_view, } - rmm::cuda_stream_view generic_stream; cudf::jit::get_program_cache(*transform_jit_masked_udf_kernel_cu_jit) .get_kernel( From 6125dc0bf1bce7b767adda70dfb91cd7fea660e0 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 1 Jun 2021 09:15:34 -0700 Subject: [PATCH 089/132] a little bit more refactoring --- cpp/src/transform/transform.cpp | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index 10bbe6dc45a..1a1f4d2128b 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -68,23 +68,21 @@ void unary_operation(mutable_column_view output, std::vector make_template_types(column_view outcol_view, table_view data_view) { - int entries_per_col = 3; // type, mask, offset - std::string mask_type = "uint32_t*"; - - // int32_t + std::string mskptr_type = cudf::jit::get_type_name(cudf::data_type(cudf::type_to_id())) + "*"; std::string offset_type = cudf::jit::get_type_name(cudf::data_type(cudf::type_to_id())); - std::vector template_types( - // output type comes first and is one extra - (data_view.num_columns() * entries_per_col) + 1 - ); + std::vector template_types(1); template_types[0] = cudf::jit::get_type_name(outcol_view.type()); - for (int i = 0; i < data_view.num_columns(); i++) { - int offset = (i * entries_per_col) + 1; - template_types[offset] = cudf::jit::get_type_name(data_view.column(i).type()) + "*"; - template_types[offset + 1] = mask_type; - template_types[offset + 2] = offset_type; + for (auto const& col: data_view) { + auto these_types = { + cudf::jit::get_type_name(col.type()) + 
"*", + mskptr_type, + offset_type + }; + + template_types.insert(template_types.end(), these_types); + } return template_types; } @@ -99,6 +97,10 @@ void generalized_operation(table_view data_view, { std::vector template_types = make_template_types(outcol_view, data_view); + for (size_t i = 0; i < template_types.size(); i++) { + std::cout << template_types[i] << std::endl; + }; + std::cout << template_types.size() << std::endl; std::string generic_kernel_name = jitify2::reflection::Template("cudf::transformation::jit::generic_udf_kernel") From 59e1209c47def4eb6756c2d86c0ec2eb118a537c Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 1 Jun 2021 11:18:44 -0700 Subject: [PATCH 090/132] remove debugging code --- cpp/src/transform/transform.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index 1a1f4d2128b..03d8fa11977 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -97,10 +97,6 @@ void generalized_operation(table_view data_view, { std::vector template_types = make_template_types(outcol_view, data_view); - for (size_t i = 0; i < template_types.size(); i++) { - std::cout << template_types[i] << std::endl; - }; - std::cout << template_types.size() << std::endl; std::string generic_kernel_name = jitify2::reflection::Template("cudf::transformation::jit::generic_udf_kernel") From c1324b8ebbcab090e84d52453859099e60558b38 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 1 Jun 2021 11:38:55 -0700 Subject: [PATCH 091/132] move repeated imports to their own function --- python/cudf/cudf/core/udf/_ops.py | 21 +++++++++++++++++++++ python/cudf/cudf/core/udf/lowering.py | 21 +-------------------- python/cudf/cudf/core/udf/typing.py | 23 +---------------------- 3 files changed, 23 insertions(+), 42 deletions(-) create mode 100644 python/cudf/cudf/core/udf/_ops.py diff --git a/python/cudf/cudf/core/udf/_ops.py b/python/cudf/cudf/core/udf/_ops.py new file 
mode 100644 index 00000000000..0fce3fdbafb --- /dev/null +++ b/python/cudf/cudf/core/udf/_ops.py @@ -0,0 +1,21 @@ +import operator + +arith_ops = [ + operator.add, + operator.sub, + operator.mul, + operator.truediv, + operator.floordiv, + operator.mod, + operator.pow + +] + +comparison_ops = [ + operator.eq, + operator.ne, + operator.lt, + operator.le, + operator.gt, + operator.ge +] diff --git a/python/cudf/cudf/core/udf/lowering.py b/python/cudf/cudf/core/udf/lowering.py index e617b7cc5a7..3fffc8ac972 100644 --- a/python/cudf/cudf/core/udf/lowering.py +++ b/python/cudf/cudf/core/udf/lowering.py @@ -11,26 +11,7 @@ from numba.extending import lower_builtin, types from llvmlite import ir -arith_ops = [ - operator.add, - operator.sub, - operator.mul, - operator.truediv, - operator.floordiv, - operator.mod, - operator.pow - -] - -comparison_ops = [ - operator.eq, - operator.ne, - operator.lt, - operator.le, - operator.gt, - operator.ge -] - +from ._ops import arith_ops, comparison_ops @cuda_lowering_registry.lower_constant(NAType) def constant_na(context, builder, ty, pyval): diff --git a/python/cudf/cudf/core/udf/typing.py b/python/cudf/cudf/core/udf/typing.py index 49668869eff..3ed0d0fd265 100644 --- a/python/cudf/cudf/core/udf/typing.py +++ b/python/cudf/cudf/core/udf/typing.py @@ -11,28 +11,7 @@ import operator - -arith_ops = [ - operator.add, - operator.sub, - operator.mul, - operator.truediv, - operator.floordiv, - operator.mod, - operator.pow - -] - - -comparison_ops = [ - operator.eq, - operator.ne, - operator.lt, - operator.le, - operator.gt, - operator.ge -] - +from ._ops import arith_ops, comparison_ops class MaskedType(types.Type): ''' From ed79368b27fe9f6fc57a7738d9fc79a5ff40f3eb Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 1 Jun 2021 11:52:26 -0700 Subject: [PATCH 092/132] remove old ipython notebook --- notebooks/udf-nulls-demo-final.ipynb | 1574 -------------------------- 1 file changed, 1574 deletions(-) delete mode 100644 
notebooks/udf-nulls-demo-final.ipynb diff --git a/notebooks/udf-nulls-demo-final.ipynb b/notebooks/udf-nulls-demo-final.ipynb deleted file mode 100644 index bb72fd04378..00000000000 --- a/notebooks/udf-nulls-demo-final.ipynb +++ /dev/null @@ -1,1574 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "brutal-princeton", - "metadata": {}, - "source": [ - "# NA in cuDF UDFs\n", - "cuDFs design decision to store null informtion in bitmasks is really smart, and makes things very performant and tractable in memory bound circumstances such as GPU operations. However this design when coupled with the natural inefficiencies that arise from any kind of serial iteration over our data has made `` support in general user defined functions hard to solution for. This notebook offers an approach based on jitting a UDF's arguments as a special custom Numba type to produce a generic PTX function. This function is than inlined into a general kernel in libcudf and passed the relevant data and masks inside of libcudf. " - ] - }, - { - "cell_type": "markdown", - "id": "scenic-laugh", - "metadata": {}, - "source": [ - "#### Problem setup: concrete example" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "above-athletics", - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "import cudf" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "declared-correspondence", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
xy
011
1<NA>2
23<NA>
\n", - "
" - ], - "text/plain": [ - " x y\n", - "0 1 1\n", - "1 2\n", - "2 3 " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = cudf.DataFrame({\n", - " 'x': [1, None, 3],\n", - " 'y': [1, 2, None]\n", - "})\n", - "df.head()" - ] - }, - { - "cell_type": "markdown", - "id": "expected-milwaukee", - "metadata": {}, - "source": [ - "Consider the following UDF on two variables adapted from https://docs.rapids.ai/api/cudf/stable/guide-to-udfs.html. This API is fairly different from the pandas API, for several reasons:\n", - "- In cuDF, We need to write a loop over arrays in classic numba syntax\n", - "- In cuDF, the function returns into an output column we provide as an argument\n", - "- The result is different!" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "tired-niger", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
xyout
0112
1<NA>2<NA>
23<NA>3
\n", - "
" - ], - "text/plain": [ - " x y out\n", - "0 1 1 2\n", - "1 2 \n", - "2 3 3" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "def pandas_add(x, y):\n", - " if x is not pd.NA and x < 2:\n", - " return x + y\n", - " else:\n", - " return x\n", - " \n", - "pandas_df = df.to_pandas(nullable=True)\n", - "pandas_df['out'] = pandas_df.apply(lambda row: pandas_add(row['x'], row['y']), axis=1)\n", - "pandas_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "employed-offer", - "metadata": {}, - "outputs": [], - "source": [ - "def gpu_add(x, y, out):\n", - " for i, (xi, yi) in enumerate(zip(x, y)):\n", - " if xi < 2:\n", - " out[i] = xi + yi\n", - " else:\n", - " out[i] = xi" - ] - }, - { - "cell_type": "markdown", - "id": "animal-gateway", - "metadata": {}, - "source": [ - "Problem: The null mask of `y` needs to only be considered if `x > 0`. But it" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "occupied-upgrade", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
xyout
0112.0
1<NA>2<NA>
23<NA><NA>
\n", - "
" - ], - "text/plain": [ - " x y out\n", - "0 1 1 2.0\n", - "1 2 \n", - "2 3 " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = df.apply_rows(gpu_add,\n", - " incols=['x', 'y'],\n", - " outcols={'out':np.float64},\n", - " kwargs={})\n", - "df.head()" - ] - }, - { - "cell_type": "markdown", - "id": "western-relief", - "metadata": {}, - "source": [ - "We also don't support comparing `cudf.NA` in any of our UDFs, in any way." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "metallic-macedonia", - "metadata": {}, - "outputs": [], - "source": [ - "def gpu_add_error(x, y, out):\n", - " for i, (xi, yi) in enumerate(zip(x, y)):\n", - " if xi is pd.NA:\n", - " return 5\n", - " else:\n", - " return xi + yi" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "scenic-graduate", - "metadata": {}, - "outputs": [ - { - "ename": "TypingError", - "evalue": "Failed in nopython mode pipeline (step: nopython frontend)\n\u001b[1m\u001b[1m\u001b[1mNo implementation of function Function() found for signature:\n \n >>> gpu_add_error (array(int64, 1d, A), array(int64, 1d, A), array(float64, 1d, A))\n \nThere are 2 candidate implementations:\n\u001b[1m - Of which 2 did not match due to:\n Overload in function 'gpu_add_error ': File: ../../../../../../ipynb/: Line 1.\n With argument(s): '(array(int64, 1d, A), array(int64, 1d, A), array(float64, 1d, A))':\u001b[0m\n\u001b[1m Rejected as the implementation raised a specific error:\n TypingError: Failed in nopython mode pipeline (step: nopython frontend)\n \u001b[1m\u001b[1mUnknown attribute 'NA' of type Module()\n \u001b[1m\n File \"\", line 3:\u001b[0m\n \u001b[1mdef gpu_add_error(x, y, out):\n \n for i, (xi, yi) in enumerate(zip(x, y)):\n \u001b[1m if xi is pd.NA:\n \u001b[0m \u001b[1m^\u001b[0m\u001b[0m\n \u001b[0m\n \u001b[0m\u001b[1mDuring: typing of get attribute at (3)\u001b[0m\n \u001b[1m\n File \"\", line 3:\u001b[0m\n \u001b[1mdef 
gpu_add_error(x, y, out):\n \n for i, (xi, yi) in enumerate(zip(x, y)):\n \u001b[1m if xi is pd.NA:\n \u001b[0m \u001b[1m^\u001b[0m\u001b[0m\n\u001b[0m\n raised from /home/nfs/brmiller/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/core/typeinfer.py:1071\n\u001b[0m\n\u001b[0m\u001b[1mDuring: resolving callee type: Function()\u001b[0m\n\u001b[0m\u001b[1mDuring: typing of call at (8)\n\u001b[0m\n\u001b[1m\nFile \"\", line 8:\u001b[0m\n\u001b[1m\u001b[0m\n", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypingError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mincols\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'x'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'y'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0moutcols\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m'out'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfloat64\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m kwargs={})\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/repos/cudf/python/cudf/cudf/core/dataframe.py\u001b[0m in \u001b[0;36mapply_rows\u001b[0;34m(self, func, incols, outcols, kwargs, pessimistic_nulls, cache_key)\u001b[0m\n\u001b[1;32m 4776\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4777\u001b[0m 
\u001b[0mpessimistic_nulls\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4778\u001b[0;31m \u001b[0mcache_key\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcache_key\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4779\u001b[0m )\n\u001b[1;32m 4780\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/repos/cudf/python/cudf/cudf/utils/applyutils.py\u001b[0m in \u001b[0;36mapply_rows\u001b[0;34m(df, func, incols, outcols, kwargs, pessimistic_nulls, cache_key)\u001b[0m\n\u001b[1;32m 83\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mincols\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutcols\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpessimistic_nulls\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcache_key\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcache_key\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 84\u001b[0m )\n\u001b[0;32m---> 85\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mapplyrows\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 86\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 87\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/repos/cudf/python/cudf/cudf/utils/applyutils.py\u001b[0m in \u001b[0;36mrun\u001b[0;34m(self, df, **launch_params)\u001b[0m\n\u001b[1;32m 168\u001b[0m \u001b[0mbound\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbind\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 169\u001b[0m \u001b[0;31m# Launch kernel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 
170\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlaunch_kernel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbound\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mlaunch_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 171\u001b[0m \u001b[0;31m# Prepare pessimistic nullmask\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 172\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpessimistic_nulls\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/repos/cudf/python/cudf/cudf/utils/applyutils.py\u001b[0m in \u001b[0;36mlaunch_kernel\u001b[0;34m(self, df, args)\u001b[0m\n\u001b[1;32m 195\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 196\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mlaunch_kernel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 197\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkernel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 198\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 199\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/cuda/compiler.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 370\u001b[0m 
\u001b[0mkernel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkernel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 371\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 372\u001b[0;31m \u001b[0mkernel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkernel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mspecialize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 373\u001b[0m \u001b[0mblockdim\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_compute_thread_per_block\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkernel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 374\u001b[0m \u001b[0mgriddim\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mntasks\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mblockdim\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m//\u001b[0m \u001b[0mblockdim\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/cuda/compiler.py\u001b[0m in \u001b[0;36mspecialize\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 897\u001b[0m \u001b[0mtargetoptions\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'link'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlink\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 898\u001b[0m specialization = Dispatcher(self.py_func, [types.void(*argtypes)],\n\u001b[0;32m--> 899\u001b[0;31m self._bind, targetoptions)\n\u001b[0m\u001b[1;32m 900\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mspecializations\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margtypes\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mspecialization\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 901\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mspecialization\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/cuda/compiler.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, func, sigs, bind, targetoptions)\u001b[0m\n\u001b[1;32m 809\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msigs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 810\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Only one signature supported at present\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 811\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcompile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msigs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 812\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_can_compile\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 813\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/cuda/compiler.py\u001b[0m in \u001b[0;36mcompile\u001b[0;34m(self, sig)\u001b[0m\n\u001b[1;32m 947\u001b[0m kernel = compile_kernel(self.py_func, argtypes,\n\u001b[1;32m 948\u001b[0m 
\u001b[0mlink\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlink\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 949\u001b[0;31m **self.targetoptions)\n\u001b[0m\u001b[1;32m 950\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdefinitions\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margtypes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkernel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 951\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_bind\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/core/compiler_lock.py\u001b[0m in \u001b[0;36m_acquire_compile_lock\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 30\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_acquire_compile_lock\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 32\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 33\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0m_acquire_compile_lock\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 34\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - 
"\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/cuda/compiler.py\u001b[0m in \u001b[0;36mcompile_kernel\u001b[0;34m(pyfunc, args, link, debug, inline, fastmath, extensions, max_registers, opt)\u001b[0m\n\u001b[1;32m 55\u001b[0m def compile_kernel(pyfunc, args, link, debug=False, inline=False,\n\u001b[1;32m 56\u001b[0m fastmath=False, extensions=[], max_registers=None, opt=True):\n\u001b[0;32m---> 57\u001b[0;31m \u001b[0mcres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcompile_cuda\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpyfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtypes\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvoid\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdebug\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdebug\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minline\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minline\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 58\u001b[0m \u001b[0mfname\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcres\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfndesc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mllvm_func_name\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 59\u001b[0m lib, kernel = cres.target_context.prepare_cuda_kernel(cres.library, fname,\n", - "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/core/compiler_lock.py\u001b[0m in \u001b[0;36m_acquire_compile_lock\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 30\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_acquire_compile_lock\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 32\u001b[0;31m 
\u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 33\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0m_acquire_compile_lock\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 34\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/cuda/compiler.py\u001b[0m in \u001b[0;36mcompile_cuda\u001b[0;34m(pyfunc, return_type, args, debug, inline)\u001b[0m\n\u001b[1;32m 44\u001b[0m \u001b[0mreturn_type\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mreturn_type\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 45\u001b[0m \u001b[0mflags\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mflags\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 46\u001b[0;31m locals={})\n\u001b[0m\u001b[1;32m 47\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 48\u001b[0m \u001b[0mlibrary\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcres\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlibrary\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/core/compiler.py\u001b[0m in \u001b[0;36mcompile_extra\u001b[0;34m(typingctx, targetctx, func, args, return_type, flags, locals, library, pipeline_class)\u001b[0m\n\u001b[1;32m 600\u001b[0m pipeline = pipeline_class(typingctx, targetctx, library,\n\u001b[1;32m 601\u001b[0m args, return_type, flags, locals)\n\u001b[0;32m--> 602\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcompile_extra\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 
603\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 604\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/core/compiler.py\u001b[0m in \u001b[0;36mcompile_extra\u001b[0;34m(self, func)\u001b[0m\n\u001b[1;32m 350\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlifted\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 351\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlifted_from\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 352\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_compile_bytecode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 353\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 354\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mcompile_ir\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc_ir\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlifted\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlifted_from\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/core/compiler.py\u001b[0m in \u001b[0;36m_compile_bytecode\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 412\u001b[0m \"\"\"\n\u001b[1;32m 413\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfunc_ir\u001b[0m \u001b[0;32mis\u001b[0m 
\u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 414\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_compile_core\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 415\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 416\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_compile_ir\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/core/compiler.py\u001b[0m in \u001b[0;36m_compile_core\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 392\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstatus\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfail_reason\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 393\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_final_pipeline\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 394\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 395\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 396\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mCompilerError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"All available pipelines exhausted\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/core/compiler.py\u001b[0m in \u001b[0;36m_compile_core\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 383\u001b[0m \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 384\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 385\u001b[0;31m \u001b[0mpm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 386\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcr\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 387\u001b[0m \u001b[0;32mbreak\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/core/compiler_machinery.py\u001b[0m in \u001b[0;36mrun\u001b[0;34m(self, state)\u001b[0m\n\u001b[1;32m 337\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpipeline_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpass_desc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 338\u001b[0m \u001b[0mpatched_exception\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_patch_error\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 339\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mpatched_exception\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 340\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 341\u001b[0m \u001b[0;32mdef\u001b[0m 
\u001b[0mdependency_analysis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/core/compiler_machinery.py\u001b[0m in \u001b[0;36mrun\u001b[0;34m(self, state)\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[0mpass_inst\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_pass_registry\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpss\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpass_inst\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 329\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpass_inst\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mCompilerPass\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 330\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_runPass\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0midx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpass_inst\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstate\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 331\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 332\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mBaseException\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Legacy pass in use\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/core/compiler_lock.py\u001b[0m in \u001b[0;36m_acquire_compile_lock\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 30\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_acquire_compile_lock\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 32\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 33\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0m_acquire_compile_lock\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 34\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/core/compiler_machinery.py\u001b[0m in \u001b[0;36m_runPass\u001b[0;34m(self, index, pss, internal_state)\u001b[0m\n\u001b[1;32m 287\u001b[0m \u001b[0mmutated\u001b[0m \u001b[0;34m|=\u001b[0m \u001b[0mcheck\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_initialization\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minternal_state\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 288\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mSimpleTimer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mpass_time\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 289\u001b[0;31m \u001b[0mmutated\u001b[0m \u001b[0;34m|=\u001b[0m \u001b[0mcheck\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_pass\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minternal_state\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 290\u001b[0m \u001b[0;32mwith\u001b[0m 
\u001b[0mSimpleTimer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mfinalize_time\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 291\u001b[0m \u001b[0mmutated\u001b[0m \u001b[0;34m|=\u001b[0m \u001b[0mcheck\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_finalizer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minternal_state\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/core/compiler_machinery.py\u001b[0m in \u001b[0;36mcheck\u001b[0;34m(func, compiler_state)\u001b[0m\n\u001b[1;32m 260\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 261\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mcheck\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcompiler_state\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 262\u001b[0;31m \u001b[0mmangled\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcompiler_state\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 263\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmangled\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 264\u001b[0m msg = (\"CompilerPass implementations should return True/False. 
\"\n", - "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/core/typed_passes.py\u001b[0m in \u001b[0;36mrun_pass\u001b[0;34m(self, state)\u001b[0m\n\u001b[1;32m 98\u001b[0m \u001b[0mstate\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreturn_type\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 99\u001b[0m \u001b[0mstate\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlocals\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 100\u001b[0;31m raise_errors=self._raise_errors)\n\u001b[0m\u001b[1;32m 101\u001b[0m \u001b[0mstate\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtypemap\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtypemap\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 102\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_raise_errors\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/core/typed_passes.py\u001b[0m in \u001b[0;36mtype_inference_stage\u001b[0;34m(typingctx, interp, args, return_type, locals, raise_errors)\u001b[0m\n\u001b[1;32m 70\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[0minfer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuild_constraint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 72\u001b[0;31m \u001b[0minfer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpropagate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mraise_errors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mraise_errors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 73\u001b[0m \u001b[0mtypemap\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrestype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcalltypes\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0minfer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munify\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mraise_errors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mraise_errors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 74\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/core/typeinfer.py\u001b[0m in \u001b[0;36mpropagate\u001b[0;34m(self, raise_errors)\u001b[0m\n\u001b[1;32m 1069\u001b[0m if isinstance(e, ForceLiteralArg)]\n\u001b[1;32m 1070\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mforce_lit_args\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1071\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1072\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1073\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mreduce\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moperator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mor_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mforce_lit_args\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mTypingError\u001b[0m: Failed in nopython mode pipeline (step: nopython frontend)\n\u001b[1m\u001b[1m\u001b[1mNo implementation of function Function() found for signature:\n \n >>> gpu_add_error (array(int64, 1d, A), array(int64, 1d, A), array(float64, 1d, A))\n \nThere are 2 candidate implementations:\n\u001b[1m - Of which 2 did not match due to:\n Overload in function 'gpu_add_error ': File: ../../../../../../ipynb/: Line 1.\n With argument(s): '(array(int64, 1d, A), array(int64, 1d, A), array(float64, 1d, A))':\u001b[0m\n\u001b[1m Rejected as the implementation raised a specific error:\n TypingError: Failed in 
nopython mode pipeline (step: nopython frontend)\n \u001b[1m\u001b[1mUnknown attribute 'NA' of type Module()\n \u001b[1m\n File \"\", line 3:\u001b[0m\n \u001b[1mdef gpu_add_error(x, y, out):\n \n for i, (xi, yi) in enumerate(zip(x, y)):\n \u001b[1m if xi is pd.NA:\n \u001b[0m \u001b[1m^\u001b[0m\u001b[0m\n \u001b[0m\n \u001b[0m\u001b[1mDuring: typing of get attribute at (3)\u001b[0m\n \u001b[1m\n File \"\", line 3:\u001b[0m\n \u001b[1mdef gpu_add_error(x, y, out):\n \n for i, (xi, yi) in enumerate(zip(x, y)):\n \u001b[1m if xi is pd.NA:\n \u001b[0m \u001b[1m^\u001b[0m\u001b[0m\n\u001b[0m\n raised from /home/nfs/brmiller/anaconda3/envs/cudf_dev/lib/python3.7/site-packages/numba/core/typeinfer.py:1071\n\u001b[0m\n\u001b[0m\u001b[1mDuring: resolving callee type: Function()\u001b[0m\n\u001b[0m\u001b[1mDuring: typing of call at (8)\n\u001b[0m\n\u001b[1m\nFile \"\", line 8:\u001b[0m\n\u001b[1m\u001b[0m\n" - ] - } - ], - "source": [ - "df = df.apply_rows(gpu_add_error,\n", - " incols=['x', 'y'],\n", - " outcols={'out':np.float64},\n", - " kwargs={})\n", - "df.head()" - ] - }, - { - "cell_type": "markdown", - "id": "material-financing", - "metadata": {}, - "source": [ - "#### Why\n", - "This is because nulls are generally handled \"pessimistically\", which roughly means the value of the output mask bit is just set to be a big `or` between all the input column's bitmasks. This isn't a problem in pandas, because the UDF is applied by looping through the rows and individually passing each value elementwise through the UDF. When it encounters a null, the value that gets passed is `pd.NA`, which behaves the way it needs to for the function to return the correct value for that row. 
\n", - "#### The two things we want to do then are:\n", - "- Make the API feel a little more natural\n", - "- explicitly be able to handle nulls in a dynamic way" - ] - }, - { - "cell_type": "markdown", - "id": "informal-wisdom", - "metadata": {}, - "source": [ - "# Detour: The cuDF UnaryOp Compilation Pipeline" - ] - }, - { - "cell_type": "markdown", - "id": "mineral-roman", - "metadata": {}, - "source": [ - "```\n", - "Python Function -> Numba -> PTX Code -> libcudf parser -> inlineable function -> Jitify -> Execution\n", - " | \n", - " data pointers ---------^\n", - " headers ---------------^\n", - " extra kernel code -----^\n", - "```" - ] - }, - { - "cell_type": "markdown", - "id": "overall-discretion", - "metadata": {}, - "source": [ - "The proposed solution to this problem draws heavily on the existing concepts in cuDF's unaryop machinery. This is a situation where the API feels really natural and is quite compatible with pandas, even though ours is named `applymap` and theirs is named `apply` for some reason." 
- ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "expired-finder", - "metadata": {}, - "outputs": [], - "source": [ - "x = cudf.Series([1, None, 3])\n", - "\n", - "def f(x):\n", - " return x + 1" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "attended-mount", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 2\n", - "1 \n", - "2 4\n", - "dtype: int64" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "x.applymap(f)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "guided-slave", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 1\n", - "1 \n", - "2 3\n", - "dtype: Int64" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "x_pd = x.to_pandas(nullable=True)\n", - "x_pd" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "modern-barrier", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 2\n", - "1 \n", - "2 4\n", - "dtype: object" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "x_pd.apply(f)" - ] - }, - { - "cell_type": "markdown", - "id": "general-madagascar", - "metadata": {}, - "source": [ - "The null handling here is pretty simple - it's always a copy of the original bitmask. But that's not why we're here. Let's pop the hood. 
From https://github.com/rapidsai/cudf/blob/branch-0.19/python/cudf/cudf/core/column/numerical.py#L721-L726:\n", - "\n", - "```\n", - "def _numeric_column_unaryop(operand: ColumnBase, op: str) -> ColumnBase:\n", - " if callable(op):\n", - " return libcudf.transform.transform(operand, op)\n", - "\n", - " op = libcudf.unary.UnaryOp[op.upper()]\n", - " return libcudf.unary.unary_operation(operand, op)\n", - "```\n", - "\n", - "From here the `transform` cython picks up the callable python function as well as the `Column` to which it is to be applied. Here's some pseudocode for what happens inside it:\n", - "\n", - "```\n", - "def transform(Column input, op):\n", - " signature = get_signature(input)\n", - " compiled_op = cudautils.compile_udf(op, signature)\n", - " c_str = compiled_op[0].encode('UTF-8')\n", - "\n", - " c_output = move(\n", - " libcudf_transform(\n", - " input,\n", - " c_str\n", - " )\n", - "\n", - " )\n", - "\n", - " return Column.from_unique_ptr(move(c_output))\n", - "```" - ] - }, - { - "cell_type": "markdown", - "id": "explicit-lebanon", - "metadata": {}, - "source": [ - "What we have so far then is:\n", - "1. Our input column\n", - "2. A PTX function compiled by Numba based off the python function, and the type of the argument being passed\n", - "\n", - "`cudautils.compile_udf` calls out to Numba to transform the pure python function into PTX code through LLVM IR and a series of compilation steps. 
The pipeline inside Numba goes something like:\n", - "\n", - "```\n", - "Python function -> python bytecode -> type inference -> lowering -> LLVM IR -> PTX code\n", - "```\n", - "\n", - "#### Python bytecode: These are instructions for the python interpreter" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "fifteen-device", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " 4 0 LOAD_FAST 0 (x)\n", - " 2 LOAD_CONST 1 (1)\n", - " 4 BINARY_ADD\n", - " 6 RETURN_VALUE\n" - ] - } - ], - "source": [ - "import dis\n", - "dis.dis(f)" - ] - }, - { - "cell_type": "markdown", - "id": "latter-helping", - "metadata": {}, - "source": [ - "#### Type inference: Assembly level languages only operate in terms of primitive types. \n", - "\n", - "Thus to generate LLVM IR, Numba needs to know the types of every variable at every point during the function, from arguments to return values. This is one of the reasons `signature` is a required arg to `compile_udf`. We only get this information at runtime, because the user can pass anything into their UDF. When they apply their UDF to a `Series`, it's only at that point Numba can know that the `x` in `f(x)` is of type `int64` for instance - and only then can it actually complete the type inference portion of the process.\n", - "\n", - "#### Lowering\n", - "Once type inference is complete and Numba knows the types of all the input, output, and intermediate variables, it combines that with the algorithmic information from the python function's bytecode and produces LLVM IR in a process called \"lowering\". LLVM IR is like a platform independent assembly language. 
One can compile from LLVM IR to assembly code for any platform, including into PTX code for NVIDIA GPUs\n", - "\n", - "\n", - "#### What does this mean for us?\n", - "It means that what we get out of `cudautils.compile_udf` is an actual string containing a PTX function, compiled by Numba for arguments of the type `input.dtype`. It is important to note that this function is a function that operates, like the original function, on a single element. It does NOT contain a kernel. In fact, here's exactly what it is:" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "direct-venezuela", - "metadata": {}, - "outputs": [], - "source": [ - "from cudf.utils.cudautils import compile_udf" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "engaged-coaching", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "//\n", - "// Generated by NVIDIA NVVM Compiler\n", - "//\n", - "// Compiler Build ID: CL-27506705\n", - "// Cuda compilation tools, release 10.2, V10.2.89\n", - "// Based on LLVM 3.4svn\n", - "//\n", - "\n", - ".version 6.5\n", - ".target sm_70\n", - ".address_size 64\n", - "\n", - "\t// .globl\t_ZN8__main__5f$248Ex\n", - ".common .global .align 8 .u64 _ZN08NumbaEnv8__main__5f$248Ex;\n", - "\n", - ".visible .func (.param .b32 func_retval0) _ZN8__main__5f$248Ex(\n", - "\t.param .b64 _ZN8__main__5f$248Ex_param_0,\n", - "\t.param .b64 _ZN8__main__5f$248Ex_param_1\n", - ")\n", - "{\n", - "\t.reg .b32 \t%r<2>;\n", - "\t.reg .b64 \t%rd<4>;\n", - "\n", - "\n", - "\tld.param.u64 \t%rd1, [_ZN8__main__5f$248Ex_param_0];\n", - "\tld.param.u64 \t%rd2, [_ZN8__main__5f$248Ex_param_1];\n", - "\tadd.s64 \t%rd3, %rd2, 1;\n", - "\tst.u64 \t[%rd1], %rd3;\n", - "\tmov.u32 \t%r1, 0;\n", - "\tst.param.b32\t[func_retval0+0], %r1;\n", - "\tret;\n", - "}\n", - "\n", - "\n", - "\u0000\n" - ] - } - ], - "source": [ - "from numba.np import numpy_support\n", - "numba_type = 
numpy_support.from_dtype(np.dtype('int64'))\n", - "ptx, _ = compile_udf(f, (numba_type,))\n", - "\n", - "print(ptx)" - ] - }, - { - "cell_type": "markdown", - "id": "inclusive-endorsement", - "metadata": {}, - "source": [ - "```\n", - "// .globl main\n", - "\n", - ".visible .func (.param .b32 return_value) main(\n", - "\t.param .b64 param_0, # TWO input parameters\n", - "\t.param .b64 param_1 \n", - ")\n", - "{\n", - "\t.reg .b32 \t%r<2>; # declare two 32-bit registers, named %r0 and %r1\n", - "\t.reg .b64 \t%rd<4>; # declare four 64-bit registers, named %rd0, %rd1, %rd2, %rd3\n", - "\n", - "\n", - "\tld.param.u64 \t%rd1, [param_0]; # load param_0 into %rd1\n", - "\tld.param.u64 \t%rd2, [param_1]; # load param_1 into %rd2\n", - "\tadd.s64 \t%rd3, %rd2, 1; # take the value of %rd2 (i.e. param_1), add 1, place in %rd3\n", - "\tst.u64 \t[%rd1], %rd3; # store the value of %rd3 into the location pointed to by %rd1\n", - "\tmov.u32 \t%r1, 0; # move 0 into %r1\n", - "\tst.param.b32\t[func_retval0+0], %r1; # place the value of %r1 into the return value\n", - "\tret; # return 0\n", - "}\n", - "```" - ] - }, - { - "cell_type": "markdown", - "id": "imperial-quality", - "metadata": {}, - "source": [ - "# What happens next?\n", - "Libcudf takes it from here. Broadly speaking, what happens at this point is libcudf hacks together a string (which consists of several elements) which ends up being handed off to jitify and compiled into a final kernel. Jitify then launches that kernel, taking the pointer to the beginning of the actual data column to be transformed as an argument. The three elements are:\n", - "\n", - "1. A header\n", - "2. An outer \"calling\" kernel that generically calls the PTX function\n", - "3. A processed version of the PTX function that inlines it directly into CUDA\n", - "\n", - "The libcudf parser essentially takes the PTX function as above and turns it into a generically callable inlinable function.
The final file that gets passed off to jitify to be compiled looks like this:\n", - "\n", - "```\n", - "\n", - "#pragma once\n", - "\n", - "// Include Jitify's cstddef header first\n", - "#include \n", - "\n", - "#include \n", - "#include \n", - "#include \n", - "#include \n", - "#include \n", - "#include \n", - "\n", - "template \n", - " __global__\n", - " void kernel(cudf::size_type size,\n", - " TypeOut* out_data, TypeIn* in_data) {\n", - " int tid = threadIdx.x;\n", - " int blkid = blockIdx.x;\n", - " int blksz = blockDim.x;\n", - " int gridsz = gridDim.x;\n", - "\n", - " int start = tid + blkid * blksz;\n", - " int step = blksz * gridsz;\n", - "\n", - " for (cudf::size_type i=start; i;\");\n", - " /** .reg .b32 \t%r<2> */\n", - " asm volatile (\" .reg .b64 _rd<4>;\");\n", - " /** .reg .b64 \t%rd<4> */\n", - " asm volatile (\" mov.u64 _rd1, %0;\": : \"l\"(_ZN8__main__5f_241Ex_param_0));\n", - " /** ld.param.u64 \t%rd1, [_ZN8__main__5f$241Ex_param_0] */\n", - " asm volatile (\" mov.u64 _rd2, %0;\": : \"l\"(_ZN8__main__5f_241Ex_param_1));\n", - " /** ld.param.u64 \t%rd2, [_ZN8__main__5f$241Ex_param_1] */\n", - " asm volatile (\" add.s64 _rd3, _rd2, 1;\");\n", - " /** add.s64 \t%rd3, %rd2, 1 */\n", - " asm volatile (\" st.u64 [_rd1], _rd3;\");\n", - " /** st.u64 \t[%rd1], %rd3 */\n", - " asm volatile (\" mov.u32 _r1, 0;\");\n", - " /** mov.u32 \t%r1, 0 */\n", - " asm volatile (\" /** *** SNIP. *** */\");\n", - " /** st.param.b32\t[func_retval0+0], %r1 */\n", - " asm volatile (\"bra RETTGT;\");\n", - " asm volatile (\"RETTGT:}\");\n", - "\n", - "}\n", - "\n", - "```\n", - "\n", - "Apart from returning the data back to the user, that's more or less the process. 
" - ] - }, - { - "cell_type": "markdown", - "id": "musical-membership", - "metadata": {}, - "source": [ - "# What does all this have to do with NAs?\n", - "The pipeline is based on the idea that anything you could want to do with a single value of `x` arithmetically is expressible as a generic PTX function of `x`, along with some type information, that Numba can generate for you. The rest of the machinery is just meant to deliver the data to this function threadwise. We're going to extend this concept to a function of four variables instead of one: a masked binary operation `x + y` where the four arguments are:\n", - "\n", - "1. `x`\n", - "2. `y`\n", - "3. `x.mask`\n", - "4. `y.mask`\n", - "\n", - "\n", - "We're going to modify the general kernel that calls `GENERIC_UNARY_OP` and generalize it to accept these four arguments and call a `GENERIC_BINARY_OP` instead (with two extra arguments - the mask bools).\n", - "\n", - "# Creating a Numba extension type\n", - "\n", - "Remember how Numba produces PTX code from a python function and some type information? We're going to create a new Type in Numba that is built around a Struct:\n", - "\n", - "```\n", - "struct Masked {\n", - " int64_t value;\n", - " bool valid;\n", - "}\n", - "```\n", - "And we're going to add an overload of `add` (`+`, `operator.add`) to Numba's registry of function signatures that correctly handles null semantics. Then we're going to JIT the incoming python function and use a `Masked` type for every argument.
" - ] - }, - { - "cell_type": "markdown", - "id": "qualified-communications", - "metadata": {}, - "source": [ - "#### Tell Numba that a `MaskedType` exists, and not much else" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "boring-orleans", - "metadata": {}, - "outputs": [], - "source": [ - "from numba.core.extending import types\n", - "class MaskedType(types.Type):\n", - " # A corresponding MaskedType for numba\n", - " # numba can only generate LLVM IR for things\n", - " # that it recognizes. This is the most basic\n", - " # thing needed for numba to recognize the type,\n", - " # all it really says is \"there's a type, \n", - " # called MaskedType\". name is for __repr__\n", - " def __init__(self):\n", - " super().__init__(name=\"Masked\")\n", - " \n", - "numba_masked = MaskedType()" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "annual-regard", - "metadata": {}, - "outputs": [], - "source": [ - "from numba.core.extending import make_attribute_wrapper\n", - "\n", - "make_attribute_wrapper(MaskedType, \"value\", \"value\")\n", - "make_attribute_wrapper(MaskedType, \"valid\", \"valid\")" - ] - }, - { - "cell_type": "markdown", - "id": "persistent-palace", - "metadata": {}, - "source": [ - "#### Tell Numba what this type looks like. In our case, it's a struct. " - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "opening-specific", - "metadata": {}, - "outputs": [], - "source": [ - "from numba.core.extending import register_model, models\n", - "\n", - "@register_model(MaskedType)\n", - "class MaskedModel(models.StructModel):\n", - " def __init__(self, dmm, fe_type):\n", - " members = [(\"value\", types.int64), (\"valid\", types.bool_)]\n", - " models.StructModel.__init__(self, dmm, fe_type, members)" - ] - }, - { - "cell_type": "markdown", - "id": "posted-confirmation", - "metadata": {}, - "source": [ - "#### Register an overload of `operator.add` with Numba's registry of `CUDA` functions. 
\n", - "This is part of the typing phase. When we pass `f(x, y): return x + y` into Numba and say that `x` and `y` are of type `Masked`, it hits the `x + y` statement and goes looking for an overload of `add` with a signature matching those operands. It works by either finding a match and the end or not having one. This piece of code conditionally emits the signature it needs to find, when prompted with two arguments of type `Masked`. One can see how they might dynamically return different types depending on arguments. But this roughly says \"when Numba looks for an overload of `add` that takes two `Masked` as arguments, let it know that there is one, and it will return a `Masked`. " - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "subjective-marshall", - "metadata": {}, - "outputs": [], - "source": [ - "from numba.cuda.cudadecl import registry as cuda_registry\n", - "import operator\n", - "from numba.core.typing.templates import AbstractTemplate\n", - "\n", - "\n", - "@cuda_registry.register_global(operator.add)\n", - "class MaskedScalarAdd(AbstractTemplate):\n", - " # abstracttemplate vs concretetemplate\n", - " def generic(self, args, kws):\n", - " if isinstance(args[0], MaskedType) and isinstance(args[1], MaskedType):\n", - " return signature(numba_masked, numba_masked, numba_masked)" - ] - }, - { - "cell_type": "markdown", - "id": "compliant-strengthening", - "metadata": {}, - "source": [ - "#### Implement Masked + Masked\n", - "So far, Numba knows:\n", - "- There's a `MaskedType`. \n", - "- There's an overload of `operator.add` that accepts two `MaskedType` and returns a `MaskedType`\n", - "\n", - "Now it essentially needs an implementation for that overload of `operator.add`." 
- ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "treated-pastor", - "metadata": {}, - "outputs": [], - "source": [ - "from numba.cuda.cudaimpl import lower as cuda_lower\n", - "\n", - "@cuda_lower(operator.add, MaskedType, MaskedType)\n", - "def masked_scalar_add_impl(context, builder, sig, args):\n", - " # get the types from the signature\n", - " masked_type_1, masked_type_2 = sig.args\n", - " masked_return_type = sig.return_type\n", - "\n", - " # create LLVM IR structs\n", - " m1 = cgutils.create_struct_proxy(masked_type_1)(\n", - " context, builder, value=args[0]\n", - " )\n", - " m2 = cgutils.create_struct_proxy(masked_type_2)(\n", - " context, builder, value=args[1]\n", - " )\n", - " result = cgutils.create_struct_proxy(masked_return_type)(context, builder)\n", - "\n", - " valid = builder.and_(m1.valid, m2.valid)\n", - " result.valid = valid\n", - " with builder.if_then(valid):\n", - " result.value = builder.add(m1.value, m2.value)\n", - "\n", - " return result._getvalue()" - ] - }, - { - "cell_type": "markdown", - "id": "clear-assessment", - "metadata": {}, - "source": [ - "# Testing it Out" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "formed-soccer", - "metadata": {}, - "outputs": [], - "source": [ - "from numba import cuda\n", - "def compile_masked(func):\n", - " signature = (numba_masked, numba_masked)\n", - " ptx, _ = cuda.compile_ptx_for_current_device(func, signature, device=True)\n", - " return ptx" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "essential-shade", - "metadata": {}, - "outputs": [], - "source": [ - "def f(x, y):\n", - " return x + y" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "understanding-firmware", - "metadata": {}, - "outputs": [], - "source": [ - "#ptx = compile_masked(f)" - ] - }, - { - "cell_type": "markdown", - "id": "wound-communications", - "metadata": {}, - "source": [ - "```\n", - " // .globl _ZN8__main__6f$2411E6Masked6Masked 
\n", - ".common .global .align 8 .u64 _ZN08NumbaEnv8__main__6f$2411E6Masked6Masked; \n", - " \n", - ".visible .func (.param .b32 func_retval0) _ZN8__main__6f$2411E6Masked6Masked( \n", - " .param .b64 _ZN8__main__6f$2411E6Masked6Masked_param_0, \n", - " .param .b64 _ZN8__main__6f$2411E6Masked6Masked_param_1, \n", - " .param .b32 _ZN8__main__6f$2411E6Masked6Masked_param_2, \n", - " .param .b64 _ZN8__main__6f$2411E6Masked6Masked_param_3, \n", - " .param .b32 _ZN8__main__6f$2411E6Masked6Masked_param_4 \n", - ") \n", - "{ \n", - " .reg .pred %p<4>; \n", - " .reg .b16 %rs<4>; \n", - " .reg .b32 %r<2>; \n", - " .reg .b64 %rd<6>; \n", - " \n", - " \n", - " ld.param.u64 %rd1, [_ZN8__main__6f$2411E6Masked6Masked_param_0]; \n", - " ld.param.u64 %rd2, [_ZN8__main__6f$2411E6Masked6Masked_param_1]; \n", - " ld.param.u64 %rd3, [_ZN8__main__6f$2411E6Masked6Masked_param_3]; \n", - " ld.param.u8 %rs1, [_ZN8__main__6f$2411E6Masked6Masked_param_2]; \n", - " setp.ne.s16 %p1, %rs1, 0; \n", - " ld.param.u8 %rs2, [_ZN8__main__6f$2411E6Masked6Masked_param_4]; \n", - " setp.ne.s16 %p2, %rs2, 0; \n", - " and.pred %p3, %p1, %p2; \n", - " add.s64 %rd4, %rd3, %rd2; \n", - " selp.b64 %rd5, %rd4, 0, %p3; \n", - " selp.u16 %rs3, 1, 0, %p3; \n", - " st.u64 [%rd1], %rd5; \n", - " st.u8 [%rd1+8], %rs3; \n", - " mov.u32 %r1, 0; \n", - " st.param.b32 [func_retval0+0], %r1; \n", - " ret; \n", - "} \n", - "```" - ] - }, - { - "cell_type": "markdown", - "id": "atmospheric-database", - "metadata": {}, - "source": [ - "#### Then, this is the whole file being passed to jitify:\n", - "\n", - "```cuda\n", - "\n", - " #pragma once\n", - "\n", - " // Include Jitify's cstddef header first\n", - " #include \n", - "\n", - " #include \n", - " #include \n", - " #include \n", - " #include \n", - " #include \n", - " #include \n", - "\n", - " struct Masked {\n", - " int64_t value;\n", - " bool valid;\n", - " };\n", - " \n", - " \n", - "\n", - " void null_kernel(cudf::size_type size,\n", - " TypeOut* out_data, \n", - " 
TypeLhs* lhs_data,\n", - " TypeRhs* rhs_data,\n", - " bool* out_mask,\n", - " cudf::bitmask_type const* lhs_mask,\n", - " cudf::size_type lhs_offset,\n", - " cudf::bitmask_type const* rhs_mask,\n", - " cudf::size_type rhs_offset\n", - " ) {\n", - " int tid = threadIdx.x;\n", - " int blkid = blockIdx.x;\n", - " int blksz = blockDim.x;\n", - " int gridsz = gridDim.x;\n", - "\n", - " int start = tid + blkid * blksz;\n", - " int step = blksz * gridsz;\n", - "\n", - " Masked output;\n", - "\n", - " char l_valid;\n", - " char r_valid;\n", - "\n", - " long int l_data;\n", - " long int r_data;\n", - "\n", - " for (cudf::size_type i=start; i;\"); \n", - " /** .reg .pred %p<4> */ \n", - " \n", - " asm volatile (\" .reg .b16 _rs<4>;\"); \n", - " /** .reg .b16 %rs<4> */ \n", - " \n", - " asm volatile (\" .reg .b32 _r<2>;\"); \n", - " /** .reg .b32 %r<2> */ \n", - " \n", - " asm volatile (\" .reg .b64 _rd<6>;\"); \n", - " /** .reg .b64 %rd<6> */ \n", - "\n", - " asm volatile (\" mov.u64 _rd1, %0;\": : \"l\"(_ZN8__main__6f_2413E6Masked6Masked_param_0));\n", - " /** ld.param.u64 %rd1, [_ZN8__main__6f$2413E6Masked6Masked_param_0] */\n", - "\n", - " asm volatile (\" mov.u64 _rd2, %0;\": : \"l\"(_ZN8__main__6f_2413E6Masked6Masked_param_1));\n", - " /** ld.param.u64 %rd2, [_ZN8__main__6f$2413E6Masked6Masked_param_1] */\n", - "\n", - " asm volatile (\" mov.u64 _rd3, %0;\": : \"l\"(_ZN8__main__6f_2413E6Masked6Masked_param_3));\n", - " /** ld.param.u64 %rd3, [_ZN8__main__6f$2413E6Masked6Masked_param_3] */\n", - "\n", - " asm volatile (\" cvt.u8.u8 _rs1, %0;\": : \"h\"( static_cast(_ZN8__main__6f_2413E6Masked6Masked_param_2)));\n", - " /** ld.param.u8 %rs1, [_ZN8__main__6f$2413E6Masked6Masked_param_2] */\n", - "\n", - " asm volatile (\" setp.ne.s16 _p1, _rs1, 0;\");\n", - " /** setp.ne.s16 %p1, %rs1, 0 */\n", - "\n", - " asm volatile (\" cvt.u8.u8 _rs2, %0;\": : \"h\"( static_cast(_ZN8__main__6f_2413E6Masked6Masked_param_4)));\n", - " /** ld.param.u8 %rs2, 
[_ZN8__main__6f$2413E6Masked6Masked_param_4] */\n", - "\n", - " asm volatile (\" setp.ne.s16 _p2, _rs2, 0;\");\n", - " /** setp.ne.s16 %p2, %rs2, 0 */\n", - "\n", - " asm volatile (\" and.pred _p3, _p1, _p2;\");\n", - " /** and.pred %p3, %p1, %p2 */\n", - "\n", - " asm volatile (\" add.s64 _rd4, _rd3, _rd2;\");\n", - " /** add.s64 %rd4, %rd3, %rd2 */\n", - "\n", - " asm volatile (\" selp.b64 _rd5, _rd4, 0, _p3;\");\n", - " /** selp.b64 %rd5, %rd4, 0, %p3 */\n", - "\n", - " asm volatile (\" selp.u16 _rs3, 1, 0, _p3;\");\n", - " /** selp.u16 %rs3, 1, 0, %p3 */\n", - "\n", - " asm volatile (\" st.u64 [_rd1], _rd5;\");\n", - " /** st.u64 [%rd1], %rd5 */\n", - "\n", - " asm volatile (\" st.u8 [_rd1+8], _rs3;\");\n", - " /** st.u8 [%rd1+8], %rs3 */\n", - "\n", - " asm volatile (\" mov.u32 _r1, 0;\");\n", - " /** mov.u32 %r1, 0 */\n", - "\n", - " asm volatile (\" /** *** The way we parse the CUDA PTX assumes the function returns the return value through the first function parameter. Thus the `st.param.***` instructions are not processed. 
*** */\");\n", - " /** st.param.b32 [func_retval0+0], %r1 */\n", - "\n", - " asm volatile (\"bra RETTGT;\");\n", - "\n", - "\n", - " asm volatile (\"RETTGT:}\");} \n", - " \n", - "```" - ] - }, - { - "cell_type": "markdown", - "id": "concrete-pillow", - "metadata": {}, - "source": [ - "# Test it\n", - "Here are some very basic cython bindings just used for the purposes of testing this exact functionality\n", - "```\n", - "def masked_binary_op(Column A, Column B, op, Column output_column, Column output_mask):\n", - " cdef column_view A_view = A.view()\n", - " cdef column_view B_view = B.view()\n", - "\n", - " cdef string c_str\n", - " cdef type_id c_tid\n", - " cdef data_type c_dtype\n", - "\n", - " if A.dtype != np.dtype('int64') or B.dtype != np.dtype('int64'):\n", - " raise TypeError('int64 please')\n", - " \n", - " \n", - " from cudf.core.udf import compile_udf\n", - " c_str = compile_udf(op).encode('UTF-8')\n", - "\n", - " c_tid = (\n", - " np_to_cudf_types[np.dtype('int64')]\n", - " )\n", - " c_dtype = data_type(c_tid)\n", - "\n", - " cdef column_view outcol_view = output_column.view()\n", - " cdef column_view outmsk_view = output_mask.view()\n", - "\n", - " with nogil:\n", - " c_output = move(libcudf_transform.masked_binary_op(\n", - " A_view,\n", - " B_view,\n", - " c_str,\n", - " c_dtype,\n", - " outcol_view,\n", - " outmsk_view\n", - " ))\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "after-fellow", - "metadata": {}, - "outputs": [], - "source": [ - "from cudf._lib.transform import masked_binary_op\n", - "\n", - "def demo_udf(func, s1, s2):\n", - " col1, col2 = s1._column, s2._column\n", - "\n", - " output_column = cudf.core.column.as_column(np.arange(8), dtype='int64')\n", - " output_mask = cudf.core.column.as_column([False] * 8)\n", - "\n", - " result_col = masked_binary_op(col1, col2, func, output_column, output_mask)\n", - " return cudf.Series(result_col)" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - 
"id": "phantom-square", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 2\n", - "1 \n", - "2 \n", - "3 \n", - "4 6\n", - "5 \n", - "6 10\n", - "7 \n", - "dtype: int64" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "def f(x, y):\n", - " return x + y\n", - "\n", - "s1 = cudf.Series([1, None, 3, None, 2, 2, 5, None])\n", - "s2 = cudf.Series([1, 2, None, None, 4, None, 5, None])\n", - "\n", - "demo_udf(f, s1, s2)" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "southern-stationery", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 2\n", - "1 \n", - "2 \n", - "3 \n", - "4 6\n", - "5 \n", - "6 10\n", - "7 \n", - "dtype: int64" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "s1 + s2" - ] - }, - { - "cell_type": "markdown", - "id": "recovered-premiere", - "metadata": {}, - "source": [ - "# `cudf.NA`\n", - "In general, we wan't the capability to work with `cudf.NA` inside our functions directly. To do this, we're just going to reapply the same machinery to overload what happens when we add a `MaskedType` to `cudf.NA`." 
- ] - }, - { - "cell_type": "markdown", - "id": "filled-malta", - "metadata": {}, - "source": [ - "#### Create an NAType" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "chemical-trick", - "metadata": {}, - "outputs": [], - "source": [ - "from cudf.core.scalar import _NAType\n", - "class NAType(types.Type):\n", - " # \"There is a type called NAType\"\n", - " def __init__(self):\n", - " super().__init__(name=\"NA\")\n", - "\n", - "numba_na = NAType()" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "southern-prague", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "numba.core.datamodel.models.OpaqueModel" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from numba.core.extending import typeof_impl\n", - "@typeof_impl.register(_NAType)\n", - "def typeof_na(val, c):\n", - " # instances of _NAType will be \n", - " # treaded as instances of NAType. \n", - " return numba_na\n", - "\n", - "\n", - "\n", - "register_model(NAType)(models.OpaqueModel)" - ] - }, - { - "cell_type": "markdown", - "id": "accessible-monthly", - "metadata": {}, - "source": [ - "#### `operator.add` typing for Masked <-> NA" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "harmful-glossary", - "metadata": {}, - "outputs": [], - "source": [ - "@cuda_registry.register_global(operator.add)\n", - "class MaskedScalarAddNull(AbstractTemplate):\n", - " def generic(self, args, kws):\n", - " if isinstance(args[0], MaskedType) and isinstance(args[1], NAType):\n", - " return signature(numba_masked, numba_masked, numba_na)\n", - " " - ] - }, - { - "cell_type": "markdown", - "id": "brown-cheese", - "metadata": {}, - "source": [ - "#### Lowering: AKA what to actually do when this is requested\n", - "This says that when `+` is invoked between a `MaskedType` and an `NAType`, to make a new `MaskedType`, set it's validity to zero and return it." 
- ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "encouraging-reynolds", - "metadata": {}, - "outputs": [], - "source": [ - "from numba.cuda.cudaimpl import registry as cuda_lowering_registry\n", - "\n", - "@cuda_lower(operator.add, MaskedType, NAType)\n", - "def masked_scalar_add_na_impl(context, builder, sig, args):\n", - "# return_type = sig.return_type\n", - " # use context to get llvm type for a bool\n", - " result = cgutils.create_struct_proxy(numba_masked)(context, builder)\n", - " result.valid = context.get_constant(types.boolean, 0)\n", - " return result._getvalue()\n", - "\n", - "\n", - "@cuda_lowering_registry.lower_constant(NAType)\n", - "def constant_dummy(context, builder, ty, pyval):\n", - " # This handles None, etc.\n", - " return context.get_dummy_value()" - ] - }, - { - "cell_type": "markdown", - "id": "productive-rings", - "metadata": {}, - "source": [ - "# Constants\n", - "At this point the pattern is familiar. Register an overload that emits a signature if the operands match a `MaskedType` and a constant. The lowering is logically fairly simple. If the `MaskedType` is null, the answer is null, else the answer is a new `MaskedType` whose `value` is the sum of the inputs `value` and the constant." 
- ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "short-makeup", - "metadata": {}, - "outputs": [], - "source": [ - "from llvmlite import ir\n", - "\n", - "@cuda_registry.register_global(operator.add)\n", - "class MaskedScalarAddConstant(AbstractTemplate):\n", - " def generic(self, args, kws):\n", - " if isinstance(args[0], MaskedType) and isinstance(args[1], types.Integer):\n", - " return signature(numba_masked, numba_masked, types.int64)\n", - "\n", - "@cuda_lower(operator.add, MaskedType, types.Integer)\n", - "def masked_scalar_add_constant_impl(context, builder, sig, input_values):\n", - " masked_type, const_type = sig.args\n", - "\n", - " indata = cgutils.create_struct_proxy(masked_type)(context, builder, value=input_values[0])\n", - " result = cgutils.create_struct_proxy(numba_masked)(context, builder)\n", - " #to_add_const = context.get_constant(const_type, input_values[1])\n", - "\n", - " result.valid = context.get_constant(types.boolean, 0)\n", - " with builder.if_then(indata.valid):\n", - " result.value = builder.add(indata.value, input_values[1])\n", - " result.valid = context.get_constant(types.boolean, 1)\n", - "\n", - " return result._getvalue()\n" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "entitled-wealth", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "\n", - "def f(x, y):\n", - " return x + y + cudf.NA\n", - "\n", - "s1 = cudf.Series([1, None, 3, None, 2, 2, 5, None])\n", - "s2 = cudf.Series([1, 2, None, None, 4, None, 5, None])\n", - "\n", - "result = demo_udf(f, s1, s2)" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "genuine-davis", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 \n", - "1 \n", - "2 \n", - "3 \n", - "4 \n", - "5 \n", - "6 \n", - "7 \n", - "dtype: int64" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "result" - ] - }, - { - "cell_type": "code", - 
"execution_count": 28, - "id": "polyphonic-second", - "metadata": {}, - "outputs": [], - "source": [ - "def f(x, y):\n", - " return x + y + 1\n", - "\n", - "s1 = cudf.Series([1, None, 3, None, 2, 2, 5, None])\n", - "s2 = cudf.Series([1, 2, None, None, 4, None, 5, None])\n", - "\n", - "result = demo_udf(f, s1, s2)" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "sporting-campbell", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 3\n", - "1 \n", - "2 \n", - "3 \n", - "4 7\n", - "5 \n", - "6 11\n", - "7 \n", - "dtype: int64" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "result" - ] - }, - { - "cell_type": "markdown", - "id": "focal-castle", - "metadata": {}, - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "romance-complaint", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "speaking-aquatic", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.10" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From 62ddca757cb21949db31b2f9436f9d1ce1fa907f Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 1 Jun 2021 13:13:49 -0700 Subject: [PATCH 093/132] cleanup --- python/cudf/cudf/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 86abfffa697..b4440805618 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -1501,7 +1501,7 @@ def rank( pct : bool, default False Whether or 
not to display the returned rankings in percentile form. -f + Returns ------- same type as caller From 821d11d9f58e9564c38762cf7a3dcfe2e653aaf6 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 1 Jun 2021 13:15:52 -0700 Subject: [PATCH 094/132] cpp style fixes --- cpp/include/cudf/transform.hpp | 2 - cpp/src/transform/jit/masked_udf_kernel.cu | 82 ++++++++++------------ cpp/src/transform/transform.cpp | 79 +++++++++------------ 3 files changed, 71 insertions(+), 92 deletions(-) diff --git a/cpp/include/cudf/transform.hpp b/cpp/include/cudf/transform.hpp index 35ba7fd53f6..61718254849 100644 --- a/cpp/include/cudf/transform.hpp +++ b/cpp/include/cudf/transform.hpp @@ -53,14 +53,12 @@ std::unique_ptr transform( bool is_ptx, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - std::unique_ptr generalized_masked_op( table_view data_view, std::string const& binary_udf, data_type output_type, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - /** * @brief Creates a null_mask from `input` by converting `NaN` to null and * preserving existing null values and also returns new null_count. diff --git a/cpp/src/transform/jit/masked_udf_kernel.cu b/cpp/src/transform/jit/masked_udf_kernel.cu index d384ffc61b1..9d62093e4d2 100644 --- a/cpp/src/transform/jit/masked_udf_kernel.cu +++ b/cpp/src/transform/jit/masked_udf_kernel.cu @@ -28,11 +28,11 @@ #include #include -#include #include +#include -#include #include +#include namespace cudf { namespace transformation { @@ -45,58 +45,48 @@ struct Masked { }; template -__device__ auto make_args(cudf::size_type id, - TypeIn in_ptr, - MaskType in_mask, - OffsetType in_offset) +__device__ auto make_args(cudf::size_type id, TypeIn in_ptr, MaskType in_mask, OffsetType in_offset) { - bool valid = in_mask ? cudf::bit_is_set(in_mask, in_offset + id) : true; - return cuda::std::make_tuple(in_ptr[id], valid); + bool valid = in_mask ? 
cudf::bit_is_set(in_mask, in_offset + id) : true; + return cuda::std::make_tuple(in_ptr[id], valid); } -template -__device__ auto make_args(cudf::size_type id, - InType in_ptr, - MaskType in_mask, // in practice, always cudf::bitmask_type const* +template +__device__ auto make_args(cudf::size_type id, + InType in_ptr, + MaskType in_mask, // in practice, always cudf::bitmask_type const* OffsetType in_offset, // in practice, always cudf::size_type - Arguments ... args) { - - bool valid = in_mask ? cudf::bit_is_set(in_mask, in_offset + id) : true; - return cuda::std::tuple_cat( - cuda::std::make_tuple(in_ptr[id], valid), - make_args(id, args...) - ); + Arguments... args) +{ + bool valid = in_mask ? cudf::bit_is_set(in_mask, in_offset + id) : true; + return cuda::std::tuple_cat(cuda::std::make_tuple(in_ptr[id], valid), make_args(id, args...)); } -template -__global__ -void generic_udf_kernel(cudf::size_type size, - TypeOut* out_data, - bool* out_mask, - Arguments ... args) -{ - - int tid = threadIdx.x; - int blkid = blockIdx.x; - int blksz = blockDim.x; - int gridsz = gridDim.x; - int start = tid + blkid * blksz; - int step = blksz * gridsz; - - Masked output; - for (cudf::size_type i=start; i +__global__ void generic_udf_kernel(cudf::size_type size, + TypeOut* out_data, + bool* out_mask, + Arguments... args) +{ + int tid = threadIdx.x; + int blkid = blockIdx.x; + int blksz = blockDim.x; + int gridsz = gridDim.x; + int start = tid + blkid * blksz; + int step = blksz * gridsz; + + Masked output; + for (cudf::size_type i = start; i < size; i += step) { + auto func_args = cuda::std::tuple_cat( + cuda::std::make_tuple(&output.value), + make_args(i, args...) 
// passed int64*, bool*, int64, int64*, bool*, int64 + ); + cuda::std::apply(GENERIC_OP, func_args); + out_data[i] = output.value; + out_mask[i] = output.valid; + } } - } // namespace jit } // namespace transformation } // namespace cudf diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index 03d8fa11977..a83a0051ce1 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -17,7 +17,6 @@ #include #include - #include #include #include @@ -27,9 +26,9 @@ #include #include #include +#include #include #include -#include #include @@ -68,47 +67,42 @@ void unary_operation(mutable_column_view output, std::vector make_template_types(column_view outcol_view, table_view data_view) { - std::string mskptr_type = cudf::jit::get_type_name(cudf::data_type(cudf::type_to_id())) + "*"; - std::string offset_type = cudf::jit::get_type_name(cudf::data_type(cudf::type_to_id())); + std::string mskptr_type = + cudf::jit::get_type_name(cudf::data_type(cudf::type_to_id())) + "*"; + std::string offset_type = + cudf::jit::get_type_name(cudf::data_type(cudf::type_to_id())); std::vector template_types(1); template_types[0] = cudf::jit::get_type_name(outcol_view.type()); - for (auto const& col: data_view) { - auto these_types = { - cudf::jit::get_type_name(col.type()) + "*", - mskptr_type, - offset_type - }; + for (auto const& col : data_view) { + auto these_types = {cudf::jit::get_type_name(col.type()) + "*", mskptr_type, offset_type}; template_types.insert(template_types.end(), these_types); - } return template_types; } - void generalized_operation(table_view data_view, - std::string const& binary_udf, - data_type output_type, + std::string const& binary_udf, + data_type output_type, column_view const& outcol_view, column_view const& outmsk_view, rmm::mr::device_memory_resource* mr) { - std::vector template_types = make_template_types(outcol_view, data_view); - std::string generic_kernel_name = - 
jitify2::reflection::Template("cudf::transformation::jit::generic_udf_kernel") - .instantiate(template_types); + std::string generic_kernel_name = + jitify2::reflection::Template("cudf::transformation::jit::generic_udf_kernel") + .instantiate(template_types); std::string generic_cuda_source = cudf::jit::parse_single_function_ptx( - binary_udf, "GENERIC_OP", cudf::jit::get_type_name(output_type), {0}); + binary_udf, "GENERIC_OP", cudf::jit::get_type_name(output_type), {0}); // {size, out_ptr, out_mask_ptr, col0_ptr, col0_mask_ptr, col0_offset, col1_ptr...} std::vector kernel_args((data_view.num_columns() * 3) + 3); - cudf::size_type size = outcol_view.size(); + cudf::size_type size = outcol_view.size(); const void* outcol_ptr = cudf::jit::get_data_ptr(outcol_view); const void* outmsk_ptr = cudf::jit::get_data_ptr(outmsk_view); kernel_args.insert(kernel_args.begin(), {&size, &outcol_ptr, &outmsk_ptr}); @@ -123,19 +117,20 @@ void generalized_operation(table_view data_view, data_ptrs[col_idx] = cudf::jit::get_data_ptr(col); mask_ptrs[col_idx] = col.null_mask(); - offsets[col_idx] = col.offset(); - - kernel_args.insert(kernel_args.begin() + 3 * (col_idx + 1), {&data_ptrs[col_idx], &mask_ptrs[col_idx], &offsets[col_idx]}); + offsets[col_idx] = col.offset(); + kernel_args.insert(kernel_args.begin() + 3 * (col_idx + 1), + {&data_ptrs[col_idx], &mask_ptrs[col_idx], &offsets[col_idx]}); } rmm::cuda_stream_view generic_stream; cudf::jit::get_program_cache(*transform_jit_masked_udf_kernel_cu_jit) - .get_kernel( - generic_kernel_name, {}, {{"transform/jit/operation-udf.hpp", generic_cuda_source}}, {"-arch=sm_."}) // - ->configure_1d_max_occupancy(0, 0, 0, generic_stream.value()) // + .get_kernel(generic_kernel_name, + {}, + {{"transform/jit/operation-udf.hpp", generic_cuda_source}}, + {"-arch=sm_."}) // + ->configure_1d_max_occupancy(0, 0, 0, generic_stream.value()) // ->launch(kernel_args.data()); - } } // namespace jit @@ -164,31 +159,27 @@ std::unique_ptr 
transform(column_view const& input, return output; } -std::unique_ptr generalized_masked_op(table_view data_view, - std::string const& udf, - data_type output_type, - rmm::mr::device_memory_resource* mr) +std::unique_ptr generalized_masked_op(table_view data_view, + std::string const& udf, + data_type output_type, + rmm::mr::device_memory_resource* mr) { - rmm::cuda_stream_view stream = rmm::cuda_stream_default; - std::unique_ptr output = make_fixed_width_column( - output_type, data_view.num_rows() - ); - std::unique_ptr output_mask = make_fixed_width_column( - cudf::data_type{cudf::type_id::BOOL8}, data_view.num_rows() - ); + rmm::cuda_stream_view stream = rmm::cuda_stream_default; + std::unique_ptr output = make_fixed_width_column(output_type, data_view.num_rows()); + std::unique_ptr output_mask = + make_fixed_width_column(cudf::data_type{cudf::type_id::BOOL8}, data_view.num_rows()); - mutable_column_view output_view = *output; + mutable_column_view output_view = *output; mutable_column_view output_mask_view = *output_mask; - transformation::jit::generalized_operation(data_view, udf, output_type, output_view, output_mask_view, mr); + transformation::jit::generalized_operation( + data_view, udf, output_type, output_view, output_mask_view, mr); auto final_output_mask = cudf::bools_to_mask(output_mask_view); output.get()->set_null_mask(std::move(*(final_output_mask.first))); return output; } - - } // namespace detail std::unique_ptr transform(column_view const& input, @@ -202,8 +193,8 @@ std::unique_ptr transform(column_view const& input, } std::unique_ptr generalized_masked_op(table_view data_view, - std::string const& binary_udf, - data_type output_type, + std::string const& binary_udf, + data_type output_type, rmm::mr::device_memory_resource* mr) { return detail::generalized_masked_op(data_view, binary_udf, output_type, mr); From fb8f1cf2ef277765f709ad7ffcd23c9c4417039b Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 7 Jun 2021 13:57:25 -0700 Subject: 
[PATCH 095/132] cache ptx --- python/cudf/cudf/core/frame.py | 3 ++- python/cudf/cudf/core/udf/pipeline.py | 10 +++++----- python/cudf/cudf/utils/cudautils.py | 7 +++++-- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index b4440805618..81e861e31b9 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -1454,11 +1454,12 @@ def _quantiles( result._copy_type_metadata(self) return result + @annotate("APPLY", color="purple", domain="cudf_python") def _apply(self, func): ''' Apply `func` across the rows of the frame. ''' - output_dtype, ptx = cudf.core.udf.pipeline.compile_udf(func, self.dtypes) + output_dtype, ptx = cudf.core.udf.pipeline.compile_masked_udf(func, self.dtypes) result = cudf._lib.transform.masked_udf(self, ptx, output_dtype) return result diff --git a/python/cudf/cudf/core/udf/pipeline.py b/python/cudf/cudf/core/udf/pipeline.py index ab80400eb29..bb49182fd94 100644 --- a/python/cudf/cudf/core/udf/pipeline.py +++ b/python/cudf/cudf/core/udf/pipeline.py @@ -1,9 +1,11 @@ from cudf.core.udf.typing import MaskedType from numba.np import numpy_support from numba import cuda +from cudf.utils import cudautils +from nvtx import annotate - -def compile_udf(func, dtypes): +@annotate("NUMBA JIT", color="green", domain="cudf_python") +def compile_masked_udf(func, dtypes): ''' Generate an inlineable PTX function that will be injected into a variadic kernel inside libcudf @@ -16,9 +18,7 @@ def compile_udf(func, dtypes): for arg in (numpy_support.from_dtype(np_type) for np_type in dtypes) ) # Get the inlineable PTX function - ptx, numba_output_type = cuda.compile_ptx_for_current_device( - func, to_compiler_sig, device=True - ) + ptx, numba_output_type = cudautils.compile_udf(func, to_compiler_sig) numpy_output_type = numpy_support.as_dtype( numba_output_type.value_type ) diff --git a/python/cudf/cudf/utils/cudautils.py b/python/cudf/cudf/utils/cudautils.py index 
262fe304dd8..312fbc425dd 100755 --- a/python/cudf/cudf/utils/cudautils.py +++ b/python/cudf/cudf/utils/cudautils.py @@ -262,10 +262,13 @@ def compile_udf(udf, type_signature): ptx_code, return_type = cuda.compile_ptx_for_current_device( udf, type_signature, device=True ) - output_type = numpy_support.as_dtype(return_type) + if not isinstance(return_type, cudf.core.udf.typing.MaskedType): + output_type = numpy_support.as_dtype(return_type).type + else: + output_type = return_type # Populate the cache for this function - res = (ptx_code, output_type.type) + res = (ptx_code, output_type) _udf_code_cache[key] = res return res From 5d77b2ba63072c09815bd11ba341ff4a2c4fb75f Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 7 Jun 2021 15:25:43 -0700 Subject: [PATCH 096/132] partially address reviews --- cpp/src/transform/transform.cpp | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index a83a0051ce1..f9707de183f 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -72,12 +72,12 @@ std::vector make_template_types(column_view outcol_view, table_view std::string offset_type = cudf::jit::get_type_name(cudf::data_type(cudf::type_to_id())); - std::vector template_types(1); - template_types[0] = cudf::jit::get_type_name(outcol_view.type()); + std::vector template_types; + template_types.reserve(data_view.num_columns() + 1); + template_types.push_back(cudf::jit::get_type_name(outcol_view.type())); for (auto const& col : data_view) { auto these_types = {cudf::jit::get_type_name(col.type()) + "*", mskptr_type, offset_type}; - template_types.insert(template_types.end(), these_types); } return template_types; @@ -86,8 +86,8 @@ std::vector make_template_types(column_view outcol_view, table_view void generalized_operation(table_view data_view, std::string const& binary_udf, data_type output_type, - column_view const& outcol_view, 
- column_view const& outmsk_view, + mutable_column_view outcol_view, + mutable_column_view outmsk_view, rmm::mr::device_memory_resource* mr) { std::vector template_types = make_template_types(outcol_view, data_view); @@ -100,16 +100,21 @@ void generalized_operation(table_view data_view, binary_udf, "GENERIC_OP", cudf::jit::get_type_name(output_type), {0}); // {size, out_ptr, out_mask_ptr, col0_ptr, col0_mask_ptr, col0_offset, col1_ptr...} - std::vector kernel_args((data_view.num_columns() * 3) + 3); + std::vector kernel_args; + kernel_args.reserve((data_view.num_columns() * 3) + 3); cudf::size_type size = outcol_view.size(); const void* outcol_ptr = cudf::jit::get_data_ptr(outcol_view); const void* outmsk_ptr = cudf::jit::get_data_ptr(outmsk_view); kernel_args.insert(kernel_args.begin(), {&size, &outcol_ptr, &outmsk_ptr}); - std::vector data_ptrs(data_view.num_columns()); - std::vector mask_ptrs(data_view.num_columns()); - std::vector offsets(data_view.num_columns()); + std::vector data_ptrs; + std::vector mask_ptrs; + std::vector offsets; + + data_ptrs.reserve(data_view.num_columns()); + mask_ptrs.reserve(data_view.num_columns()); + offsets.reserve(data_view.num_columns()); column_view col; for (int col_idx = 0; col_idx < data_view.num_columns(); col_idx++) { @@ -169,13 +174,10 @@ std::unique_ptr generalized_masked_op(table_view data_view, std::unique_ptr output_mask = make_fixed_width_column(cudf::data_type{cudf::type_id::BOOL8}, data_view.num_rows()); - mutable_column_view output_view = *output; - mutable_column_view output_mask_view = *output_mask; - transformation::jit::generalized_operation( - data_view, udf, output_type, output_view, output_mask_view, mr); + data_view, udf, output_type, *output, *output_mask, mr); - auto final_output_mask = cudf::bools_to_mask(output_mask_view); + auto final_output_mask = cudf::bools_to_mask(*output_mask); output.get()->set_null_mask(std::move(*(final_output_mask.first))); return output; } From 
f863ba1a96428f3dc245ea4e2065f337ea3dcd9e Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 10 Jun 2021 14:04:42 -0700 Subject: [PATCH 097/132] switch to push_back --- cpp/src/transform/transform.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index f9707de183f..6512a5aabe9 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -77,8 +77,9 @@ std::vector make_template_types(column_view outcol_view, table_view template_types.push_back(cudf::jit::get_type_name(outcol_view.type())); for (auto const& col : data_view) { - auto these_types = {cudf::jit::get_type_name(col.type()) + "*", mskptr_type, offset_type}; - template_types.insert(template_types.end(), these_types); + template_types.push_back(cudf::jit::get_type_name(col.type()) + "*"); + template_types.push_back(mskptr_type); + template_types.push_back(offset_type); } return template_types; } @@ -120,9 +121,9 @@ void generalized_operation(table_view data_view, for (int col_idx = 0; col_idx < data_view.num_columns(); col_idx++) { col = data_view.column(col_idx); - data_ptrs[col_idx] = cudf::jit::get_data_ptr(col); - mask_ptrs[col_idx] = col.null_mask(); - offsets[col_idx] = col.offset(); + data_ptrs.push_back(cudf::jit::get_data_ptr(col)); + mask_ptrs.push_back(col.null_mask()); + offsets.push_back(col.offset()); kernel_args.insert(kernel_args.begin() + 3 * (col_idx + 1), {&data_ptrs[col_idx], &mask_ptrs[col_idx], &offsets[col_idx]}); From 92cd6eb1117aa220b7e1f6271d3b2083ece17ff7 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 10 Jun 2021 14:49:23 -0700 Subject: [PATCH 098/132] more pushing back --- cpp/src/transform/transform.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index 6512a5aabe9..5ed5461662f 100644 --- a/cpp/src/transform/transform.cpp +++ 
b/cpp/src/transform/transform.cpp @@ -125,8 +125,10 @@ void generalized_operation(table_view data_view, mask_ptrs.push_back(col.null_mask()); offsets.push_back(col.offset()); - kernel_args.insert(kernel_args.begin() + 3 * (col_idx + 1), - {&data_ptrs[col_idx], &mask_ptrs[col_idx], &offsets[col_idx]}); + kernel_args.push_back(&data_ptrs[col_idx]); + kernel_args.push_back(&mask_ptrs[col_idx]); + kernel_args.push_back(&offsets[col_idx]); + } rmm::cuda_stream_view generic_stream; From 4b08c51237878e92380a63b795255c41b07c789d Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 10 Jun 2021 14:51:26 -0700 Subject: [PATCH 099/132] xfail pow tests due to issue cudf/8470 --- python/cudf/cudf/tests/test_udf_masked_ops.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index 6b50ca8dd7e..7d32daf63f1 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -12,8 +12,7 @@ operator.truediv, operator.floordiv, operator.mod, - operator.pow - + pytest.param(operator.pow, marks=pytest.mark.xfail(reason="https://github.com/rapidsai/cudf/issues/8470")) ] comparison_ops = [ From 48733b2fe09f2a5870f7f56aacfe55f541f6fe6a Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 15 Jun 2021 06:08:19 -0700 Subject: [PATCH 100/132] style fixes --- cpp/src/transform/transform.cpp | 1 - python/cudf/cudf/_lib/transform.pyx | 1 + python/cudf/cudf/core/dataframe.py | 1 - python/cudf/cudf/core/frame.py | 11 +- python/cudf/cudf/core/udf/lowering.py | 75 +++++----- python/cudf/cudf/core/udf/pipeline.py | 22 +-- python/cudf/cudf/core/udf/typing.py | 137 ++++++++---------- python/cudf/cudf/tests/test_udf_masked_ops.py | 135 ++++++++--------- 8 files changed, 181 insertions(+), 202 deletions(-) diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index 5ed5461662f..fcd2b6da5a4 100644 --- 
a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -128,7 +128,6 @@ void generalized_operation(table_view data_view, kernel_args.push_back(&data_ptrs[col_idx]); kernel_args.push_back(&mask_ptrs[col_idx]); kernel_args.push_back(&offsets[col_idx]); - } rmm::cuda_stream_view generic_stream; diff --git a/python/cudf/cudf/_lib/transform.pyx b/python/cudf/cudf/_lib/transform.pyx index 23854cd3e98..3ba9aac5687 100644 --- a/python/cudf/cudf/_lib/transform.pyx +++ b/python/cudf/cudf/_lib/transform.pyx @@ -122,6 +122,7 @@ def transform(Column input, op): return Column.from_unique_ptr(move(c_output)) + def masked_udf(Table incols, op, output_type): cdef table_view data_view = incols.data_view() cdef string c_str = op.encode("UTF-8") diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 78cff4af763..75ae2fb13db 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -4740,7 +4740,6 @@ def apply(self, func, axis=1): return func(self) - @applyutils.doc_apply() def apply_rows( self, diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index c4716d28e86..e1eafae4eca 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -1456,14 +1456,15 @@ def _quantiles( @annotate("APPLY", color="purple", domain="cudf_python") def _apply(self, func): - ''' - Apply `func` across the rows of the frame. - ''' - output_dtype, ptx = cudf.core.udf.pipeline.compile_masked_udf(func, self.dtypes) + """ + Apply `func` across the rows of the frame. 
+ """ + output_dtype, ptx = cudf.core.udf.pipeline.compile_masked_udf( + func, self.dtypes + ) result = cudf._lib.transform.masked_udf(self, ptx, output_dtype) return result - def rank( self, axis=0, diff --git a/python/cudf/cudf/core/udf/lowering.py b/python/cudf/cudf/core/udf/lowering.py index 3fffc8ac972..e18ee37e04a 100644 --- a/python/cudf/cudf/core/udf/lowering.py +++ b/python/cudf/cudf/core/udf/lowering.py @@ -1,18 +1,21 @@ -from . import classes +import operator + +from llvmlite import ir +from numba.core import cgutils +from numba.core.typing import signature as nb_signature from numba.cuda.cudaimpl import ( lower as cuda_lower, + registry as cuda_impl_registry, registry as cuda_lowering_registry, ) -from numba.core.typing import signature as nb_signature -from cudf.core.udf.typing import MaskedType, NAType -from numba.core import cgutils -from numba.cuda.cudaimpl import registry as cuda_impl_registry -import operator from numba.extending import lower_builtin, types -from llvmlite import ir +from cudf.core.udf.typing import MaskedType, NAType + +from . import classes from ._ops import arith_ops, comparison_ops + @cuda_lowering_registry.lower_constant(NAType) def constant_na(context, builder, ty, pyval): # This handles None, etc. @@ -25,15 +28,17 @@ def constant_na(context, builder, ty, pyval): # the implementation details of how to do that. This is where # we can involve both validities in constructing the answer + def make_arithmetic_op(op): - ''' + """ Make closures that implement arithmetic operations. See register_arithmetic_op for details. - ''' + """ + def masked_scalar_op_impl(context, builder, sig, args): - ''' + """ Implement `MaskedType` + `MaskedType` - ''' + """ # MaskedType(...), MaskedType(...) masked_type_1, masked_type_2 = sig.args # MaskedType(...) 
@@ -49,8 +54,9 @@ def masked_scalar_op_impl(context, builder, sig, args): ) # we will return an output struct - result = cgutils.create_struct_proxy(masked_return_type)(context, - builder) + result = cgutils.create_struct_proxy(masked_return_type)( + context, builder + ) # compute output validity valid = builder.and_(m1.valid, m2.valid) @@ -65,16 +71,17 @@ def masked_scalar_op_impl(context, builder, sig, args): nb_signature( masked_return_type.value_type, masked_type_1.value_type, - masked_type_2.value_type + masked_type_2.value_type, ), - (m1.value, m2.value) + (m1.value, m2.value), ) return result._getvalue() + return masked_scalar_op_impl def register_arithmetic_op(op): - ''' + """ Register a lowering implementation for the arithmetic op `op`. @@ -85,17 +92,17 @@ def register_arithmetic_op(op): This function makes and lowers a closure for one op. - ''' + """ to_lower_op = make_arithmetic_op(op) cuda_lower(op, MaskedType, MaskedType)(to_lower_op) def masked_scalar_null_op_impl(context, builder, sig, args): - ''' + """ Implement `MaskedType` + `NAType` The answer to this is known up front so no actual addition needs to take place - ''' + """ return_type = sig.return_type # MaskedType(...) 
result = cgutils.create_struct_proxy(MaskedType(return_type.value_type))( @@ -109,9 +116,9 @@ def masked_scalar_null_op_impl(context, builder, sig, args): def make_const_op(op): def masked_scalar_const_op_impl(context, builder, sig, args): - ''' + """ Implement `MaskedType` + constant - ''' + """ masked_type, const_type = sig.args masked_value, numeric_value = args @@ -130,17 +137,17 @@ def masked_scalar_const_op_impl(context, builder, sig, args): builder, lambda x, y: op(x, y), nb_signature( - return_type.value_type, - masked_type.value_type, - const_type + return_type.value_type, masked_type.value_type, const_type ), - (indata.value, numeric_value) + (indata.value, numeric_value), ) result.valid = context.get_constant(types.boolean, 1) return result._getvalue() + return masked_scalar_const_op_impl + def make_reflected_const_op(op): def masked_scalar_reflected_const_op_impl(context, builder, sig, args): const_type, masked_type = sig.args @@ -161,17 +168,16 @@ def masked_scalar_reflected_const_op_impl(context, builder, sig, args): builder, lambda x, y: op(x, y), nb_signature( - return_type.value_type, - const_type, - masked_type.value_type + return_type.value_type, const_type, masked_type.value_type ), - (numeric_value, indata.value) + (numeric_value, indata.value), ) result.valid = context.get_constant(types.boolean, 1) return result._getvalue() + return masked_scalar_reflected_const_op_impl - + def register_const_op(op): to_lower_op = make_const_op(op) @@ -193,9 +199,9 @@ def register_const_op(op): @cuda_lower(operator.is_, MaskedType, NAType) @cuda_lower(operator.is_, NAType, MaskedType) def masked_scalar_is_null_impl(context, builder, sig, args): - ''' + """ Implement `MaskedType` is `NA` - ''' + """ if isinstance(sig.args[1], NAType): masked_type, na = sig.args value = args[0] @@ -255,8 +261,9 @@ def cast_na_to_masked(context, builder, fromty, toty, val): @cuda_impl_registry.lower_cast(MaskedType, MaskedType) def cast_masked_to_masked(context, builder, fromty, 
toty, val): operand = cgutils.create_struct_proxy(fromty)(context, builder, value=val) - casted = context.cast(builder, operand.value, fromty.value_type, - toty.value_type) + casted = context.cast( + builder, operand.value, fromty.value_type, toty.value_type + ) ext = cgutils.create_struct_proxy(toty)(context, builder) ext.value = casted ext.valid = operand.valid diff --git a/python/cudf/cudf/core/udf/pipeline.py b/python/cudf/cudf/core/udf/pipeline.py index bb49182fd94..c7b8be92c00 100644 --- a/python/cudf/cudf/core/udf/pipeline.py +++ b/python/cudf/cudf/core/udf/pipeline.py @@ -1,33 +1,32 @@ -from cudf.core.udf.typing import MaskedType from numba.np import numpy_support -from numba import cuda -from cudf.utils import cudautils from nvtx import annotate +from cudf.core.udf.typing import MaskedType +from cudf.utils import cudautils + + @annotate("NUMBA JIT", color="green", domain="cudf_python") def compile_masked_udf(func, dtypes): - ''' + """ Generate an inlineable PTX function that will be injected into a variadic kernel inside libcudf assume all input types are `MaskedType(input_col.dtype)` and then compile the requestied PTX function as a function over those types - ''' + """ to_compiler_sig = tuple( MaskedType(arg) for arg in (numpy_support.from_dtype(np_type) for np_type in dtypes) ) # Get the inlineable PTX function ptx, numba_output_type = cudautils.compile_udf(func, to_compiler_sig) - numpy_output_type = numpy_support.as_dtype( - numba_output_type.value_type - ) + numpy_output_type = numpy_support.as_dtype(numba_output_type.value_type) return numpy_output_type, ptx def nulludf(func): - ''' + """ Mimic pandas API: def f(x, y): @@ -38,13 +37,16 @@ def f(x, y): `DataFrame` sends `self` in as `row` and subsequently we end up calling `f` on the resulting columns since the dataframe is dict-like - ''' + """ + def wrapper(*args): from cudf import DataFrame + # This probably creates copies but is fine for now to_udf_table = DataFrame( {idx: arg for idx, arg in 
zip(range(len(args)), args)} ) # Frame._apply return to_udf_table._apply(func) + return wrapper diff --git a/python/cudf/cudf/core/udf/typing.py b/python/cudf/cudf/core/udf/typing.py index 3ed0d0fd265..726664077c5 100644 --- a/python/cudf/cudf/core/udf/typing.py +++ b/python/cudf/cudf/core/udf/typing.py @@ -1,23 +1,33 @@ -from . import classes +import operator + from numba import types -from cudf.core.scalar import _NAType -from numba.core.extending import (typeof_impl, register_model, models, - make_attribute_wrapper) -from numba.cuda.cudadecl import registry as cuda_decl_registry -from numba.core.typing.templates import (AbstractTemplate, AttributeTemplate, - ConcreteTemplate) +from numba.core.extending import ( + make_attribute_wrapper, + models, + register_model, + typeof_impl, +) from numba.core.typing import signature as nb_signature +from numba.core.typing.templates import ( + AbstractTemplate, + AttributeTemplate, + ConcreteTemplate, +) from numba.core.typing.typeof import typeof +from numba.cuda.cudadecl import registry as cuda_decl_registry +from cudf.core.scalar import _NAType -import operator +from . import classes from ._ops import arith_ops, comparison_ops + class MaskedType(types.Type): - ''' + """ A Numba type consisting of a value of some primitive type and a validity boolean, over which we can define math ops - ''' + """ + def __init__(self, value): # MaskedType in Numba shall be parameterized # with a value type @@ -30,14 +40,14 @@ def __repr__(self): return f"MaskedType({self.value_type})" def __hash__(self): - ''' + """ Needed so that numba caches type instances with different `value_type` separately. - ''' + """ return self.__repr__().__hash__() def unify(self, context, other): - ''' + """ Logic for sorting out what to do when the UDF conditionally returns a `MaskedType`, an `NAType`, or a literal based off the data at runtime. 
@@ -65,14 +75,16 @@ def f(x, y): numba now sees this as f(x: MaskedType(dtype_1), y: MaskedType(dtype_2)) -> MaskedType(dtype_unified) - ''' + """ # If we have Masked and NA, the output should be a # MaskedType with the original type as its value_type if isinstance(other, NAType): return self elif isinstance(other, MaskedType): - return MaskedType(context.unify_pairs(self.value_type, other.value_type)) + return MaskedType( + context.unify_pairs(self.value_type, other.value_type) + ) # if we have MaskedType and something that results in a # scalar, unify between the MaskedType's value_type @@ -108,13 +120,15 @@ def typeof_masked(val, c): class MaskedConstructor(ConcreteTemplate): key = classes.Masked - cases = [nb_signature(MaskedType(t), t, types.boolean) - for t in (types.integer_domain | types.real_domain)] + cases = [ + nb_signature(MaskedType(t), t, types.boolean) + for t in (types.integer_domain | types.real_domain) + ] # Provide access to `m.value` and `m.valid` in a kernel for a Masked `m`. -make_attribute_wrapper(MaskedType, 'value', 'value') -make_attribute_wrapper(MaskedType, 'valid', 'valid') +make_attribute_wrapper(MaskedType, "value", "value") +make_attribute_wrapper(MaskedType, "valid", "valid") # Typing for `classes.Masked` @@ -129,8 +143,9 @@ def resolve_Masked(self, mod): # Registration of the global is also needed for Numba to type classes.Masked cuda_decl_registry.register_global(classes, types.Module(classes)) # For typing bare Masked (as in `from .classes import Masked` -cuda_decl_registry.register_global(classes.Masked, - types.Function(MaskedConstructor)) +cuda_decl_registry.register_global( + classes.Masked, types.Function(MaskedConstructor) +) # Tell numba how `MaskedType` is constructed on the backend in terms @@ -146,22 +161,23 @@ def __init__(self, dmm, fe_type): class NAType(types.Type): - ''' + """ A type for handling ops against nulls Exists so we can: 1. 
Teach numba that all occurances of `cudf.NA` are to be read as instances of this type instead 2. Define ops like `if x is cudf.NA` where `x` is of type `Masked` to mean `if x.valid is False` - ''' + """ + def __init__(self): super().__init__(name="NA") def unify(self, context, other): - ''' + """ Masked <-> NA works from above Literal <-> NA -> Masked - ''' + """ if isinstance(other, MaskedType): # bounce to MaskedType.unify return None @@ -177,11 +193,11 @@ def unify(self, context, other): @typeof_impl.register(_NAType) def typeof_na(val, c): - ''' + """ Tie instances of _NAType (cudf.NA) to our NAType. Effectively make it so numba sees `cudf.NA` as an instance of this NAType -> handle it accordingly. - ''' + """ return na_type @@ -199,11 +215,11 @@ def typeof_na(val, c): # are parameterized with `value_type` and what flavor of `Masked` to return. class MaskedScalarArithOp(AbstractTemplate): def generic(self, args, kws): - ''' + """ Typing for `Masked` + `Masked` Numba expects a valid numba type to be returned if typing is successful else `None` signifies the error state (this is common across numba) - ''' + """ if isinstance(args[0], MaskedType) and isinstance(args[1], MaskedType): # In the case of op(Masked, Masked), the return type is a Masked # such that Masked.value is the primitive type that would have @@ -211,41 +227,29 @@ def generic(self, args, kws): return_type = self.context.resolve_function_type( self.key, (args[0].value_type, args[1].value_type), kws ).return_type - return nb_signature( - MaskedType(return_type), - args[0], - args[1], - ) + return nb_signature(MaskedType(return_type), args[0], args[1],) class MaskedScalarNullOp(AbstractTemplate): def generic(self, args, kws): - ''' + """ Typing for `Masked` + `NA` Handles situations like `x + cudf.NA` - ''' + """ if isinstance(args[0], MaskedType) and isinstance(args[1], NAType): # In the case of op(Masked, NA), the result has the same # dtype as the original regardless of what it is - return 
nb_signature( - args[0], - args[0], - na_type, - ) + return nb_signature(args[0], args[0], na_type,) elif isinstance(args[0], NAType) and isinstance(args[1], MaskedType): - return nb_signature( - args[1], - na_type, - args[1] - ) + return nb_signature(args[1], na_type, args[1]) class MaskedScalarScalarOp(AbstractTemplate): def generic(self, args, kws): - ''' + """ Typing for `Masked` + a scalar. handles situations like `x + 1` - ''' + """ if isinstance(args[0], MaskedType) and isinstance( args[1], types.Number ): @@ -254,56 +258,41 @@ def generic(self, args, kws): return_type = self.context.resolve_function_type( self.key, (args[0].value_type, args[1]), kws ).return_type - return nb_signature( - MaskedType(return_type), - args[0], - args[1], - ) + return nb_signature(MaskedType(return_type), args[0], args[1],) elif isinstance(args[0], types.Number) and isinstance( args[1], MaskedType ): return_type = self.context.resolve_function_type( self.key, (args[1].value_type, args[0]), kws ).return_type - return nb_signature( - MaskedType(return_type), - args[0], - args[1], - ) + return nb_signature(MaskedType(return_type), args[0], args[1],) @cuda_decl_registry.register_global(operator.is_) class MaskedScalarIsNull(AbstractTemplate): - ''' + """ Typing for `Masked is cudf.NA` - ''' + """ + def generic(self, args, kws): if isinstance(args[0], MaskedType) and isinstance(args[1], NAType): - return nb_signature( - types.boolean, - args[0], - na_type) + return nb_signature(types.boolean, args[0], na_type) elif isinstance(args[1], MaskedType) and isinstance(args[0], NAType): - return nb_signature( - types.boolean, - na_type, - args[1]) + return nb_signature(types.boolean, na_type, args[1]) @cuda_decl_registry.register_global(operator.truth) class MaskedScalarTruth(AbstractTemplate): - ''' + """ Typing for `if Masked` Used for `if x > y` The truthiness of a MaskedType shall be the truthiness of the `value` stored therein - ''' + """ + def generic(self, args, kws): if 
isinstance(args[0], MaskedType): - return nb_signature( - types.boolean, - MaskedType(types.boolean) - ) + return nb_signature(types.boolean, MaskedType(types.boolean)) for op in arith_ops + comparison_ops: diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index 7d32daf63f1..e1da9c4e73d 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -1,9 +1,11 @@ -import cudf -from cudf.core.udf.pipeline import nulludf -from cudf.tests.utils import assert_eq, NUMERIC_TYPES +import operator + import pandas as pd import pytest -import operator + +import cudf +from cudf.core.udf.pipeline import nulludf +from cudf.tests.utils import NUMERIC_TYPES, assert_eq arith_ops = [ operator.add, @@ -12,7 +14,12 @@ operator.truediv, operator.floordiv, operator.mod, - pytest.param(operator.pow, marks=pytest.mark.xfail(reason="https://github.com/rapidsai/cudf/issues/8470")) + pytest.param( + operator.pow, + marks=pytest.mark.xfail( + reason="https://github.com/rapidsai/cudf/issues/8470" + ), + ), ] comparison_ops = [ @@ -21,7 +28,7 @@ operator.lt, operator.le, operator.gt, - operator.ge + operator.ge, ] @@ -30,21 +37,15 @@ def run_masked_udf_test(func_pdf, func_gdf, data, **kwargs): pdf = data.to_pandas(nullable=True) expect = pdf.apply( - lambda row: func_pdf( - *[row[i] for i in data.columns] - ), - axis=1 + lambda row: func_pdf(*[row[i] for i in data.columns]), axis=1 ) obtain = gdf.apply( - lambda row: func_gdf( - *[row[i] for i in data.columns] - ), - axis=1 + lambda row: func_gdf(*[row[i] for i in data.columns]), axis=1 ) assert_eq(expect, obtain, **kwargs) -@pytest.mark.parametrize('op', arith_ops) +@pytest.mark.parametrize("op", arith_ops) def test_arith_masked_vs_masked(op): # This test should test all the typing # and lowering for arithmetic ops between @@ -56,14 +57,11 @@ def func_pdf(x, y): def func_gdf(x, y): return op(x, y) - gdf = cudf.DataFrame({ - 'a': [1, 
None, 3, None], - 'b': [4, 5, None, None] - }) + gdf = cudf.DataFrame({"a": [1, None, 3, None], "b": [4, 5, None, None]}) run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) -@pytest.mark.parametrize('op', comparison_ops) +@pytest.mark.parametrize("op", comparison_ops) def test_compare_masked_vs_masked(op): # this test should test all the # typing and lowering for comparisons @@ -78,15 +76,14 @@ def func_gdf(x, y): # we should get: # [?, ?, , , ] - gdf = cudf.DataFrame({ - 'a': [1, 0, None, 1, None], - 'b': [0, 1, 0, None, None] - }) + gdf = cudf.DataFrame( + {"a": [1, 0, None, 1, None], "b": [0, 1, 0, None, None]} + ) run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) -@pytest.mark.parametrize('op', arith_ops) -@pytest.mark.parametrize('constant', [1, 1.5]) +@pytest.mark.parametrize("op", arith_ops) +@pytest.mark.parametrize("constant", [1, 1.5]) def test_arith_masked_vs_constant(op, constant): def func_pdf(x): return op(x, constant) @@ -96,14 +93,13 @@ def func_gdf(x): return op(x, constant) # Just a single column -> result will be all NA - gdf = cudf.DataFrame({ - 'data': [1, 2, None] - }) + gdf = cudf.DataFrame({"data": [1, 2, None]}) run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) -@pytest.mark.parametrize('op', arith_ops) -@pytest.mark.parametrize('constant', [1, 1.5]) + +@pytest.mark.parametrize("op", arith_ops) +@pytest.mark.parametrize("constant", [1, 1.5]) def test_arith_masked_vs_constant_reflected(op, constant): def func_pdf(x): return op(constant, x) @@ -113,14 +109,12 @@ def func_gdf(x): return op(constant, x) # Just a single column -> result will be all NA - gdf = cudf.DataFrame({ - 'data': [1, 2, None] - }) + gdf = cudf.DataFrame({"data": [1, 2, None]}) run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) -@pytest.mark.parametrize('op', arith_ops) +@pytest.mark.parametrize("op", arith_ops) def test_arith_masked_vs_null(op): def func_pdf(x): return op(x, pd.NA) @@ -129,13 +123,11 @@ def func_pdf(x): 
def func_gdf(x): return op(x, cudf.NA) - gdf = cudf.DataFrame({ - 'data': [1, None, 3] - }) + gdf = cudf.DataFrame({"data": [1, None, 3]}) run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) -@pytest.mark.parametrize('op', arith_ops) +@pytest.mark.parametrize("op", arith_ops) def test_arith_masked_vs_null_reflected(op): def func_pdf(x): return op(pd.NA, x) @@ -144,9 +136,7 @@ def func_pdf(x): def func_gdf(x): return op(cudf.NA, x) - gdf = cudf.DataFrame({ - 'data': [1, None, 3] - }) + gdf = cudf.DataFrame({"data": [1, None, 3]}) run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) @@ -164,21 +154,18 @@ def func_gdf(x, y): else: return x + y - gdf = cudf.DataFrame({ - 'a': [1, None, 3, None], - 'b': [4, 5, None, None] - }) + gdf = cudf.DataFrame({"a": [1, None, 3, None], "b": [4, 5, None, None]}) run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) -@pytest.mark.parametrize('dtype_a', list(NUMERIC_TYPES)) -@pytest.mark.parametrize('dtype_b', list(NUMERIC_TYPES)) +@pytest.mark.parametrize("dtype_a", list(NUMERIC_TYPES)) +@pytest.mark.parametrize("dtype_b", list(NUMERIC_TYPES)) def test_apply_mixed_dtypes(dtype_a, dtype_b): - ''' - Test that operations can be performed between columns + """ + Test that operations can be performed between columns of different dtypes and return a column with the correct values and nulls - ''' + """ # TODO: Parameterize over the op here def func_pdf(x, y): return x + y @@ -187,25 +174,21 @@ def func_pdf(x, y): def func_gdf(x, y): return x + y - gdf = cudf.DataFrame({ - 'a': [1.5, None, 3, None], - 'b': [4, 5, None, None] - }) - gdf['a'] = gdf['a'].astype(dtype_a) - gdf['b'] = gdf['b'].astype(dtype_b) + gdf = cudf.DataFrame({"a": [1.5, None, 3, None], "b": [4, 5, None, None]}) + gdf["a"] = gdf["a"].astype(dtype_a) + gdf["b"] = gdf["b"].astype(dtype_b) run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) -@pytest.mark.parametrize('val', [ - 5, 5.5 -]) +@pytest.mark.parametrize("val", [5, 5.5]) 
def test_apply_return_literal(val): - ''' + """ Test unification codepath for scalars and MaskedType makes sure that numba knows how to cast a scalar value to a MaskedType - ''' + """ + def func_pdf(x, y): if x is not pd.NA and x < 2: return val @@ -214,23 +197,21 @@ def func_pdf(x, y): @nulludf def func_gdf(x, y): - if x is not cudf.NA and x < 2: + if x is not cudf.NA and x < 2: return val else: return x + y - gdf = cudf.DataFrame({ - 'a': [1, None, 3, None], - 'b': [4, 5, None, None] - }) + gdf = cudf.DataFrame({"a": [1, None, 3, None], "b": [4, 5, None, None]}) run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) def test_apply_return_null(): - ''' + """ Tests casting / unification of Masked and NA - ''' + """ + def func_pdf(x): if x is pd.NA: return pd.NA @@ -244,7 +225,7 @@ def func_gdf(x): else: return x - gdf = cudf.DataFrame({'a': [1, None, 3]}) + gdf = cudf.DataFrame({"a": [1, None, 3]}) run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) @@ -262,7 +243,7 @@ def func_gdf(x): else: return cudf.NA - gdf = cudf.DataFrame({'a': [1, 3, 6]}) + gdf = cudf.DataFrame({"a": [1, 3, 6]}) run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) @@ -277,7 +258,7 @@ def func_pdf(w, x, y, z): elif x + y is pd.NA: return 2.5 else: - return (y > 2) + return y > 2 @nulludf def func_gdf(w, x, y, z): @@ -290,14 +271,14 @@ def func_gdf(w, x, y, z): elif x + y is cudf.NA: return 2.5 else: - return (y > 2) + return y > 2 gdf = cudf.DataFrame( { - 'a': [1, 3, 6, 0, None, 5, None], - 'b': [3.0, 2.5, None, 5.0, 1.0, 5.0, 11.0], - 'c': [2, 3, 6, 0, None, 5, None], - 'd': [4, None, 6, 0, None, 5, None], + "a": [1, 3, 6, 0, None, 5, None], + "b": [3.0, 2.5, None, 5.0, 1.0, 5.0, 11.0], + "c": [2, 3, 6, 0, None, 5, None], + "d": [4, None, 6, 0, None, 5, None], } ) run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False) From 16018f66086639e3b612adef47640466dd2bc548 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 15 Jun 2021 06:13:56 -0700 
Subject: [PATCH 101/132] more style fixes --- python/cudf/cudf/core/dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 75ae2fb13db..fa0e381a3e7 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -4719,7 +4719,7 @@ def apply(self, func, axis=1): defined function row wise over a dataframe, with true null handling. Works with UDFs using `core.udf.pipeline.nulludf` and returns a single series. Uses numba to jit compile the - function to PTX via LLVM. + function to PTX via LLVM. Parameters ---------- From c91737eef2c5ea6c031efdca911cc8f11499c167 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 15 Jun 2021 06:19:31 -0700 Subject: [PATCH 102/132] update tests and _ops --- python/cudf/cudf/core/udf/_ops.py | 5 +- .../cudf/tests/test_extension_compilation.py | 88 +++++++++---------- 2 files changed, 42 insertions(+), 51 deletions(-) diff --git a/python/cudf/cudf/core/udf/_ops.py b/python/cudf/cudf/core/udf/_ops.py index 0fce3fdbafb..25201356fd9 100644 --- a/python/cudf/cudf/core/udf/_ops.py +++ b/python/cudf/cudf/core/udf/_ops.py @@ -7,8 +7,7 @@ operator.truediv, operator.floordiv, operator.mod, - operator.pow - + operator.pow, ] comparison_ops = [ @@ -17,5 +16,5 @@ operator.lt, operator.le, operator.gt, - operator.ge + operator.ge, ] diff --git a/python/cudf/cudf/tests/test_extension_compilation.py b/python/cudf/cudf/tests/test_extension_compilation.py index efd0509f475..e527fd0af17 100644 --- a/python/cudf/cudf/tests/test_extension_compilation.py +++ b/python/cudf/cudf/tests/test_extension_compilation.py @@ -1,13 +1,12 @@ import operator -import pytest +import pytest from numba import cuda, types from numba.cuda import compile_ptx from cudf import NA -from cudf.core.udf.typing import MaskedType from cudf.core.udf.classes import Masked - +from cudf.core.udf.typing import MaskedType arith_ops = ( operator.add, @@ -25,12 
+24,10 @@ operator.eq, operator.ne, operator.ge, - operator.gt + operator.gt, ) -unary_ops = ( - operator.truth, -) +unary_ops = (operator.truth,) ops = arith_ops + comparison_ops @@ -57,10 +54,9 @@ number_ids = tuple(str(t) for t in number_types) -@pytest.mark.parametrize('op', unary_ops) -@pytest.mark.parametrize('ty', number_types, ids=number_ids) +@pytest.mark.parametrize("op", unary_ops) +@pytest.mark.parametrize("ty", number_types, ids=number_ids) def test_compile_masked_unary(op, ty): - def func(x): return op(x) @@ -68,10 +64,9 @@ def func(x): ptx, resty = compile_ptx(func, (MaskedType(ty),), cc=cc, device=True) -@pytest.mark.parametrize('op', arith_ops) -@pytest.mark.parametrize('ty', number_types, ids=number_ids) +@pytest.mark.parametrize("op", arith_ops) +@pytest.mark.parametrize("ty", number_types, ids=number_ids) def test_execute_masked_binary(op, ty): - @cuda.jit(device=True) def func(x, y): return op(x, y) @@ -92,21 +87,20 @@ def test_kernel(x, y): # Check masks are as expected, and unmasked result matches masked # result if r0.valid: - raise RuntimeError('Expected r0 to be invalid') + raise RuntimeError("Expected r0 to be invalid") if not r1.valid: - raise RuntimeError('Expected r1 to be valid') + raise RuntimeError("Expected r1 to be valid") if u != r1.value: - print('Values: ', u, r1.value) - raise RuntimeError('u != r1.value') + print("Values: ", u, r1.value) + raise RuntimeError("u != r1.value") test_kernel[1, 1](1, 2) -@pytest.mark.parametrize('op', ops) -@pytest.mark.parametrize('ty', number_types, ids=number_ids) -@pytest.mark.parametrize('constant', [1, 1.5]) +@pytest.mark.parametrize("op", ops) +@pytest.mark.parametrize("ty", number_types, ids=number_ids) +@pytest.mark.parametrize("constant", [1, 1.5]) def test_compile_arith_masked_vs_constant(op, ty, constant): - def func(x): return op(x, constant) @@ -120,11 +114,10 @@ def func(x): assert resty.value_type == um_resty -@pytest.mark.parametrize('op', ops) -@pytest.mark.parametrize('ty', 
number_types, ids=number_ids) -@pytest.mark.parametrize('constant', [1, 1.5]) +@pytest.mark.parametrize("op", ops) +@pytest.mark.parametrize("ty", number_types, ids=number_ids) +@pytest.mark.parametrize("constant", [1, 1.5]) def test_compile_arith_constant_vs_masked(op, ty, constant): - def func(x): return op(constant, x) @@ -134,10 +127,9 @@ def func(x): assert isinstance(resty, MaskedType) -@pytest.mark.parametrize('op', ops) -@pytest.mark.parametrize('ty', number_types, ids=number_ids) +@pytest.mark.parametrize("op", ops) +@pytest.mark.parametrize("ty", number_types, ids=number_ids) def test_compile_arith_masked_vs_na(op, ty): - def func(x): return op(x, NA) @@ -147,10 +139,9 @@ def func(x): assert isinstance(resty, MaskedType) -@pytest.mark.parametrize('op', ops) -@pytest.mark.parametrize('ty', number_types, ids=number_ids) +@pytest.mark.parametrize("op", ops) +@pytest.mark.parametrize("ty", number_types, ids=number_ids) def test_compile_arith_na_vs_masked(op, ty): - def func(x): return op(NA, x) @@ -158,14 +149,15 @@ def func(x): ptx, resty = compile_ptx(func, (MaskedType(ty),), cc=cc, device=True) -@pytest.mark.parametrize('op', ops) -@pytest.mark.parametrize('ty1', number_types, ids=number_ids) -@pytest.mark.parametrize('ty2', number_types, ids=number_ids) -@pytest.mark.parametrize('masked', ((False, True), (True, False), - (True, True)), - ids=('um', 'mu', 'mm')) +@pytest.mark.parametrize("op", ops) +@pytest.mark.parametrize("ty1", number_types, ids=number_ids) +@pytest.mark.parametrize("ty2", number_types, ids=number_ids) +@pytest.mark.parametrize( + "masked", + ((False, True), (True, False), (True, True)), + ids=("um", "mu", "mm"), +) def test_compile_arith_masked_ops(op, ty1, ty2, masked): - def func(x, y): return op(x, y) @@ -187,7 +179,7 @@ def func_na_is_x(x): return NA is x -@pytest.mark.parametrize('fn', (func_x_is_na, func_na_is_x)) +@pytest.mark.parametrize("fn", (func_x_is_na, func_na_is_x)) def test_is_na(fn): valid = Masked(1, True) @@ -201,10 
+193,10 @@ def test_kernel(): invalid_is_na = device_fn(invalid) if valid_is_na: - raise RuntimeError('Valid masked value is NA and should not be') + raise RuntimeError("Valid masked value is NA and should not be") if not invalid_is_na: - raise RuntimeError('Invalid masked value is not NA and should be') + raise RuntimeError("Invalid masked value is not NA and should be") test_kernel[1, 1]() @@ -273,8 +265,8 @@ def func_na_le(x): ) -@pytest.mark.parametrize('fn', na_comparison_funcs) -@pytest.mark.parametrize('ty', number_types, ids=number_ids) +@pytest.mark.parametrize("fn", na_comparison_funcs) +@pytest.mark.parametrize("ty", number_types, ids=number_ids) def test_na_masked_comparisons(fn, ty): device_fn = cuda.jit(device=True)(fn) @@ -289,18 +281,18 @@ def test_kernel(): invalid_cmp_na = device_fn(invalid_masked) if valid_cmp_na: - raise RuntimeError('Valid masked value compared True with NA') + raise RuntimeError("Valid masked value compared True with NA") if invalid_cmp_na: - raise RuntimeError('Invalid masked value compared True with NA') + raise RuntimeError("Invalid masked value compared True with NA") test_kernel[1, 1]() # xfail because scalars do not yet cast for a comparison to NA @pytest.mark.xfail -@pytest.mark.parametrize('fn', na_comparison_funcs) -@pytest.mark.parametrize('ty', number_types, ids=number_ids) +@pytest.mark.parametrize("fn", na_comparison_funcs) +@pytest.mark.parametrize("ty", number_types, ids=number_ids) def test_na_scalar_comparisons(fn, ty): device_fn = cuda.jit(device=True)(fn) @@ -312,6 +304,6 @@ def test_kernel(): unmasked_cmp_na = device_fn(unmasked) if unmasked_cmp_na: - raise RuntimeError('Unmasked value compared True with NA') + raise RuntimeError("Unmasked value compared True with NA") test_kernel[1, 1]() From 0da7fc79a51e827bd0488c0787ecc8bf44d4ebc8 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 17 Jun 2021 14:41:05 -0700 Subject: [PATCH 103/132] address reviewsA --- cpp/src/transform/jit/masked_udf_kernel.cu | 
5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/cpp/src/transform/jit/masked_udf_kernel.cu b/cpp/src/transform/jit/masked_udf_kernel.cu index 9d62093e4d2..d733e4e98e1 100644 --- a/cpp/src/transform/jit/masked_udf_kernel.cu +++ b/cpp/src/transform/jit/masked_udf_kernel.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,9 +22,6 @@ #include #include -#include -#include - #include #include From 9048879de53e8acc3a96578431751f2e97122955 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 17 Jun 2021 14:49:20 -0700 Subject: [PATCH 104/132] fix typing for NA --- python/cudf/cudf/core/udf/typing.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/udf/typing.py b/python/cudf/cudf/core/udf/typing.py index 726664077c5..9e403185a16 100644 --- a/python/cudf/cudf/core/udf/typing.py +++ b/python/cudf/cudf/core/udf/typing.py @@ -15,8 +15,7 @@ ) from numba.core.typing.typeof import typeof from numba.cuda.cudadecl import registry as cuda_decl_registry - -from cudf.core.scalar import _NAType +from pandas._libs.missing import NAType as _NAType from . 
import classes from ._ops import arith_ops, comparison_ops From 9fa05a32b69a1aabba2d341a511a215e19df7b0e Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 17 Jun 2021 14:56:16 -0700 Subject: [PATCH 105/132] minor name change --- cpp/src/transform/transform.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index fcd2b6da5a4..a5169dbd94d 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -85,7 +85,7 @@ std::vector make_template_types(column_view outcol_view, table_view } void generalized_operation(table_view data_view, - std::string const& binary_udf, + std::string const& udf, data_type output_type, mutable_column_view outcol_view, mutable_column_view outmsk_view, @@ -98,7 +98,7 @@ void generalized_operation(table_view data_view, .instantiate(template_types); std::string generic_cuda_source = cudf::jit::parse_single_function_ptx( - binary_udf, "GENERIC_OP", cudf::jit::get_type_name(output_type), {0}); + udf, "GENERIC_OP", cudf::jit::get_type_name(output_type), {0}); // {size, out_ptr, out_mask_ptr, col0_ptr, col0_mask_ptr, col0_offset, col1_ptr...} std::vector kernel_args; @@ -197,11 +197,11 @@ std::unique_ptr transform(column_view const& input, } std::unique_ptr generalized_masked_op(table_view data_view, - std::string const& binary_udf, + std::string const& udf, data_type output_type, rmm::mr::device_memory_resource* mr) { - return detail::generalized_masked_op(data_view, binary_udf, output_type, mr); + return detail::generalized_masked_op(data_view, udf, output_type, mr); } } // namespace cudf From b80753462b5fc167b442b11d07f7900ea4302e33 Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Wed, 23 Jun 2021 16:32:25 -0500 Subject: [PATCH 106/132] Update cpp/src/transform/jit/masked_udf_kernel.cu Co-authored-by: Mike Wilson --- cpp/src/transform/jit/masked_udf_kernel.cu | 12 
++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/src/transform/jit/masked_udf_kernel.cu b/cpp/src/transform/jit/masked_udf_kernel.cu index d733e4e98e1..c87f9e1630e 100644 --- a/cpp/src/transform/jit/masked_udf_kernel.cu +++ b/cpp/src/transform/jit/masked_udf_kernel.cu @@ -65,12 +65,12 @@ __global__ void generic_udf_kernel(cudf::size_type size, bool* out_mask, Arguments... args) { - int tid = threadIdx.x; - int blkid = blockIdx.x; - int blksz = blockDim.x; - int gridsz = gridDim.x; - int start = tid + blkid * blksz; - int step = blksz * gridsz; + int const tid = threadIdx.x; + int const blkid = blockIdx.x; + int const blksz = blockDim.x; + int const gridsz = gridDim.x; + int const start = tid + blkid * blksz; + int const step = blksz * gridsz; Masked output; for (cudf::size_type i = start; i < size; i += step) { From f56ffbb7e786520ce4a998be88806cce84c322f9 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 29 Jun 2021 07:08:56 -0700 Subject: [PATCH 107/132] add back missing header --- cpp/src/transform/jit/masked_udf_kernel.cu | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/src/transform/jit/masked_udf_kernel.cu b/cpp/src/transform/jit/masked_udf_kernel.cu index c87f9e1630e..3853201abc8 100644 --- a/cpp/src/transform/jit/masked_udf_kernel.cu +++ b/cpp/src/transform/jit/masked_udf_kernel.cu @@ -22,6 +22,8 @@ #include #include +#include + #include #include From 7f07452f2179c3658ad372979edaef4780064cff Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 29 Jun 2021 13:03:21 -0700 Subject: [PATCH 108/132] revise headers again --- cpp/src/transform/jit/masked_udf_kernel.cu | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/src/transform/jit/masked_udf_kernel.cu b/cpp/src/transform/jit/masked_udf_kernel.cu index 3853201abc8..603802282b9 100644 --- a/cpp/src/transform/jit/masked_udf_kernel.cu +++ b/cpp/src/transform/jit/masked_udf_kernel.cu @@ -22,13 +22,12 @@ #include #include -#include +#include 
#include #include #include -#include #include #include From 968e91b0192dfb60a9cd8496415f3b4050a31749 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 1 Jul 2021 12:01:37 -0700 Subject: [PATCH 109/132] update docstring with examples --- python/cudf/cudf/core/dataframe.py | 125 ++++++++++++++++++++++++++++- 1 file changed, 124 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index c39eac7fd4f..108129f2221 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -4718,13 +4718,136 @@ def apply(self, func, axis=1): Note: axis=0 is not yet supported. * 1 or 'columns': apply function to each row. + Examples + ---------- + + Simple function of a single variable which could be NA + + >>> from cudf.core.udf.pipeline import nulludf + >>> @nulludf + ... def f(x): + ... if x is cudf.NA: + ... return 0 + ... else: + ... return x + 1 + ... + >>> df = cudf.DataFrame({'a': [1, cudf.NA, 3]}) + >>> df.apply(lambda row: f(row['a'])) + 0 2 + 1 0 + 2 4 + dtype: int64 + + Function of multiple variables will operate in + a null aware manner + + >>> @nulludf + ... def f(x, y): + ... return x - y + ... + >>> df = cudf.DataFrame({ + ... 'a': [1, cudf.NA, 3, cudf.NA], + ... 'b': [5, 6, cudf.NA, cudf.NA] + ... }) + >>> df.apply(lambda row: f(row['a'], row['b'])) + 0 -4 + 1 + 2 + 3 + dtype: int64 + + Functions may conditionally return NA as in pandas + + >>> @nulludf + ... def f(x, y): + ... if x + y > 3: + ... return cudf.NA + ... else: + ... return x + y + ... + >>> df = cudf.DataFrame({ + ... 'a': [1, 2, 3], + ... 'b': [2, 1, 1] + ... }) + >>> df.apply(lambda row: f(row['a'], row['b'])) + 0 3 + 1 3 + 2 + dtype: int64 + + Mixed types are allowed, but will return the common + type, rather than object as in pandas + + >>> @nulludf + ... def f(x, y): + ... return x + y + ... + >>> df = cudf.DataFrame({ + ... 'a': [1, 2, 3], + ... 'b': [0.5, cudf.NA, 3.14] + ... 
}) + >>> df.apply(lambda row: f(row['a'], row['b'])) + 0 1.5 + 1 + 2 6.14 + dtype: float64 + + Functions may also return scalar values, however the + result will be promoted to a safe type regardless of + the data + + >>> @nulludf + ... def f(x): + ... if x > 3: + ... return x + ... else: + ... return 1.5 + ... + >>> df = cudf.DataFrame({ + ... 'a': [1, 3, 5] + ... }) + >>> df.apply(lambda row: f(row['a'])) + 0 1.5 + 1 1.5 + 2 5.0 + dtype: float64 + + Ops against N columns are supported generally + + >>> @nulludf + ... def f(v, w, x, y, z): + ... return x + (y - (z / w)) % v + ... + >>> df = cudf.DataFrame({ + ... 'a': [1, 2, 3], + ... 'b': [4, 5, 6], + ... 'c': [cudf.NA, 4, 4], + ... 'd': [8, 7, 8], + ... 'e': [7, 1, 6] + ... }) + >>> df.apply( + ... lambda row: f( + ... row['a'], + ... row['b'], + ... row['c'], + ... row['d'], + ... row['e'] + ... ) + ... ) + 0 + 1 4.8 + 2 5.0 + dtype: float64 + """ + + if axis != 1: raise ValueError( "DataFrame.apply currently only supports row wise ops" ) - return func(self) + return cudf.Series(func(self)) @applyutils.doc_apply() def apply_rows( From 699239dc66249cc46bb1b4b59ce454ba081bae61 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 1 Jul 2021 12:23:36 -0700 Subject: [PATCH 110/132] add error checking --- python/cudf/cudf/core/dataframe.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 108129f2221..11644aa026d 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -4730,7 +4730,7 @@ def apply(self, func, axis=1): ... return 0 ... else: ... return x + 1 - ... + ... 
>>> df = cudf.DataFrame({'a': [1, cudf.NA, 3]}) >>> df.apply(lambda row: f(row['a'])) 0 2 @@ -4738,13 +4738,13 @@ def apply(self, func, axis=1): 2 4 dtype: int64 - Function of multiple variables will operate in + Function of multiple variables will operate in a null aware manner >>> @nulludf ... def f(x, y): ... return x - y - ... + ... >>> df = cudf.DataFrame({ ... 'a': [1, cudf.NA, 3, cudf.NA], ... 'b': [5, 6, cudf.NA, cudf.NA] @@ -4773,7 +4773,7 @@ def apply(self, func, axis=1): 0 3 1 3 2 - dtype: int64 + dtype: int64 Mixed types are allowed, but will return the common type, rather than object as in pandas @@ -4781,7 +4781,7 @@ def apply(self, func, axis=1): >>> @nulludf ... def f(x, y): ... return x + y - ... + ... >>> df = cudf.DataFrame({ ... 'a': [1, 2, 3], ... 'b': [0.5, cudf.NA, 3.14] @@ -4792,7 +4792,7 @@ def apply(self, func, axis=1): 2 6.14 dtype: float64 - Functions may also return scalar values, however the + Functions may also return scalar values, however the result will be promoted to a safe type regardless of the data @@ -4802,7 +4802,7 @@ def apply(self, func, axis=1): ... return x ... else: ... return 1.5 - ... + ... >>> df = cudf.DataFrame({ ... 'a': [1, 3, 5] ... }) @@ -4817,7 +4817,7 @@ def apply(self, func, axis=1): >>> @nulludf ... def f(v, w, x, y, z): ... return x + (y - (z / w)) % v - ... + ... >>> df = cudf.DataFrame({ ... 'a': [1, 2, 3], ... 
'b': [4, 5, 6], @@ -4841,6 +4841,15 @@ def apply(self, func, axis=1): """ + for dtype in self.dtypes: + if ( + isinstance(dtype, cudf.core.dtypes._BaseDtype) + or dtype == "object" + ): + raise TypeError( + "DataFrame.apply currently only " + "supports non decimal numeric types" + ) if axis != 1: raise ValueError( From 2d071522d9c0b4d0abe5ebe09e4aeead7eb3704c Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 1 Jul 2021 14:09:32 -0700 Subject: [PATCH 111/132] Address reviews --- python/cudf/cudf/core/udf/classes.py | 11 +++++++++++ python/cudf/cudf/core/udf/lowering.py | 27 ++++++++++++++++++--------- python/cudf/cudf/core/udf/typing.py | 11 +++++------ 3 files changed, 34 insertions(+), 15 deletions(-) diff --git a/python/cudf/cudf/core/udf/classes.py b/python/cudf/cudf/core/udf/classes.py index 1e979253fba..b3b6f2cbbdf 100644 --- a/python/cudf/cudf/core/udf/classes.py +++ b/python/cudf/cudf/core/udf/classes.py @@ -1,4 +1,15 @@ class Masked: + """ + Most of the time, MaskedType as defined in typing.py + combined with the ops defined to operate on them are + enough to fulfill the obligations of DataFrame.apply + However sometimes we need to refer to an instance of + a masked scalar outside the context of a UDF like as + a global variable. 
To get numba to identify that var + a of type MaskedType and treat it as such we need to + have an actual python class we can tie to MaskedType + This is that class + """ def __init__(self, value, valid): self.value = value self.valid = valid diff --git a/python/cudf/cudf/core/udf/lowering.py b/python/cudf/cudf/core/udf/lowering.py index e18ee37e04a..1cda3903fe4 100644 --- a/python/cudf/cudf/core/udf/lowering.py +++ b/python/cudf/cudf/core/udf/lowering.py @@ -5,7 +5,6 @@ from numba.core.typing import signature as nb_signature from numba.cuda.cudaimpl import ( lower as cuda_lower, - registry as cuda_impl_registry, registry as cuda_lowering_registry, ) from numba.extending import lower_builtin, types @@ -120,7 +119,7 @@ def masked_scalar_const_op_impl(context, builder, sig, args): Implement `MaskedType` + constant """ masked_type, const_type = sig.args - masked_value, numeric_value = args + masked_value, const_value = args return_type = sig.return_type masked_input_type = MaskedType(masked_type.value_type) @@ -139,7 +138,7 @@ def masked_scalar_const_op_impl(context, builder, sig, args): nb_signature( return_type.value_type, masked_type.value_type, const_type ), - (indata.value, numeric_value), + (indata.value, const_value), ) result.valid = context.get_constant(types.boolean, 1) @@ -151,7 +150,7 @@ def masked_scalar_const_op_impl(context, builder, sig, args): def make_reflected_const_op(op): def masked_scalar_reflected_const_op_impl(context, builder, sig, args): const_type, masked_type = sig.args - numeric_value, masked_value = args + const_value, masked_value = args return_type = sig.return_type masked_input_type = MaskedType(masked_type.value_type) @@ -170,7 +169,7 @@ def masked_scalar_reflected_const_op_impl(context, builder, sig, args): nb_signature( return_type.value_type, const_type, masked_type.value_type ), - (numeric_value, indata.value), + (const_value, indata.value), ) result.valid = context.get_constant(types.boolean, 1) @@ -241,7 +240,7 @@ def 
masked_scalar_bool_impl(context, builder, sig, args): # To handle the unification, we need to support casting from any type to an # extension type. The cast implementation takes the value passed in and returns # an extension struct wrapping that value. -@cuda_impl_registry.lower_cast(types.Any, MaskedType) +@cuda_lowering_registry.lower_cast(types.Any, MaskedType) def cast_primitive_to_masked(context, builder, fromty, toty, val): casted = context.cast(builder, val, fromty, toty.value_type) ext = cgutils.create_struct_proxy(toty)(context, builder) @@ -250,7 +249,7 @@ def cast_primitive_to_masked(context, builder, fromty, toty, val): return ext._getvalue() -@cuda_impl_registry.lower_cast(NAType, MaskedType) +@cuda_lowering_registry.lower_cast(NAType, MaskedType) def cast_na_to_masked(context, builder, fromty, toty, val): result = cgutils.create_struct_proxy(toty)(context, builder) result.valid = context.get_constant(types.boolean, 0) @@ -258,8 +257,18 @@ def cast_na_to_masked(context, builder, fromty, toty, val): return result._getvalue() -@cuda_impl_registry.lower_cast(MaskedType, MaskedType) +@cuda_lowering_registry.lower_cast(MaskedType, MaskedType) def cast_masked_to_masked(context, builder, fromty, toty, val): + """ + When numba encounters an op that expects a certain type and + the input to the op is not of the expected type it will try + to cast the input to the appropriate type. But, in our case + the input may be a MaskedType, which numba doesn't natively + know how to cast to a different MaskedType with a different + `value_type`. This implements and registers that cast. 
+ """ + + # We will operand = cgutils.create_struct_proxy(fromty)(context, builder, value=val) casted = context.cast( builder, operand.value, fromty.value_type, toty.value_type @@ -281,7 +290,7 @@ def masked_constructor(context, builder, sig, args): return masked._getvalue() -@cuda_impl_registry.lower_constant(MaskedType) +@cuda_lowering_registry.lower_constant(MaskedType) def lower_constant_masked(context, builder, ty, val): masked = cgutils.create_struct_proxy(ty)(context, builder) masked.value = context.get_constant(ty.value_type, val.value) diff --git a/python/cudf/cudf/core/udf/typing.py b/python/cudf/cudf/core/udf/typing.py index 9e403185a16..2ed86d635de 100644 --- a/python/cudf/cudf/core/udf/typing.py +++ b/python/cudf/cudf/core/udf/typing.py @@ -30,13 +30,12 @@ class MaskedType(types.Type): def __init__(self, value): # MaskedType in Numba shall be parameterized # with a value type - super().__init__(name="Masked") - if isinstance(value, MaskedType): - breakpoint() + if not isinstance(value, (types.Number, types.Boolean)): + raise TypeError( + "value_type must be a numeric scalar type" + ) self.value_type = value - - def __repr__(self): - return f"MaskedType({self.value_type})" + super().__init__(name=f"Masked{self.value_type}") def __hash__(self): """ From 95098e6f1f40826100560ea0bc212b213342c170 Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Thu, 1 Jul 2021 16:17:59 -0500 Subject: [PATCH 112/132] Apply suggestions from code review Co-authored-by: Graham Markall <535640+gmarkall@users.noreply.github.com> --- python/cudf/cudf/core/udf/lowering.py | 17 +++++++++-------- python/cudf/cudf/core/udf/typing.py | 22 ++++++++++++---------- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/python/cudf/cudf/core/udf/lowering.py b/python/cudf/cudf/core/udf/lowering.py index 1cda3903fe4..e5684a69f80 100644 --- a/python/cudf/cudf/core/udf/lowering.py +++ b/python/cudf/cudf/core/udf/lowering.py @@ 
-36,7 +36,7 @@ def make_arithmetic_op(op): def masked_scalar_op_impl(context, builder, sig, args): """ - Implement `MaskedType` + `MaskedType` + Implement `MaskedType` `MaskedType` """ # MaskedType(...), MaskedType(...) masked_type_1, masked_type_2 = sig.args @@ -61,7 +61,7 @@ def masked_scalar_op_impl(context, builder, sig, args): valid = builder.and_(m1.valid, m2.valid) result.valid = valid with builder.if_then(valid): - # Let numba handle generating the extra LLVM needed to perform + # Let numba handle generating the extra IR needed to perform # operations on mixed types, by compiling the final core op between # the two primitive values as a separate function and calling it result.value = context.compile_internal( @@ -98,8 +98,9 @@ def register_arithmetic_op(op): def masked_scalar_null_op_impl(context, builder, sig, args): """ - Implement `MaskedType` + `NAType` - The answer to this is known up front so no actual addition + Implement `MaskedType` `NAType` + or `NAType` `MaskedType` + The answer to this is known up front so no actual operation needs to take place """ @@ -116,7 +117,7 @@ def masked_scalar_null_op_impl(context, builder, sig, args): def make_const_op(op): def masked_scalar_const_op_impl(context, builder, sig, args): """ - Implement `MaskedType` + constant + Implement `MaskedType` constant """ masked_type, const_type = sig.args masked_value, const_value = args @@ -237,9 +238,9 @@ def masked_scalar_bool_impl(context, builder, sig, args): return indata.value -# To handle the unification, we need to support casting from any type to an -# extension type. The cast implementation takes the value passed in and returns -# an extension struct wrapping that value. +# To handle the unification, we need to support casting from any type to a +# masked type. The cast implementation takes the value passed in and returns +# a masked type struct wrapping that value. 
@cuda_lowering_registry.lower_cast(types.Any, MaskedType) def cast_primitive_to_masked(context, builder, fromty, toty, val): casted = context.cast(builder, val, fromty, toty.value_type) diff --git a/python/cudf/cudf/core/udf/typing.py b/python/cudf/cudf/core/udf/typing.py index 2ed86d635de..53c34abf1af 100644 --- a/python/cudf/cudf/core/udf/typing.py +++ b/python/cudf/cudf/core/udf/typing.py @@ -173,7 +173,7 @@ def __init__(self): def unify(self, context, other): """ - Masked <-> NA works from above + Masked <-> NA is deferred to MaskedType.unify() Literal <-> NA -> Masked """ if isinstance(other, MaskedType): @@ -202,30 +202,32 @@ def typeof_na(val, c): register_model(NAType)(models.OpaqueModel) -# Ultimately, we want numba to produce PTX code that specifies how to add -# two singular `Masked` structs together, which is defined as producing a +# Ultimately, we want numba to produce PTX code that specifies how to implement +# an operation on two singular `Masked` structs together, which is defined as producing a # new `Masked` with the right validity and if valid, the correct value. # This happens in two phases: -# 1. Specify that `Masked` + `Masked` exists and what it should return +# 1. Specify that `Masked` `Masked` exists and what it should return # 2. Implement how to actually do (1) at the LLVM level # The following code accomplishes (1) - it is really just a way of specifying -# that the `+` operation has a CUDA overload that accepts two `Masked` that +# that the has a CUDA overload that accepts two `Masked` that # are parameterized with `value_type` and what flavor of `Masked` to return. 
class MaskedScalarArithOp(AbstractTemplate): def generic(self, args, kws): """ - Typing for `Masked` + `Masked` + Typing for `Masked` `Masked` Numba expects a valid numba type to be returned if typing is successful - else `None` signifies the error state (this is common across numba) + else `None` signifies the error state (this pattern is commonly used + in Numba) """ if isinstance(args[0], MaskedType) and isinstance(args[1], MaskedType): # In the case of op(Masked, Masked), the return type is a Masked # such that Masked.value is the primitive type that would have - # been resolved if we were just adding the `value_type`s. + # been resolved if we were just operating on the + # `value_type`s. return_type = self.context.resolve_function_type( self.key, (args[0].value_type, args[1].value_type), kws ).return_type - return nb_signature(MaskedType(return_type), args[0], args[1],) + return nb_signature(MaskedType(return_type), args[0], args[1]) class MaskedScalarNullOp(AbstractTemplate): @@ -245,7 +247,7 @@ def generic(self, args, kws): class MaskedScalarScalarOp(AbstractTemplate): def generic(self, args, kws): """ - Typing for `Masked` + a scalar. + Typing for `Masked` a scalar (and vice-versa). 
handles situations like `x + 1` """ if isinstance(args[0], MaskedType) and isinstance( From b724410ee7c62f54bcc8ec1e20977a7eaec88764 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 1 Jul 2021 14:24:42 -0700 Subject: [PATCH 113/132] address more revies --- python/cudf/cudf/core/udf/lowering.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/udf/lowering.py b/python/cudf/cudf/core/udf/lowering.py index e5684a69f80..67ded626788 100644 --- a/python/cudf/cudf/core/udf/lowering.py +++ b/python/cudf/cudf/core/udf/lowering.py @@ -290,7 +290,8 @@ def masked_constructor(context, builder, sig, args): masked.valid = valid return masked._getvalue() - +# Allows us to make an instance of MaskedType a global variable +# and properly use it inside functions we will later compile @cuda_lowering_registry.lower_constant(MaskedType) def lower_constant_masked(context, builder, ty, val): masked = cgutils.create_struct_proxy(ty)(context, builder) From 593cbd290d504b76fa446624cab7d25b1ea47b84 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 1 Jul 2021 14:43:41 -0700 Subject: [PATCH 114/132] simplify masked/unmasked typing logic --- python/cudf/cudf/core/udf/typing.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/python/cudf/cudf/core/udf/typing.py b/python/cudf/cudf/core/udf/typing.py index 53c34abf1af..706b8b2b52c 100644 --- a/python/cudf/cudf/core/udf/typing.py +++ b/python/cudf/cudf/core/udf/typing.py @@ -250,22 +250,20 @@ def generic(self, args, kws): Typing for `Masked` a scalar (and vice-versa). 
handles situations like `x + 1` """ + # In the case of op(Masked, scalar), we resolve the type between + # the Masked value_type and the scalar's type directly if isinstance(args[0], MaskedType) and isinstance( args[1], types.Number ): - # In the case of op(Masked, scalar), we resolve the type between - # the Masked value_type and the scalar's type directly - return_type = self.context.resolve_function_type( - self.key, (args[0].value_type, args[1]), kws - ).return_type - return nb_signature(MaskedType(return_type), args[0], args[1],) + to_resolve_types = (args[0].value_type, args[1]) elif isinstance(args[0], types.Number) and isinstance( args[1], MaskedType ): - return_type = self.context.resolve_function_type( - self.key, (args[1].value_type, args[0]), kws - ).return_type - return nb_signature(MaskedType(return_type), args[0], args[1],) + to_resolve_types = (args[1].value_type, args[0]) + return_type = self.context.resolve_function_type( + self.key, to_resolve_types, kws + ).return_type + return nb_signature(MaskedType(return_type), args[0], args[1],) @cuda_decl_registry.register_global(operator.is_) From a31c15a39252c2b3549dc54de118f2947c8e3069 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Fri, 2 Jul 2021 07:04:34 -0700 Subject: [PATCH 115/132] style fixes --- python/cudf/cudf/core/dataframe.py | 2 +- python/cudf/cudf/core/udf/lowering.py | 3 ++- python/cudf/cudf/core/udf/typing.py | 10 ++++------ 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 11644aa026d..0842779cf26 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -4764,7 +4764,7 @@ def apply(self, func, axis=1): ... return cudf.NA ... else: ... return x + y - ... + ... >>> df = cudf.DataFrame({ ... 'a': [1, 2, 3], ... 
'b': [2, 1, 1] diff --git a/python/cudf/cudf/core/udf/lowering.py b/python/cudf/cudf/core/udf/lowering.py index 67ded626788..c490d216ede 100644 --- a/python/cudf/cudf/core/udf/lowering.py +++ b/python/cudf/cudf/core/udf/lowering.py @@ -269,7 +269,7 @@ def cast_masked_to_masked(context, builder, fromty, toty, val): `value_type`. This implements and registers that cast. """ - # We will + # We will operand = cgutils.create_struct_proxy(fromty)(context, builder, value=val) casted = context.cast( builder, operand.value, fromty.value_type, toty.value_type @@ -290,6 +290,7 @@ def masked_constructor(context, builder, sig, args): masked.valid = valid return masked._getvalue() + # Allows us to make an instance of MaskedType a global variable # and properly use it inside functions we will later compile @cuda_lowering_registry.lower_constant(MaskedType) diff --git a/python/cudf/cudf/core/udf/typing.py b/python/cudf/cudf/core/udf/typing.py index 706b8b2b52c..a1f03b854ed 100644 --- a/python/cudf/cudf/core/udf/typing.py +++ b/python/cudf/cudf/core/udf/typing.py @@ -31,9 +31,7 @@ def __init__(self, value): # MaskedType in Numba shall be parameterized # with a value type if not isinstance(value, (types.Number, types.Boolean)): - raise TypeError( - "value_type must be a numeric scalar type" - ) + raise TypeError("value_type must be a numeric scalar type") self.value_type = value super().__init__(name=f"Masked{self.value_type}") @@ -203,9 +201,9 @@ def typeof_na(val, c): # Ultimately, we want numba to produce PTX code that specifies how to implement -# an operation on two singular `Masked` structs together, which is defined as producing a -# new `Masked` with the right validity and if valid, the correct value. -# This happens in two phases: +# an operation on two singular `Masked` structs together, which is defined +# as producing a new `Masked` with the right validity and if valid, +# the correct value. This happens in two phases: # 1. 
Specify that `Masked` `Masked` exists and what it should return # 2. Implement how to actually do (1) at the LLVM level # The following code accomplishes (1) - it is really just a way of specifying From 448e4ea07469d66f28ac1d90d0b9897d50a3ce5d Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Fri, 2 Jul 2021 13:03:23 -0700 Subject: [PATCH 116/132] refactor lowering for reflected const ops --- python/cudf/cudf/core/udf/lowering.py | 80 +++++++++------------------ 1 file changed, 26 insertions(+), 54 deletions(-) diff --git a/python/cudf/cudf/core/udf/lowering.py b/python/cudf/cudf/core/udf/lowering.py index c490d216ede..1467a61f215 100644 --- a/python/cudf/cudf/core/udf/lowering.py +++ b/python/cudf/cudf/core/udf/lowering.py @@ -116,75 +116,47 @@ def masked_scalar_null_op_impl(context, builder, sig, args): def make_const_op(op): def masked_scalar_const_op_impl(context, builder, sig, args): - """ - Implement `MaskedType` constant - """ - masked_type, const_type = sig.args - masked_value, const_value = args - return_type = sig.return_type - masked_input_type = MaskedType(masked_type.value_type) - indata = cgutils.create_struct_proxy(masked_input_type)( - context, builder, value=masked_value - ) - masked_return_type = MaskedType(return_type.value_type) - result = cgutils.create_struct_proxy(masked_return_type)( - context, builder - ) + result = cgutils.create_struct_proxy(return_type)(context, builder) result.valid = context.get_constant(types.boolean, 0) - with builder.if_then(indata.valid): - result.value = context.compile_internal( - builder, - lambda x, y: op(x, y), - nb_signature( - return_type.value_type, masked_type.value_type, const_type - ), - (indata.value, const_value), - ) - result.valid = context.get_constant(types.boolean, 1) - - return result._getvalue() - - return masked_scalar_const_op_impl + if isinstance(sig.args[0], MaskedType): + masked_type, const_type = sig.args + masked_value, const_value = args - -def make_reflected_const_op(op): - def 
masked_scalar_reflected_const_op_impl(context, builder, sig, args): - const_type, masked_type = sig.args - const_value, masked_value = args - - return_type = sig.return_type - masked_input_type = MaskedType(masked_type.value_type) - indata = cgutils.create_struct_proxy(masked_input_type)( - context, builder, value=masked_value - ) - masked_return_type = MaskedType(return_type.value_type) - result = cgutils.create_struct_proxy(masked_return_type)( - context, builder - ) - result.valid = context.get_constant(types.boolean, 0) + indata = cgutils.create_struct_proxy(masked_type)( + context, builder, value=masked_value + ) + nb_sig = nb_signature( + return_type.value_type, masked_type.value_type, const_type + ) + compile_args = (indata.value, const_value) + else: + const_type, masked_type = sig.args + const_value, masked_value = args + indata = cgutils.create_struct_proxy(masked_type)( + context, builder, value=masked_value + ) + nb_sig = nb_signature( + return_type.value_type, const_type, masked_type.value_type + ) + compile_args = (const_value, indata.value) with builder.if_then(indata.valid): result.value = context.compile_internal( - builder, - lambda x, y: op(x, y), - nb_signature( - return_type.value_type, const_type, masked_type.value_type - ), - (const_value, indata.value), + builder, lambda x, y: op(x, y), nb_sig, compile_args ) result.valid = context.get_constant(types.boolean, 1) - return result._getvalue() - return masked_scalar_reflected_const_op_impl + return masked_scalar_const_op_impl def register_const_op(op): to_lower_op = make_const_op(op) cuda_lower(op, MaskedType, types.Number)(to_lower_op) + cuda_lower(op, types.Number, MaskedType)(to_lower_op) - to_lower_op_reflected = make_reflected_const_op(op) - cuda_lower(op, types.Number, MaskedType)(to_lower_op_reflected) + # to_lower_op_reflected = make_reflected_const_op(op) + # cuda_lower(op, types.Number, MaskedType)(to_lower_op_reflected) # register all lowering at init From 
6780814ece25783a26b4b6bd8bac5f4c2cee49ac Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Fri, 2 Jul 2021 13:29:13 -0700 Subject: [PATCH 117/132] cleanup --- python/cudf/cudf/core/udf/classes.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/cudf/cudf/core/udf/classes.py b/python/cudf/cudf/core/udf/classes.py index b3b6f2cbbdf..fe2fbd9daad 100644 --- a/python/cudf/cudf/core/udf/classes.py +++ b/python/cudf/cudf/core/udf/classes.py @@ -10,6 +10,7 @@ class Masked: have an actual python class we can tie to MaskedType This is that class """ + def __init__(self, value, valid): self.value = value self.valid = valid From 6bf3cf533c33aad4ce659a1f5592a66613ab27c3 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 6 Jul 2021 07:22:04 -0700 Subject: [PATCH 118/132] fix import and address reviews --- python/cudf/cudf/core/udf/typing.py | 46 +++++++++---------- python/cudf/cudf/tests/test_udf_masked_ops.py | 2 +- 2 files changed, 22 insertions(+), 26 deletions(-) diff --git a/python/cudf/cudf/core/udf/typing.py b/python/cudf/cudf/core/udf/typing.py index a1f03b854ed..6e026412f24 100644 --- a/python/cudf/cudf/core/udf/typing.py +++ b/python/cudf/cudf/core/udf/typing.py @@ -44,39 +44,35 @@ def __hash__(self): def unify(self, context, other): """ - Logic for sorting out what to do when the UDF conditionally - returns a `MaskedType`, an `NAType`, or a literal based off - the data at runtime. - - In this framework, every input column is treated as having - type `MaskedType`. 
Operations like `x + y` are understood - as translating to: - - `Masked(value=x, valid=True) + Masked(value=y, valid=True)` - - This means if the user writes a function such as - def f(x, y): - return x + y - - numba sees this function as: - f(x: MaskedType, y: MaskedType) -> MaskedType - - However if the user writes something like: - def f(x, y): - if x > 5: - return 42 + Often within a UDF an instance arises where a variable could + be a `MaskedType`, an `NAType`, or a literal based off + the data at runtime, for examplem the variable `ret` here: + + def f(x): + if x == 1: + ret = x + elif x > 2: + ret = 1 else: - return x + y + ret = cudf.NA + return ret + + When numba analyzes this function it will eventually figure + out that the variable `ret` could be any of the three types + from above. This scenario will only work if numba knows how + to find some kind of common type between the possibilities, + and this function implements that - the goal is to return a + common type when comparing `self` to other. 
- numba now sees this as - f(x: MaskedType(dtype_1), y: MaskedType(dtype_2)) - -> MaskedType(dtype_unified) """ # If we have Masked and NA, the output should be a # MaskedType with the original type as its value_type if isinstance(other, NAType): return self + + # two MaskedType unify to a new MaskedType whose value_type + # is the result of unifying `self` and `other` `value_type` elif isinstance(other, MaskedType): return MaskedType( context.unify_pairs(self.value_type, other.value_type) diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index e1da9c4e73d..5c89203255f 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -5,7 +5,7 @@ import cudf from cudf.core.udf.pipeline import nulludf -from cudf.tests.utils import NUMERIC_TYPES, assert_eq +from cudf.testing._utils import NUMERIC_TYPES, assert_eq arith_ops = [ operator.add, From 6ed7a49e325e0149c055a55278b8bbb2f9423ea0 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 7 Jul 2021 13:35:00 -0700 Subject: [PATCH 119/132] capture libcudacxx version for debugging --- ci/gpu/build.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 355b18f4543..7359bebcb25 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -214,6 +214,8 @@ fi ################################################################################ # TEST - Run py.test, notebooks ################################################################################ +OUTPUT=$(cat /opt/conda/envs/rapids/include/libcudf/libcudacxx/cuda/std/detail/__config) +gpuci_logger "${OUTPUT}" cd "$WORKSPACE/python/cudf" gpuci_logger "Python py.test for cuDF" From 4ab7bd8fd6563fde0b16a1b6fa696b4bea257028 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 12 Jul 2021 07:08:04 -0700 Subject: [PATCH 120/132] error for cuda<11.1 --- python/cudf/cudf/core/dataframe.py | 11 +++++++++++ 1 file changed, 11 
insertions(+) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 5c193fe078c..b7dc905fd2a 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -4839,8 +4839,19 @@ def apply(self, func, axis=1): 2 5.0 dtype: float64 + Notes + ---------- + Available only using cuda 11.1+ due to particular required + runtime compilation features + """ + # libcudacxx tuples are not compatible with nvrtc 11.0 + runtime = cuda.cudadrv.runtime.Runtime() + mjr, mnr = runtime.get_version() + if mjr < 11 or (mjr == 11 and mnr < 1): + raise RuntimeError("DataFrame.apply requires CUDA 11.1+") + for dtype in self.dtypes: if ( isinstance(dtype, cudf.core.dtypes._BaseDtype) From 1ffce5b01fb6a68e246616644120c642b60726ff Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 12 Jul 2021 07:09:31 -0700 Subject: [PATCH 121/132] remove CI debugging --- ci/gpu/build.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 7359bebcb25..355b18f4543 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -214,8 +214,6 @@ fi ################################################################################ # TEST - Run py.test, notebooks ################################################################################ -OUTPUT=$(cat /opt/conda/envs/rapids/include/libcudf/libcudacxx/cuda/std/detail/__config) -gpuci_logger "${OUTPUT}" cd "$WORKSPACE/python/cudf" gpuci_logger "Python py.test for cuDF" From 169bcf2d2205ba6165dad8cc2f24d1dda6066f05 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 12 Jul 2021 11:26:00 -0700 Subject: [PATCH 122/132] skip testing cuda 11.0 --- python/cudf/cudf/tests/test_udf_masked_ops.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index 5c89203255f..3ecdc15a4a8 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ 
b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -2,6 +2,7 @@ import pandas as pd import pytest +from numba import cuda import cudf from cudf.core.udf.pipeline import nulludf @@ -33,6 +34,13 @@ def run_masked_udf_test(func_pdf, func_gdf, data, **kwargs): + + # Skip testing CUDA 11.0 + runtime = cuda.cudadrv.runtime.Runtime() + mjr, mnr = runtime.get_version() + if mjr < 11 or (mjr == 11 and mnr < 1): + pytest.skip(reason="Skip testing for CUDA 11.0") + gdf = data pdf = data.to_pandas(nullable=True) From aec243d5a602b5dad36200a137971206db5dd633 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 13 Jul 2021 06:29:45 -0700 Subject: [PATCH 123/132] fix pytest --- python/cudf/cudf/tests/test_udf_masked_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index 3ecdc15a4a8..f73f1526c7f 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -39,7 +39,7 @@ def run_masked_udf_test(func_pdf, func_gdf, data, **kwargs): runtime = cuda.cudadrv.runtime.Runtime() mjr, mnr = runtime.get_version() if mjr < 11 or (mjr == 11 and mnr < 1): - pytest.skip(reason="Skip testing for CUDA 11.0") + pytest.skip("Skip testing for CUDA 11.0") gdf = data pdf = data.to_pandas(nullable=True) From 993d84188cf57b665df9c7db633f8ad10ffadf3a Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Tue, 13 Jul 2021 17:29:28 -0500 Subject: [PATCH 124/132] Apply suggestions from code review Co-authored-by: Nghia Truong --- cpp/src/transform/transform.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index a5169dbd94d..dad2b33d91e 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -117,9 +117,8 @@ void generalized_operation(table_view data_view, 
mask_ptrs.reserve(data_view.num_columns()); offsets.reserve(data_view.num_columns()); - column_view col; for (int col_idx = 0; col_idx < data_view.num_columns(); col_idx++) { - col = data_view.column(col_idx); + auto const& col = data_view.column(col_idx); data_ptrs.push_back(cudf::jit::get_data_ptr(col)); mask_ptrs.push_back(col.null_mask()); @@ -179,8 +178,8 @@ std::unique_ptr generalized_masked_op(table_view data_view, transformation::jit::generalized_operation( data_view, udf, output_type, *output, *output_mask, mr); - auto final_output_mask = cudf::bools_to_mask(*output_mask); - output.get()->set_null_mask(std::move(*(final_output_mask.first))); + auto [final_output_mask, out_something] = cudf::bools_to_mask(*output_mask); + output.get()->set_null_mask(std::move(final_output_mask)); return output; } From 512555bea322e1f67487baa920e7afe5075f64cc Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 13 Jul 2021 15:30:06 -0700 Subject: [PATCH 125/132] partially address reviews --- cpp/src/transform/jit/masked_udf_kernel.cu | 16 ++++++------ cpp/src/transform/transform.cpp | 29 ++++++++++++---------- 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/cpp/src/transform/jit/masked_udf_kernel.cu b/cpp/src/transform/jit/masked_udf_kernel.cu index 603802282b9..13afa20996b 100644 --- a/cpp/src/transform/jit/masked_udf_kernel.cu +++ b/cpp/src/transform/jit/masked_udf_kernel.cu @@ -14,23 +14,21 @@ * limitations under the License. */ -// Include Jitify's cstddef header first -#include - -#include -#include -#include -#include #include - +#include #include #include #include +#include +#include +#include +#include #include -#include + + namespace cudf { namespace transformation { diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index a5169dbd94d..1ae0c931a4f 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -14,12 +14,6 @@ * limitations under the License. 
*/ -#include -#include - -#include -#include -#include #include #include @@ -30,6 +24,14 @@ #include #include +#include +#include + +#include +#include +#include + + #include namespace cudf { @@ -89,9 +91,10 @@ void generalized_operation(table_view data_view, data_type output_type, mutable_column_view outcol_view, mutable_column_view outmsk_view, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - std::vector template_types = make_template_types(outcol_view, data_view); + auto const template_types = make_template_types(outcol_view, data_view); std::string generic_kernel_name = jitify2::reflection::Template("cudf::transformation::jit::generic_udf_kernel") @@ -130,13 +133,12 @@ void generalized_operation(table_view data_view, kernel_args.push_back(&offsets[col_idx]); } - rmm::cuda_stream_view generic_stream; cudf::jit::get_program_cache(*transform_jit_masked_udf_kernel_cu_jit) .get_kernel(generic_kernel_name, {}, {{"transform/jit/operation-udf.hpp", generic_cuda_source}}, - {"-arch=sm_."}) // - ->configure_1d_max_occupancy(0, 0, 0, generic_stream.value()) // + {"-arch=sm_."}) + ->configure_1d_max_occupancy(0, 0, 0, stream.value()) ->launch(kernel_args.data()); } @@ -169,15 +171,16 @@ std::unique_ptr transform(column_view const& input, std::unique_ptr generalized_masked_op(table_view data_view, std::string const& udf, data_type output_type, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - rmm::cuda_stream_view stream = rmm::cuda_stream_default; + std::unique_ptr output = make_fixed_width_column(output_type, data_view.num_rows()); std::unique_ptr output_mask = make_fixed_width_column(cudf::data_type{cudf::type_id::BOOL8}, data_view.num_rows()); transformation::jit::generalized_operation( - data_view, udf, output_type, *output, *output_mask, mr); + data_view, udf, output_type, *output, *output_mask, stream, mr); auto final_output_mask = cudf::bools_to_mask(*output_mask); 
output.get()->set_null_mask(std::move(*(final_output_mask.first))); @@ -201,7 +204,7 @@ std::unique_ptr generalized_masked_op(table_view data_view, data_type output_type, rmm::mr::device_memory_resource* mr) { - return detail::generalized_masked_op(data_view, udf, output_type, mr); + return detail::generalized_masked_op(data_view, udf, output_type, rmm::cuda_stream_default, mr); } } // namespace cudf From 8f1add4736a0001ea71dae6f9febb7475f105c4f Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Tue, 13 Jul 2021 17:31:12 -0500 Subject: [PATCH 126/132] Apply suggestions from code review Co-authored-by: GALI PREM SAGAR --- python/cudf/cudf/core/dataframe.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index b7dc905fd2a..780466458cc 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -4719,7 +4719,7 @@ def apply(self, func, axis=1): * 1 or 'columns': apply function to each row. 
Examples - ---------- + -------- Simple function of a single variable which could be NA @@ -4840,10 +4840,9 @@ def apply(self, func, axis=1): dtype: float64 Notes - ---------- + ----- Available only using cuda 11.1+ due to particular required runtime compilation features - """ # libcudacxx tuples are not compatible with nvrtc 11.0 From b061710b05692f31c6cb7ba717b64fa399c4174b Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 14 Jul 2021 11:57:13 -0700 Subject: [PATCH 127/132] updates --- cpp/src/transform/transform.cpp | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index b177a6ee660..771be891d61 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -75,7 +75,7 @@ std::vector make_template_types(column_view outcol_view, table_view cudf::jit::get_type_name(cudf::data_type(cudf::type_to_id())); std::vector template_types; - template_types.reserve(data_view.num_columns() + 1); + template_types.reserve((3 * data_view.num_columns()) + 1); template_types.push_back(cudf::jit::get_type_name(outcol_view.type())); for (auto const& col : data_view) { @@ -86,6 +86,14 @@ std::vector make_template_types(column_view outcol_view, table_view return template_types; } +class Unpacker { + public: + thrust::tuple operator() (column_view input) { + return thrust::make_tuple(cudf::jit::get_data_ptr(input), input.null_mask(), input.offset()); + } +}; + + void generalized_operation(table_view data_view, std::string const& udf, data_type output_type, @@ -103,7 +111,6 @@ void generalized_operation(table_view data_view, std::string generic_cuda_source = cudf::jit::parse_single_function_ptx( udf, "GENERIC_OP", cudf::jit::get_type_name(output_type), {0}); - // {size, out_ptr, out_mask_ptr, col0_ptr, col0_mask_ptr, col0_offset, col1_ptr...} std::vector kernel_args; kernel_args.reserve((data_view.num_columns() * 3) + 3); @@ -120,13 +127,16 
@@ void generalized_operation(table_view data_view, mask_ptrs.reserve(data_view.num_columns()); offsets.reserve(data_view.num_columns()); - for (int col_idx = 0; col_idx < data_view.num_columns(); col_idx++) { - auto const& col = data_view.column(col_idx); + auto zipit_start = thrust::make_zip_iterator( + thrust::make_tuple(data_ptrs.begin(), + mask_ptrs.begin(), + offsets.begin()) + ); - data_ptrs.push_back(cudf::jit::get_data_ptr(col)); - mask_ptrs.push_back(col.null_mask()); - offsets.push_back(col.offset()); + Unpacker unpacker; + thrust::transform(data_view.begin(), data_view.end(), zipit_start, unpacker); + for (int col_idx = 0; col_idx < data_view.num_columns(); col_idx++) { kernel_args.push_back(&data_ptrs[col_idx]); kernel_args.push_back(&mask_ptrs[col_idx]); kernel_args.push_back(&offsets[col_idx]); @@ -167,7 +177,7 @@ std::unique_ptr transform(column_view const& input, return output; } -std::unique_ptr generalized_masked_op(table_view data_view, +std::unique_ptr generalized_masked_op(table_view const& data_view, std::string const& udf, data_type output_type, rmm::cuda_stream_view stream, @@ -181,8 +191,8 @@ std::unique_ptr generalized_masked_op(table_view data_view, transformation::jit::generalized_operation( data_view, udf, output_type, *output, *output_mask, stream, mr); - auto [final_output_mask, out_something] = cudf::bools_to_mask(*output_mask); - output.get()->set_null_mask(std::move(final_output_mask)); + auto final_output_mask = cudf::bools_to_mask(*output_mask); + output.get()->set_null_mask(std::move(*(final_output_mask.first))); return output; } From 7c722dda1922e4379a7066ca410bf89ec8dac2c0 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 14 Jul 2021 12:36:07 -0700 Subject: [PATCH 128/132] style --- cpp/src/transform/jit/masked_udf_kernel.cu | 7 ++----- cpp/src/transform/transform.cpp | 19 +++++++------------ 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/cpp/src/transform/jit/masked_udf_kernel.cu 
b/cpp/src/transform/jit/masked_udf_kernel.cu index 13afa20996b..319ad730c53 100644 --- a/cpp/src/transform/jit/masked_udf_kernel.cu +++ b/cpp/src/transform/jit/masked_udf_kernel.cu @@ -14,9 +14,8 @@ * limitations under the License. */ - -#include #include +#include #include #include @@ -25,10 +24,8 @@ #include #include #include -#include #include - - +#include namespace cudf { namespace transformation { diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index 771be891d61..f67ad4bf8a6 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ - #include #include #include @@ -31,7 +30,6 @@ #include #include - #include namespace cudf { @@ -87,13 +85,14 @@ std::vector make_template_types(column_view outcol_view, table_view } class Unpacker { - public: - thrust::tuple operator() (column_view input) { - return thrust::make_tuple(cudf::jit::get_data_ptr(input), input.null_mask(), input.offset()); - } + public: + thrust::tuple operator()( + column_view input) + { + return thrust::make_tuple(cudf::jit::get_data_ptr(input), input.null_mask(), input.offset()); + } }; - void generalized_operation(table_view data_view, std::string const& udf, data_type output_type, @@ -128,10 +127,7 @@ void generalized_operation(table_view data_view, offsets.reserve(data_view.num_columns()); auto zipit_start = thrust::make_zip_iterator( - thrust::make_tuple(data_ptrs.begin(), - mask_ptrs.begin(), - offsets.begin()) - ); + thrust::make_tuple(data_ptrs.begin(), mask_ptrs.begin(), offsets.begin())); Unpacker unpacker; thrust::transform(data_view.begin(), data_view.end(), zipit_start, unpacker); @@ -183,7 +179,6 @@ std::unique_ptr generalized_masked_op(table_view const& data_view, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - std::unique_ptr output = make_fixed_width_column(output_type, data_view.num_rows()); std::unique_ptr output_mask = 
make_fixed_width_column(cudf::data_type{cudf::type_id::BOOL8}, data_view.num_rows()); From 7a7ee8376d42f1fe29b00af22ca186d92cfb1465 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 15 Jul 2021 13:12:09 -0700 Subject: [PATCH 129/132] use table_view const& --- cpp/include/cudf/transform.hpp | 2 +- cpp/src/transform/transform.cpp | 6 +++--- python/cudf/cudf/_lib/cpp/transform.pxd | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/include/cudf/transform.hpp b/cpp/include/cudf/transform.hpp index 50efea4e2e5..f5880e9b37f 100644 --- a/cpp/include/cudf/transform.hpp +++ b/cpp/include/cudf/transform.hpp @@ -54,7 +54,7 @@ std::unique_ptr transform( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); std::unique_ptr generalized_masked_op( - table_view data_view, + table_view const& data_view, std::string const& binary_udf, data_type output_type, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index f67ad4bf8a6..98364217491 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -65,7 +65,7 @@ void unary_operation(mutable_column_view output, cudf::jit::get_data_ptr(input)); } -std::vector make_template_types(column_view outcol_view, table_view data_view) +std::vector make_template_types(column_view outcol_view, table_view const& data_view) { std::string mskptr_type = cudf::jit::get_type_name(cudf::data_type(cudf::type_to_id())) + "*"; @@ -93,7 +93,7 @@ class Unpacker { } }; -void generalized_operation(table_view data_view, +void generalized_operation(table_view const& data_view, std::string const& udf, data_type output_type, mutable_column_view outcol_view, @@ -203,7 +203,7 @@ std::unique_ptr transform(column_view const& input, return detail::transform(input, unary_udf, output_type, is_ptx, rmm::cuda_stream_default, mr); } -std::unique_ptr generalized_masked_op(table_view 
data_view, +std::unique_ptr generalized_masked_op(table_view const& data_view, std::string const& udf, data_type output_type, rmm::mr::device_memory_resource* mr) diff --git a/python/cudf/cudf/_lib/cpp/transform.pxd b/python/cudf/cudf/_lib/cpp/transform.pxd index c8e84a3a2e9..9cb5bc10162 100644 --- a/python/cudf/cudf/_lib/cpp/transform.pxd +++ b/python/cudf/cudf/_lib/cpp/transform.pxd @@ -39,7 +39,7 @@ cdef extern from "cudf/transform.hpp" namespace "cudf" nogil: ) except + cdef unique_ptr[column] generalized_masked_op( - table_view data_view, + const table_view& data_view, string udf, data_type output_type, ) except + From a20d630ff6737e3148f6cc196c401fe87e3424cf Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 15 Jul 2021 14:09:20 -0700 Subject: [PATCH 130/132] switch to a lambda --- cpp/src/transform/transform.cpp | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index 98364217491..a2269a16d2f 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -84,15 +84,6 @@ std::vector make_template_types(column_view outcol_view, table_view return template_types; } -class Unpacker { - public: - thrust::tuple operator()( - column_view input) - { - return thrust::make_tuple(cudf::jit::get_data_ptr(input), input.null_mask(), input.offset()); - } -}; - void generalized_operation(table_view const& data_view, std::string const& udf, data_type output_type, @@ -129,14 +120,14 @@ void generalized_operation(table_view const& data_view, auto zipit_start = thrust::make_zip_iterator( thrust::make_tuple(data_ptrs.begin(), mask_ptrs.begin(), offsets.begin())); - Unpacker unpacker; - thrust::transform(data_view.begin(), data_view.end(), zipit_start, unpacker); - - for (int col_idx = 0; col_idx < data_view.num_columns(); col_idx++) { + int col_idx = 0; + std::transform(data_view.begin(), data_view.end(), zipit_start, [&](column_view col) { 
kernel_args.push_back(&data_ptrs[col_idx]); kernel_args.push_back(&mask_ptrs[col_idx]); kernel_args.push_back(&offsets[col_idx]); - } + col_idx++; + return thrust::make_tuple(cudf::jit::get_data_ptr(col), col.null_mask(), col.offset()); + }); cudf::jit::get_program_cache(*transform_jit_masked_udf_kernel_cu_jit) .get_kernel(generic_kernel_name, From a13e935abe13763887365b0d0dc755713643c32b Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Thu, 15 Jul 2021 21:52:44 -0500 Subject: [PATCH 131/132] Update cpp/src/transform/transform.cpp Co-authored-by: Nghia Truong --- cpp/src/transform/transform.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index a2269a16d2f..4a3aa5da3c3 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -117,15 +117,16 @@ void generalized_operation(table_view const& data_view, mask_ptrs.reserve(data_view.num_columns()); offsets.reserve(data_view.num_columns()); - auto zipit_start = thrust::make_zip_iterator( + auto const iters = thrust::make_zip_iterator( thrust::make_tuple(data_ptrs.begin(), mask_ptrs.begin(), offsets.begin())); - int col_idx = 0; - std::transform(data_view.begin(), data_view.end(), zipit_start, [&](column_view col) { - kernel_args.push_back(&data_ptrs[col_idx]); - kernel_args.push_back(&mask_ptrs[col_idx]); - kernel_args.push_back(&offsets[col_idx]); - col_idx++; + std::for_each(iters, iters + data_view.num_columns(), [](auto const& tuple_vals) { + kernel_args.push_back(thrust::get<0>(tuple_vals)); + kernel_args.push_back(thrust::get<1>(tuple_vals)); + kernel_args.push_back(thrust::get<2>(tuple_vals)); + }); + + std::transform(data_view.begin(), data_view.end(), iters, [&](column_view const& col) { return thrust::make_tuple(cudf::jit::get_data_ptr(col), col.null_mask(), col.offset()); }); From 9acc7a9f9292140d06d5a539ac27292f881dd1d9 Mon 
Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 15 Jul 2021 19:58:41 -0700 Subject: [PATCH 132/132] updates --- cpp/src/transform/transform.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index 4a3aa5da3c3..5230b853a79 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -120,10 +120,10 @@ void generalized_operation(table_view const& data_view, auto const iters = thrust::make_zip_iterator( thrust::make_tuple(data_ptrs.begin(), mask_ptrs.begin(), offsets.begin())); - std::for_each(iters, iters + data_view.num_columns(), [](auto const& tuple_vals) { - kernel_args.push_back(thrust::get<0>(tuple_vals)); - kernel_args.push_back(thrust::get<1>(tuple_vals)); - kernel_args.push_back(thrust::get<2>(tuple_vals)); + std::for_each(iters, iters + data_view.num_columns(), [&](auto const& tuple_vals) { + kernel_args.push_back(&thrust::get<0>(tuple_vals)); + kernel_args.push_back(&thrust::get<1>(tuple_vals)); + kernel_args.push_back(&thrust::get<2>(tuple_vals)); }); std::transform(data_view.begin(), data_view.end(), iters, [&](column_view const& col) {