diff --git a/python/cudf/cudf/_lib/cpp/join.pxd b/python/cudf/cudf/_lib/cpp/join.pxd index ea05256430a..7508052646a 100644 --- a/python/cudf/cudf/_lib/cpp/join.pxd +++ b/python/cudf/cudf/_lib/cpp/join.pxd @@ -10,7 +10,7 @@ from rmm._lib.device_uvector cimport device_uvector from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view -from cudf._lib.cpp.types cimport size_type +from cudf._lib.cpp.types cimport null_equality, size_type ctypedef unique_ptr[device_uvector[size_type]] gather_map_type ctypedef pair[gather_map_type, gather_map_type] gather_map_pair_type @@ -40,3 +40,33 @@ cdef extern from "cudf/join.hpp" namespace "cudf" nogil: const table_view left_keys, const table_view right_keys, ) except + + + cdef gather_map_pair_type inner_join( + const table_view left_keys, + const table_view right_keys, + null_equality nulls_equal, + ) except + + + cdef gather_map_pair_type left_join( + const table_view left_keys, + const table_view right_keys, + null_equality nulls_equal, + ) except + + + cdef gather_map_pair_type full_join( + const table_view left_keys, + const table_view right_keys, + null_equality nulls_equal, + ) except + + + cdef gather_map_type left_semi_join( + const table_view left_keys, + const table_view right_keys, + null_equality nulls_equal, + ) except + + + cdef gather_map_type left_anti_join( + const table_view left_keys, + const table_view right_keys, + null_equality nulls_equal, + ) except + diff --git a/python/cudf/cudf/_lib/join.pyx b/python/cudf/cudf/_lib/join.pyx index 65f2f8cdcc8..0a54f0d67a0 100644 --- a/python/cudf/cudf/_lib/join.pyx +++ b/python/cudf/cudf/_lib/join.pyx @@ -20,6 +20,7 @@ def join(list lhs, list rhs, how=None): left_rows, right_rows = join_func( pylibcudf.Table([c.to_pylibcudf(mode="read") for c in lhs]), pylibcudf.Table([c.to_pylibcudf(mode="read") for c in rhs]), + pylibcudf.types.NullEquality.EQUAL ) return Column.from_pylibcudf(left_rows), Column.from_pylibcudf(right_rows) @@ -37,5 +38,6 @@ def semi_join(list lhs, list rhs, how=None): join_func( pylibcudf.Table([c.to_pylibcudf(mode="read") for c in lhs]), pylibcudf.Table([c.to_pylibcudf(mode="read") for c in rhs]), + pylibcudf.types.NullEquality.EQUAL ) ), None diff --git a/python/cudf/cudf/_lib/pylibcudf/column.pxd b/python/cudf/cudf/_lib/pylibcudf/column.pxd index a821c9186a0..fc5cc77c9e7 100644 --- a/python/cudf/cudf/_lib/pylibcudf/column.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/column.pxd @@ -21,7 +21,7 @@ cdef class Column: gpumemoryview _mask size_type _null_count size_type _offset - # children: List[Column] + # _children: List[Column] list _children size_type _num_children diff --git a/python/cudf/cudf/_lib/pylibcudf/column.pyx b/python/cudf/cudf/_lib/pylibcudf/column.pyx index dbe8d4feb37..2a7215099d5 100644 --- a/python/cudf/cudf/_lib/pylibcudf/column.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/column.pyx @@ -45,6 +45,8 @@ cdef class Column: gpumemoryview mask, size_type null_count, size_type offset, list children ): + if not all(isinstance(c, Column) for c in children): + raise ValueError("All children must be pylibcudf Column objects") self._data_type = data_type self._size = size self._data = data diff --git a/python/cudf/cudf/_lib/pylibcudf/join.pxd b/python/cudf/cudf/_lib/pylibcudf/join.pxd index 4014dd4a399..ff7dec97596 100644 --- a/python/cudf/cudf/_lib/pylibcudf/join.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/join.pxd @@ -1,15 +1,37 @@ # Copyright (c) 2024, NVIDIA CORPORATION. +from cudf._lib.cpp.types cimport null_equality + from .column cimport Column from .table cimport Table -cpdef tuple inner_join(Table left_keys, Table right_keys) +cpdef tuple inner_join( + Table left_keys, + Table right_keys, + null_equality nulls_equal +) -cpdef tuple left_join(Table left_keys, Table right_keys) +cpdef tuple left_join( + Table left_keys, + Table right_keys, + null_equality nulls_equal +) -cpdef tuple full_join(Table left_keys, Table right_keys) +cpdef tuple full_join( + Table left_keys, + Table right_keys, + null_equality nulls_equal +) -cpdef Column left_semi_join(Table left_keys, Table right_keys) +cpdef Column left_semi_join( + Table left_keys, + Table right_keys, + null_equality nulls_equal +) -cpdef Column left_anti_join(Table left_keys, Table right_keys) +cpdef Column left_anti_join( + Table left_keys, + Table right_keys, + null_equality nulls_equal +) diff --git a/python/cudf/cudf/_lib/pylibcudf/join.pyx b/python/cudf/cudf/_lib/pylibcudf/join.pyx index e1b61dabe22..3710a84e594 100644 --- a/python/cudf/cudf/_lib/pylibcudf/join.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/join.pyx @@ -9,7 +9,7 @@ from rmm._lib.device_buffer cimport device_buffer from cudf._lib.cpp cimport join as cpp_join from cudf._lib.cpp.column.column cimport column -from cudf._lib.cpp.types cimport data_type, size_type, type_id +from cudf._lib.cpp.types cimport data_type, null_equality, size_type, type_id from .column cimport Column from .table cimport Table @@ -32,7 +32,11 @@ cdef Column _column_from_gather_map(cpp_join.gather_map_type gather_map): ) -cpdef tuple inner_join(Table left_keys, Table right_keys): +cpdef tuple inner_join( + Table left_keys, + Table right_keys, + null_equality nulls_equal +): """Perform an inner join between two tables. For details, see :cpp:func:`inner_join`. @@ -43,6 +47,8 @@ cpdef tuple inner_join(Table left_keys, Table right_keys): The left table to join. right_keys : Table The right table to join. + nulls_equal : NullEquality + Should nulls compare equal? Returns ------- @@ -52,14 +58,18 @@ cpdef tuple inner_join(Table left_keys, Table right_keys): """ cdef cpp_join.gather_map_pair_type c_result with nogil: - c_result = cpp_join.inner_join(left_keys.view(), right_keys.view()) + c_result = cpp_join.inner_join(left_keys.view(), right_keys.view(), nulls_equal) return ( _column_from_gather_map(move(c_result.first)), _column_from_gather_map(move(c_result.second)), ) -cpdef tuple left_join(Table left_keys, Table right_keys): +cpdef tuple left_join( + Table left_keys, + Table right_keys, + null_equality nulls_equal +): """Perform a left join between two tables. For details, see :cpp:func:`left_join`. @@ -70,6 +80,9 @@ cpdef tuple left_join(Table left_keys, Table right_keys): The left table to join. right_keys : Table The right table to join. + nulls_equal : NullEquality + Should nulls compare equal? + Returns ------- @@ -79,14 +92,18 @@ cpdef tuple left_join(Table left_keys, Table right_keys): """ cdef cpp_join.gather_map_pair_type c_result with nogil: - c_result = cpp_join.left_join(left_keys.view(), right_keys.view()) + c_result = cpp_join.left_join(left_keys.view(), right_keys.view(), nulls_equal) return ( _column_from_gather_map(move(c_result.first)), _column_from_gather_map(move(c_result.second)), ) -cpdef tuple full_join(Table left_keys, Table right_keys): +cpdef tuple full_join( + Table left_keys, + Table right_keys, + null_equality nulls_equal +): """Perform a full join between two tables. For details, see :cpp:func:`full_join`. @@ -97,6 +114,9 @@ cpdef tuple full_join(Table left_keys, Table right_keys): The left table to join. right_keys : Table The right table to join. + nulls_equal : NullEquality + Should nulls compare equal? + Returns ------- @@ -106,14 +126,18 @@ cpdef tuple full_join(Table left_keys, Table right_keys): """ cdef cpp_join.gather_map_pair_type c_result with nogil: - c_result = cpp_join.full_join(left_keys.view(), right_keys.view()) + c_result = cpp_join.full_join(left_keys.view(), right_keys.view(), nulls_equal) return ( _column_from_gather_map(move(c_result.first)), _column_from_gather_map(move(c_result.second)), ) -cpdef Column left_semi_join(Table left_keys, Table right_keys): +cpdef Column left_semi_join( + Table left_keys, + Table right_keys, + null_equality nulls_equal +): """Perform a left semi join between two tables. For details, see :cpp:func:`left_semi_join`. @@ -124,6 +148,9 @@ cpdef Column left_semi_join(Table left_keys, Table right_keys): The left table to join. right_keys : Table The right table to join. + nulls_equal : NullEquality + Should nulls compare equal? + Returns ------- @@ -132,11 +159,19 @@ cpdef Column left_semi_join(Table left_keys, Table right_keys): """ cdef cpp_join.gather_map_type c_result with nogil: - c_result = cpp_join.left_semi_join(left_keys.view(), right_keys.view()) + c_result = cpp_join.left_semi_join( + left_keys.view(), + right_keys.view(), + nulls_equal + ) return _column_from_gather_map(move(c_result)) -cpdef Column left_anti_join(Table left_keys, Table right_keys): +cpdef Column left_anti_join( + Table left_keys, + Table right_keys, + null_equality nulls_equal +): """Perform a left anti join between two tables. For details, see :cpp:func:`left_anti_join`. @@ -147,6 +182,9 @@ cpdef Column left_anti_join(Table left_keys, Table right_keys): The left table to join. right_keys : Table The right table to join. + nulls_equal : NullEquality + Should nulls compare equal? + Returns ------- @@ -155,5 +193,9 @@ cpdef Column left_anti_join(Table left_keys, Table right_keys): """ cdef cpp_join.gather_map_type c_result with nogil: - c_result = cpp_join.left_anti_join(left_keys.view(), right_keys.view()) + c_result = cpp_join.left_anti_join( + left_keys.view(), + right_keys.view(), + nulls_equal + ) return _column_from_gather_map(move(c_result)) diff --git a/python/cudf/cudf/_lib/pylibcudf/table.pyx b/python/cudf/cudf/_lib/pylibcudf/table.pyx index 6d25d215f28..0cde346fa9c 100644 --- a/python/cudf/cudf/_lib/pylibcudf/table.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/table.pyx @@ -28,6 +28,8 @@ cdef class Table: The columns in this table. """ def __init__(self, list columns): + if not all(isinstance(c, Column) for c in columns): + raise ValueError("All columns must be pylibcudf Column objects") self._columns = columns cdef table_view view(self) nogil: