From 035a5af8b2485d2a42e81d31ac9b4fa05defbe15 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 29 Apr 2021 10:48:55 -0700 Subject: [PATCH 01/11] initial roundtripping prototype --- cpp/include/cudf/scalar/scalar.hpp | 2 ++ cpp/src/scalar/scalar.cpp | 8 +++++++ python/cudf/cudf/_lib/cpp/scalar/scalar.pxd | 9 ++++++++ python/cudf/cudf/_lib/scalar.pyx | 25 ++++++++++++++++++++- 4 files changed, 43 insertions(+), 1 deletion(-) diff --git a/cpp/include/cudf/scalar/scalar.hpp b/cpp/include/cudf/scalar/scalar.hpp index b1ce1971a5c..a027908d0d7 100644 --- a/cpp/include/cudf/scalar/scalar.hpp +++ b/cpp/include/cudf/scalar/scalar.hpp @@ -631,5 +631,7 @@ class list_scalar : public scalar { cudf::column _data; }; +std::unique_ptr get_test_list_scalar(column_view in_col); + /** @} */ // end of group } // namespace cudf diff --git a/cpp/src/scalar/scalar.cpp b/cpp/src/scalar/scalar.cpp index fe051b1ffc5..8d422aaa577 100644 --- a/cpp/src/scalar/scalar.cpp +++ b/cpp/src/scalar/scalar.cpp @@ -56,4 +56,12 @@ std::string string_scalar::to_string(rmm::cuda_stream_view stream) const return result; } +std::unique_ptr get_test_list_scalar(column_view in_col) { + std::unique_ptr out = std::make_unique( + cudf::list_scalar(in_col) + ); + return out; +} + + } // namespace cudf diff --git a/python/cudf/cudf/_lib/cpp/scalar/scalar.pxd b/python/cudf/cudf/_lib/cpp/scalar/scalar.pxd index fec1c6382e6..5cf24e978f9 100644 --- a/python/cudf/cudf/_lib/cpp/scalar/scalar.pxd +++ b/python/cudf/cudf/_lib/cpp/scalar/scalar.pxd @@ -9,6 +9,10 @@ from libcpp.string cimport string from cudf._lib.cpp.types cimport data_type from cudf._lib.cpp.wrappers.decimals cimport scale_type +from libcpp.memory cimport unique_ptr +from cudf._lib.cpp.column.column_view cimport column_view + + cdef extern from "cudf/scalar/scalar.hpp" namespace "cudf" nogil: cdef cppclass scalar: scalar() except + @@ -60,3 +64,8 @@ cdef extern from "cudf/scalar/scalar.hpp" namespace "cudf" nogil: bool is_valid) except + int64_t value() except + # TODO: Figure out how to add an int32 overload of value() + + cdef cppclass list_scalar(scalar): + column_view view() except + + + cdef unique_ptr[scalar] get_test_list_scalar(column_view in_col) diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx index b31f0675422..4ac7118d6ca 100644 --- a/python/cudf/cudf/_lib/scalar.pyx +++ b/python/cudf/cudf/_lib/scalar.pyx @@ -41,10 +41,16 @@ from cudf._lib.cpp.scalar.scalar cimport ( timestamp_scalar, duration_scalar, string_scalar, - fixed_point_scalar + fixed_point_scalar, + list_scalar, + get_test_list_scalar, ) from cudf.utils.dtypes import _decimal_to_int64 cimport cudf._lib.cpp.types as libcudf_types +from cudf._lib.column cimport Column +from cudf._lib.cpp.column.column_view cimport column_view +from cudf._lib.table cimport Table +from cudf._lib.interop import to_arrow cdef class DeviceScalar: @@ -440,3 +446,20 @@ def _create_proxy_nat_scalar(dtype): return result else: raise TypeError('NAT only valid for datetime and timedelta') + +def test_get_test_list_scalar(Column input_column): + cdef column_view in_col = input_column.view() + cdef unique_ptr[scalar] unique_ptr_to_scalar = get_test_list_scalar(in_col) + + cdef DeviceScalar devslr = DeviceScalar.__new__(DeviceScalar) + devslr.c_value = move(unique_ptr_to_scalar) + + cdef list_scalar* raw_ptr = (devslr.c_value).get() + cdef column_view the_scalars_data = raw_ptr[0].view() + + cdef Column output_column = Column.from_column_view(the_scalars_data, None) + + cdef Table to_arrow_table = Table({"col": output_column}) + arrow_table = to_arrow(to_arrow_table, [["col", []]]) + + return arrow_table['col'].to_pylist() From 5cd644df927d621cd72fec1fd0cb95b07f17595b Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 4 May 2021 07:35:31 -0700 Subject: [PATCH 02/11] move logic to from_unique_ptr and add dtype getting logic --- python/cudf/cudf/_lib/scalar.pyx | 40 +++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx index 4ac7118d6ca..15d829ce84e 100644 --- a/python/cudf/cudf/_lib/scalar.pyx +++ b/python/cudf/cudf/_lib/scalar.pyx @@ -18,9 +18,12 @@ from libcpp.utility cimport move from libcpp cimport bool import cudf -from cudf._lib.types import cudf_to_np_types, duration_unit_map +from cudf._lib.types import ( + cudf_to_np_types, + duration_unit_map +) from cudf._lib.types import datetime_unit_map -from cudf._lib.types cimport underlying_type_t_type_id +from cudf._lib.types cimport underlying_type_t_type_id, dtype_from_column_view from cudf._lib.cpp.wrappers.timestamps cimport ( timestamp_s, @@ -45,7 +48,7 @@ from cudf._lib.cpp.scalar.scalar cimport ( list_scalar, get_test_list_scalar, ) -from cudf.utils.dtypes import _decimal_to_int64 +from cudf.utils.dtypes import _decimal_to_int64, is_list_dtype cimport cudf._lib.cpp.types as libcudf_types from cudf._lib.column cimport Column from cudf._lib.cpp.column.column_view cimport column_view @@ -103,6 +106,8 @@ cdef class DeviceScalar: def _to_host_scalar(self): if isinstance(self.dtype, cudf.Decimal64Dtype): result = _get_py_decimal_from_fixed_point(self.c_value) + elif is_list_dtype(self.dtype): + result = _get_py_list_from_list(self.c_value) elif pd.api.types.is_string_dtype(self.dtype): result = _get_py_string_from_string(self.c_value) elif pd.api.types.is_numeric_dtype(self.dtype): @@ -165,6 +170,10 @@ cdef class DeviceScalar: raise TypeError( "Must pass a dtype when constructing from a fixed-point scalar" ) + elif cdtype.id() == libcudf_types.LIST: + s._dtype = dtype_from_column_view( + (s.get_raw_ptr())[0].view() + ) else: if dtype is not None: s._dtype = dtype @@ -274,6 +283,18 @@ cdef _set_decimal64_from_scalar(unique_ptr[scalar]& s, ) ) +cdef _get_py_list_from_list(unique_ptr[scalar]& s): + + if not s.get()[0].is_valid(): + return cudf.NA + + cdef column_view list_col_view = (s.get()).view() + cdef Column list_col = Column.from_column_view(list_col_view, None) + cdef Table to_arrow_table = Table({"col": list_col}) + + arrow_table = to_arrow(to_arrow_table, [["col", []]]) + return arrow_table['col'].to_pylist() + cdef _get_py_string_from_string(unique_ptr[scalar]& s): if not s.get()[0].is_valid(): return cudf.NA @@ -449,17 +470,10 @@ def _create_proxy_nat_scalar(dtype): def test_get_test_list_scalar(Column input_column): cdef column_view in_col = input_column.view() - cdef unique_ptr[scalar] unique_ptr_to_scalar = get_test_list_scalar(in_col) - cdef DeviceScalar devslr = DeviceScalar.__new__(DeviceScalar) - devslr.c_value = move(unique_ptr_to_scalar) - cdef list_scalar* raw_ptr = (devslr.c_value).get() - cdef column_view the_scalars_data = raw_ptr[0].view() + cdef DeviceScalar result_slr = DeviceScalar.from_unique_ptr(get_test_list_scalar(in_col)) - cdef Column output_column = Column.from_column_view(the_scalars_data, None) - - cdef Table to_arrow_table = Table({"col": output_column}) - arrow_table = to_arrow(to_arrow_table, [["col", []]]) + print(result_slr._dtype.__repr__()) - return arrow_table['col'].to_pylist() + return result_slr._to_host_scalar() From 1d0d4e924c0de6967480ce93077c5e2fe9711682 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 17 May 2021 14:23:59 -0700 Subject: [PATCH 03/11] plumbing and tests --- python/cudf/cudf/_lib/scalar.pyx | 39 +++++++++++++++++++---------- python/cudf/cudf/core/indexing.py | 2 +- python/cudf/cudf/tests/test_list.py | 18 +++++++++++++ 3 files changed, 45 insertions(+), 14 deletions(-) diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx index eb1c96f8a3b..e58e63aa5d7 100644 --- a/python/cudf/cudf/_lib/scalar.pyx +++ b/python/cudf/cudf/_lib/scalar.pyx @@ -18,6 +18,7 @@ from libcpp.utility cimport move from libcpp cimport bool import cudf +from cudf.core.dtypes import ListDtype from cudf._lib.types import ( cudf_to_np_types, duration_unit_map @@ -171,9 +172,18 @@ cdef class DeviceScalar: "Must pass a dtype when constructing from a fixed-point scalar" ) elif cdtype.id() == libcudf_types.LIST: - s._dtype = dtype_from_column_view( - (s.get_raw_ptr())[0].view() - ) + if (s.get_raw_ptr())[0].view().type().id() == libcudf_types.LIST: + s._dtype = dtype_from_column_view( + (s.get_raw_ptr())[0].view() + ) + else: + s._dtype = ListDtype( + cudf_to_np_types[ + ( + (s.get_raw_ptr())[0].view().type().id() + ) + ] + ) else: if dtype is not None: s._dtype = dtype @@ -293,7 +303,8 @@ cdef _get_py_list_from_list(unique_ptr[scalar]& s): cdef Table to_arrow_table = Table({"col": list_col}) arrow_table = to_arrow(to_arrow_table, [["col", []]]) - return arrow_table['col'].to_pylist() + result = arrow_table['col'].to_pylist() + return _nested_na_replace(result) cdef _get_py_string_from_string(unique_ptr[scalar]& s): if not s.get()[0].is_valid(): @@ -468,12 +479,14 @@ def _create_proxy_nat_scalar(dtype): else: raise TypeError('NAT only valid for datetime and timedelta') -def test_get_test_list_scalar(Column input_column): - cdef column_view in_col = input_column.view() - - - cdef DeviceScalar result_slr = DeviceScalar.from_unique_ptr(get_test_list_scalar(in_col)) - - print(result_slr._dtype.__repr__()) - - return result_slr._to_host_scalar() +def _nested_na_replace(input_list): + ''' + Replace `None` with `cudf.NA` in the result of + `__getitem__` calls to list type columns + ''' + for idx, value in enumerate(input_list): + if isinstance(value, list): + _nested_na_replace(value) + elif value is None: + input_list[idx] = cudf.NA + return input_list diff --git a/python/cudf/cudf/core/indexing.py b/python/cudf/cudf/core/indexing.py index 7de1aaf9726..9413fb51edd 100755 --- a/python/cudf/cudf/core/indexing.py +++ b/python/cudf/cudf/core/indexing.py @@ -85,7 +85,7 @@ def __getitem__(self, arg): arg = list(arg) data = self._sr._column[arg] - if is_scalar(data) or _is_null_host_scalar(data): + if isinstance(data, list) or is_scalar(data) or _is_null_host_scalar(data): return data index = self._sr.index.take(arg) return self._sr._copy_construct(data=data, index=index) diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py index 5dcecc6c9e1..3c86cf12f42 100644 --- a/python/cudf/cudf/tests/test_list.py +++ b/python/cudf/cudf/tests/test_list.py @@ -332,3 +332,21 @@ def test_concatenate_list_with_nonlist(): gdf1 = cudf.DataFrame({"A": [["a", "c"], ["b", "d"], ["c", "d"]]}) gdf2 = cudf.DataFrame({"A": ["a", "b", "c"]}) gdf1["A"] + gdf2["A"] + +@pytest.mark.parametrize('data', [ + [1], + [1, 2, 3], + [ + [1, 2, 3], + [4, 5, 6] + ], + [None], + [1, None, 3], + [ + [1, None, 3], + [None, 5, 6] + ] +]) +def test_list_getitem(data): + list_sr = cudf.Series([data]) + assert data == list_sr[0] From 5c1e13d44ae1ef3b40476bfa5b060eea031dec6d Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 17 May 2021 14:26:07 -0700 Subject: [PATCH 04/11] remove original test code --- cpp/include/cudf/scalar/scalar.hpp | 1 - cpp/src/scalar/scalar.cpp | 7 ------- 2 files changed, 8 deletions(-) diff --git a/cpp/include/cudf/scalar/scalar.hpp b/cpp/include/cudf/scalar/scalar.hpp index 716e7bba68d..c2cbcd00684 100644 --- a/cpp/include/cudf/scalar/scalar.hpp +++ b/cpp/include/cudf/scalar/scalar.hpp @@ -572,7 +572,6 @@ class list_scalar : public scalar { cudf::column _data; }; -std::unique_ptr get_test_list_scalar(column_view in_col); /** @} */ // end of group } // namespace cudf diff --git a/cpp/src/scalar/scalar.cpp b/cpp/src/scalar/scalar.cpp index 77ef722f6b7..991e911a697 100644 --- a/cpp/src/scalar/scalar.cpp +++ b/cpp/src/scalar/scalar.cpp @@ -95,14 +95,7 @@ std::string string_scalar::to_string(rmm::cuda_stream_view stream) const return result; } -std::unique_ptr get_test_list_scalar(column_view in_col) { - std::unique_ptr out = std::make_unique( - cudf::list_scalar(in_col) - ); - return out; -} - template fixed_point_scalar::fixed_point_scalar() : scalar(data_type(type_to_id())){}; From 610e31c39838dcc47d217813c1d20b04755073b6 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 17 May 2021 14:26:58 -0700 Subject: [PATCH 05/11] cleanup diff --- cpp/include/cudf/scalar/scalar.hpp | 1 - cpp/src/scalar/scalar.cpp | 1 - 2 files changed, 2 deletions(-) diff --git a/cpp/include/cudf/scalar/scalar.hpp b/cpp/include/cudf/scalar/scalar.hpp index c2cbcd00684..3de8762c763 100644 --- a/cpp/include/cudf/scalar/scalar.hpp +++ b/cpp/include/cudf/scalar/scalar.hpp @@ -572,6 +572,5 @@ class list_scalar : public scalar { cudf::column _data; }; - /** @} */ // end of group } // namespace cudf diff --git a/cpp/src/scalar/scalar.cpp b/cpp/src/scalar/scalar.cpp index 991e911a697..858d2c063b3 100644 --- a/cpp/src/scalar/scalar.cpp +++ b/cpp/src/scalar/scalar.cpp @@ -95,7 +95,6 @@ std::string string_scalar::to_string(rmm::cuda_stream_view stream) const return result; } - template fixed_point_scalar::fixed_point_scalar() : scalar(data_type(type_to_id())){}; From b1d4306535fb22a72f3920200222abd5a0126eb2 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 17 May 2021 14:28:39 -0700 Subject: [PATCH 06/11] little cython cleanup --- python/cudf/cudf/_lib/cpp/scalar/scalar.pxd | 2 -- python/cudf/cudf/_lib/scalar.pyx | 1 - 2 files changed, 3 deletions(-) diff --git a/python/cudf/cudf/_lib/cpp/scalar/scalar.pxd b/python/cudf/cudf/_lib/cpp/scalar/scalar.pxd index 5cf24e978f9..6525f29af23 100644 --- a/python/cudf/cudf/_lib/cpp/scalar/scalar.pxd +++ b/python/cudf/cudf/_lib/cpp/scalar/scalar.pxd @@ -67,5 +67,3 @@ cdef extern from "cudf/scalar/scalar.hpp" namespace "cudf" nogil: cdef cppclass list_scalar(scalar): column_view view() except + - - cdef unique_ptr[scalar] get_test_list_scalar(column_view in_col) diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx index e58e63aa5d7..b1f8e4bf7cf 100644 --- a/python/cudf/cudf/_lib/scalar.pyx +++ b/python/cudf/cudf/_lib/scalar.pyx @@ -47,7 +47,6 @@ from cudf._lib.cpp.scalar.scalar cimport ( string_scalar, fixed_point_scalar, list_scalar, - get_test_list_scalar, ) from cudf.utils.dtypes import _decimal_to_int64, is_list_dtype cimport cudf._lib.cpp.types as libcudf_types From f941d12c8cf68f9957677b876aa1a4a5f7ffc97a Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 18 May 2021 07:27:04 -0700 Subject: [PATCH 07/11] minor changes --- python/cudf/cudf/_lib/cpp/scalar/scalar.pxd | 1 - python/cudf/cudf/tests/test_list.py | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/_lib/cpp/scalar/scalar.pxd b/python/cudf/cudf/_lib/cpp/scalar/scalar.pxd index 6525f29af23..de5cb05447c 100644 --- a/python/cudf/cudf/_lib/cpp/scalar/scalar.pxd +++ b/python/cudf/cudf/_lib/cpp/scalar/scalar.pxd @@ -9,7 +9,6 @@ from libcpp.string cimport string from cudf._lib.cpp.types cimport data_type from cudf._lib.cpp.wrappers.decimals cimport scale_type -from libcpp.memory cimport unique_ptr from cudf._lib.cpp.column.column_view cimport column_view diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py index 3c86cf12f42..249b4497e88 100644 --- a/python/cudf/cudf/tests/test_list.py +++ b/python/cudf/cudf/tests/test_list.py @@ -9,6 +9,7 @@ import cudf from cudf.tests.utils import assert_eq +from cudf import NA @pytest.mark.parametrize( "data", From c482173867947ee180f161d7f6b6792fa8083df4 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 18 May 2021 14:41:16 -0700 Subject: [PATCH 08/11] move goalposts --- python/cudf/cudf/tests/test_list.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py index 249b4497e88..29aa76f6292 100644 --- a/python/cudf/cudf/tests/test_list.py +++ b/python/cudf/cudf/tests/test_list.py @@ -9,7 +9,6 @@ import cudf from cudf.tests.utils import assert_eq -from cudf import NA @pytest.mark.parametrize( "data", @@ -350,4 +349,5 @@ def test_concatenate_list_with_nonlist(): ]) def test_list_getitem(data): list_sr = cudf.Series([data]) - assert data == list_sr[0] + # __getitem__ shall fill None with cudf.NA + assert pa.scalar(data) == list_sr.to_arrow()[0] From 1701d0b5da0b3758684aa51bb1dabb96db4fa05e Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 18 May 2021 15:01:10 -0700 Subject: [PATCH 09/11] style --- python/cudf/cudf/_lib/scalar.pyx | 14 ++++++-------- python/cudf/cudf/core/indexing.py | 6 +++++- python/cudf/cudf/tests/test_list.py | 20 +++++++++----------- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx index b1f8e4bf7cf..4d56be25de1 100644 --- a/python/cudf/cudf/_lib/scalar.pyx +++ b/python/cudf/cudf/_lib/scalar.pyx @@ -20,7 +20,7 @@ from libcpp cimport bool import cudf from cudf.core.dtypes import ListDtype from cudf._lib.types import ( - cudf_to_np_types, + cudf_to_np_types, duration_unit_map ) from cudf._lib.types import datetime_unit_map @@ -50,11 +50,6 @@ from cudf._lib.cpp.scalar.scalar cimport ( ) from cudf.utils.dtypes import _decimal_to_int64, is_list_dtype cimport cudf._lib.cpp.types as libcudf_types -from cudf._lib.column cimport Column -from cudf._lib.cpp.column.column_view cimport column_view -from cudf._lib.table cimport Table -from cudf._lib.interop import to_arrow - cdef class DeviceScalar: @@ -171,7 +166,8 @@ cdef class DeviceScalar: "Must pass a dtype when constructing from a fixed-point scalar" ) elif cdtype.id() == libcudf_types.LIST: - if (s.get_raw_ptr())[0].view().type().id() == libcudf_types.LIST: + if (s.get_raw_ptr())[0] + .view().type().id() == libcudf_types.LIST: s._dtype = dtype_from_column_view( (s.get_raw_ptr())[0].view() ) @@ -179,7 +175,8 @@ cdef class DeviceScalar: s._dtype = ListDtype( cudf_to_np_types[ ( - (s.get_raw_ptr())[0].view().type().id() + (s.get_raw_ptr())[0] + .view().type().id() ) ] ) @@ -478,6 +475,7 @@ def _create_proxy_nat_scalar(dtype): else: raise TypeError('NAT only valid for datetime and timedelta') + def _nested_na_replace(input_list): ''' Replace `None` with `cudf.NA` in the result of diff --git a/python/cudf/cudf/core/indexing.py b/python/cudf/cudf/core/indexing.py index 9413fb51edd..21d075ae67d 100755 --- a/python/cudf/cudf/core/indexing.py +++ b/python/cudf/cudf/core/indexing.py @@ -85,7 +85,11 @@ def __getitem__(self, arg): arg = list(arg) data = self._sr._column[arg] - if isinstance(data, list) or is_scalar(data) or _is_null_host_scalar(data): + if ( + isinstance(data, list) + or is_scalar(data) + or _is_null_host_scalar(data) + ): return data index = self._sr.index.take(arg) return self._sr._copy_construct(data=data, index=index) diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py index 29aa76f6292..e596c3cfeb1 100644 --- a/python/cudf/cudf/tests/test_list.py +++ b/python/cudf/cudf/tests/test_list.py @@ -333,20 +333,18 @@ def test_concatenate_list_with_nonlist(): gdf2 = cudf.DataFrame({"A": ["a", "b", "c"]}) gdf1["A"] + gdf2["A"] -@pytest.mark.parametrize('data', [ - [1], - [1, 2, 3], + +@pytest.mark.parametrize( + "data", [ + [1], [1, 2, 3], - [4, 5, 6] - ], - [None], - [1, None, 3], - [ + [[1, 2, 3], [4, 5, 6]], + [None], [1, None, 3], - [None, 5, 6] - ] -]) + [[1, None, 3], [None, 5, 6]], + ], +) def test_list_getitem(data): list_sr = cudf.Series([data]) # __getitem__ shall fill None with cudf.NA From 0448312de4fcd6315d381b5595249842a65de565 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 18 May 2021 15:08:03 -0700 Subject: [PATCH 10/11] compilation fixes --- python/cudf/cudf/_lib/scalar.pyx | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx index 4d56be25de1..cb355a15f15 100644 --- a/python/cudf/cudf/_lib/scalar.pyx +++ b/python/cudf/cudf/_lib/scalar.pyx @@ -26,6 +26,11 @@ from cudf._lib.types import ( from cudf._lib.types import datetime_unit_map from cudf._lib.types cimport underlying_type_t_type_id, dtype_from_column_view +from cudf._lib.column cimport Column +from cudf._lib.cpp.column.column_view cimport column_view +from cudf._lib.table cimport Table +from cudf._lib.interop import to_arrow + from cudf._lib.cpp.wrappers.timestamps cimport ( timestamp_s, timestamp_ms, @@ -166,8 +171,9 @@ cdef class DeviceScalar: "Must pass a dtype when constructing from a fixed-point scalar" ) elif cdtype.id() == libcudf_types.LIST: - if (s.get_raw_ptr())[0] - .view().type().id() == libcudf_types.LIST: + if ( + s.get_raw_ptr() + )[0].view().type().id() == libcudf_types.LIST: s._dtype = dtype_from_column_view( (s.get_raw_ptr())[0].view() ) From 5e75734c3a13fe67ac1440349032395220c7e713 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 19 May 2021 11:26:28 -0700 Subject: [PATCH 11/11] fix tests --- python/cudf/cudf/tests/test_list.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py index e596c3cfeb1..7edcb08a7c8 100644 --- a/python/cudf/cudf/tests/test_list.py +++ b/python/cudf/cudf/tests/test_list.py @@ -7,6 +7,7 @@ import pytest import cudf +from cudf import NA from cudf.tests.utils import assert_eq @@ -335,17 +336,17 @@ def test_concatenate_list_with_nonlist(): @pytest.mark.parametrize( - "data", + "indata,expect", [ - [1], - [1, 2, 3], - [[1, 2, 3], [4, 5, 6]], - [None], - [1, None, 3], - [[1, None, 3], [None, 5, 6]], + ([1], [1]), + ([1, 2, 3], [1, 2, 3]), + ([[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]]), + ([None], [NA]), + ([1, None, 3], [1, NA, 3]), + ([[1, None, 3], [None, 5, 6]], [[1, NA, 3], [NA, 5, 6]]), ], ) -def test_list_getitem(data): - list_sr = cudf.Series([data]) +def test_list_getitem(indata, expect): + list_sr = cudf.Series([indata]) # __getitem__ shall fill None with cudf.NA - assert pa.scalar(data) == list_sr.to_arrow()[0] + assert list_sr[0] == expect