From 586e655cdceaf812e36c18f5cf9f833c112f0b52 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 8 Sep 2021 11:08:17 -0700 Subject: [PATCH 01/10] Remove unused functions for generating mutable table views. --- python/cudf/cudf/_lib/table.pxd | 4 --- python/cudf/cudf/_lib/table.pyx | 50 --------------------------------- 2 files changed, 54 deletions(-) diff --git a/python/cudf/cudf/_lib/table.pxd b/python/cudf/cudf/_lib/table.pxd index 0730199c8a9..73bc82428aa 100644 --- a/python/cudf/cudf/_lib/table.pxd +++ b/python/cudf/cudf/_lib/table.pxd @@ -10,12 +10,8 @@ cdef class Table: cdef dict __dict__ cdef table_view view(self) except * - cdef mutable_table_view mutable_view(self) except * cdef table_view data_view(self) except * - cdef mutable_table_view mutable_data_view(self) except * cdef table_view index_view(self) except * - cdef mutable_table_view mutable_index_view(self) except * cdef table_view make_table_view(columns) except * -cdef mutable_table_view make_mutable_table_view(columns) except * cdef columns_from_ptr(unique_ptr[table] c_tbl) diff --git a/python/cudf/cudf/_lib/table.pyx b/python/cudf/cudf/_lib/table.pyx index 2981a46a54a..2f41d4951cf 100644 --- a/python/cudf/cudf/_lib/table.pyx +++ b/python/cudf/cudf/_lib/table.pyx @@ -87,22 +87,6 @@ cdef class Table: ) ) - cdef mutable_table_view mutable_view(self) except *: - """ - Return a cudf::mutable_table_view of all columns - (including index columns) of this Table. - """ - if self._index is None: - return make_mutable_table_view( - self._data.columns - ) - return make_mutable_table_view( - itertools.chain( - self._index._data.columns, - self._data.columns, - ) - ) - cdef table_view data_view(self) except *: """ Return a cudf::table_view of just the data columns @@ -112,15 +96,6 @@ cdef class Table: self._data.columns ) - cdef mutable_table_view mutable_data_view(self) except *: - """ - Return a cudf::mutable_table_view of just the data columns - of this Table. - """ - return make_mutable_table_view( - self._data.columns - ) - cdef table_view index_view(self) except *: """ Return a cudf::table_view of just the index columns @@ -133,18 +108,6 @@ cdef class Table: self._index.values() ) - cdef mutable_table_view mutable_index_view(self) except *: - """ - Return a cudf::mutable_table_view of just the index columns - of this Table. - """ - if self._index is None: - raise ValueError("Cannot get mutable_index_view of a Table " - "that has no index") - return make_mutable_table_view( - self._index._data.columns - ) - cdef table_view make_table_view(columns) except*: """ @@ -159,19 +122,6 @@ cdef table_view make_table_view(columns) except*: return table_view(column_views) -cdef mutable_table_view make_mutable_table_view(columns) except*: - """ - Helper function to create a cudf::mutable_table_view from - a list of Columns - """ - cdef vector[mutable_column_view] mutable_column_views - - cdef Column col - for col in columns: - mutable_column_views.push_back(col.mutable_view()) - - return mutable_table_view(mutable_column_views) - cdef columns_from_ptr(unique_ptr[table] c_tbl): """ Return a list of table columns from a unique pointer From e5cc823d5f2c491b2df959cde137af80e4ed056b Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 8 Sep 2021 11:12:17 -0700 Subject: [PATCH 02/10] Remove unused index_view. --- python/cudf/cudf/_lib/table.pxd | 1 - python/cudf/cudf/_lib/table.pyx | 12 ------------ 2 files changed, 13 deletions(-) diff --git a/python/cudf/cudf/_lib/table.pxd b/python/cudf/cudf/_lib/table.pxd index 73bc82428aa..e4d59dba86f 100644 --- a/python/cudf/cudf/_lib/table.pxd +++ b/python/cudf/cudf/_lib/table.pxd @@ -11,7 +11,6 @@ cdef class Table: cdef table_view view(self) except * cdef table_view data_view(self) except * - cdef table_view index_view(self) except * cdef table_view make_table_view(columns) except * cdef columns_from_ptr(unique_ptr[table] c_tbl) diff --git a/python/cudf/cudf/_lib/table.pyx b/python/cudf/cudf/_lib/table.pyx index 2f41d4951cf..65fdde9b7ab 100644 --- a/python/cudf/cudf/_lib/table.pyx +++ b/python/cudf/cudf/_lib/table.pyx @@ -96,18 +96,6 @@ cdef class Table: self._data.columns ) - cdef table_view index_view(self) except *: - """ - Return a cudf::table_view of just the index columns - of this Table. - """ - if self._index is None: - raise ValueError("Cannot get index_view of a Table " - "that has no index") - return make_table_view( - self._index.values() - ) - cdef table_view make_table_view(columns) except*: """ From e7db0298856aab12d47a33d96651f8a423a3804f Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 8 Sep 2021 11:27:14 -0700 Subject: [PATCH 03/10] Remove unused columns_from_ptr function. --- python/cudf/cudf/_lib/join.pyx | 2 +- python/cudf/cudf/_lib/table.pxd | 1 - python/cudf/cudf/_lib/table.pyx | 19 ------------------- 3 files changed, 1 insertion(+), 21 deletions(-) diff --git a/python/cudf/cudf/_lib/join.pyx b/python/cudf/cudf/_lib/join.pyx index 186f8d32aeb..bf9f5a2e831 100644 --- a/python/cudf/cudf/_lib/join.pyx +++ b/python/cudf/cudf/_lib/join.pyx @@ -16,7 +16,7 @@ from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view from cudf._lib.cpp.types cimport data_type, size_type, type_id -from cudf._lib.table cimport Table, columns_from_ptr +from cudf._lib.table cimport Table # The functions below return the *gathermaps* that represent # the join result when joining on the keys `lhs` and `rhs`. diff --git a/python/cudf/cudf/_lib/table.pxd b/python/cudf/cudf/_lib/table.pxd index e4d59dba86f..c5e2ce2eb82 100644 --- a/python/cudf/cudf/_lib/table.pxd +++ b/python/cudf/cudf/_lib/table.pxd @@ -13,4 +13,3 @@ cdef class Table: cdef table_view data_view(self) except * cdef table_view make_table_view(columns) except * -cdef columns_from_ptr(unique_ptr[table] c_tbl) diff --git a/python/cudf/cudf/_lib/table.pyx b/python/cudf/cudf/_lib/table.pyx index 65fdde9b7ab..2b576f4eaad 100644 --- a/python/cudf/cudf/_lib/table.pyx +++ b/python/cudf/cudf/_lib/table.pyx @@ -109,22 +109,3 @@ cdef table_view make_table_view(columns) except*: column_views.push_back(col.view()) return table_view(column_views) - -cdef columns_from_ptr(unique_ptr[table] c_tbl): - """ - Return a list of table columns from a unique pointer - - Parameters - ---------- - c_tbl : unique_ptr[cudf::table] - """ - num_columns = c_tbl.get().num_columns() - cdef vector[unique_ptr[column]] columns - columns = move(c_tbl.get()[0].release()) - cdef vector[unique_ptr[column]].iterator it = columns.begin() - - result = [None] * num_columns - for i in range(num_columns): - result[i] = Column.from_unique_ptr(move(dereference(it))) - it += 1 - return result From f9e601ca641fad176a36c579bf454f910ee3d9b4 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 8 Sep 2021 11:35:54 -0700 Subject: [PATCH 04/10] Rename make_table_view to reflect appropriate arguments. --- python/cudf/cudf/_lib/copying.pyx | 7 ++++--- python/cudf/cudf/_lib/csv.pyx | 2 +- python/cudf/cudf/_lib/scalar.pyx | 4 ++-- python/cudf/cudf/_lib/table.pxd | 2 +- python/cudf/cudf/_lib/table.pyx | 8 ++++---- 5 files changed, 12 insertions(+), 11 deletions(-) diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx index 88f54632000..85105d7d666 100644 --- a/python/cudf/cudf/_lib/copying.pyx +++ b/python/cudf/cudf/_lib/copying.pyx @@ -19,7 +19,7 @@ from cudf._lib.column cimport Column from cudf._lib.scalar import as_device_scalar from cudf._lib.scalar cimport DeviceScalar -from cudf._lib.table cimport Table, make_table_view +from cudf._lib.table cimport Table, table_view_from_columns from cudf._lib.reduce import minmax from cudf.core.abc import Serializable @@ -200,7 +200,8 @@ def scatter(object source, Column scatter_map, Column target_column, """ cdef column_view scatter_map_view = scatter_map.view() - cdef table_view target_table_view = make_table_view((target_column,)) + cdef table_view target_table_view = table_view_from_columns( + (target_column,)) cdef bool c_bounds_check = bounds_check cdef unique_ptr[table] c_result @@ -212,7 +213,7 @@ def scatter(object source, Column scatter_map, Column target_column, cdef DeviceScalar slr if isinstance(source, Column): - source_table_view = make_table_view(( source,)) + source_table_view = table_view_from_columns(( source,)) with nogil: c_result = move( diff --git a/python/cudf/cudf/_lib/csv.pyx b/python/cudf/cudf/_lib/csv.pyx index 9912a7801a4..abfe4373e96 100644 --- a/python/cudf/cudf/_lib/csv.pyx +++ b/python/cudf/cudf/_lib/csv.pyx @@ -44,7 +44,7 @@ from cudf._lib.cpp.io.types cimport ( ) from cudf._lib.cpp.table.table_view cimport table_view from cudf._lib.io.utils cimport make_sink_info, make_source_info -from cudf._lib.table cimport Table, make_table_view +from cudf._lib.table cimport Table, table_view_from_columns from cudf._lib.utils cimport data_from_unique_ptr ctypedef int32_t underlying_type_t_compression diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx index c0cae16d9ef..cd249e5c3cb 100644 --- a/python/cudf/cudf/_lib/scalar.pyx +++ b/python/cudf/cudf/_lib/scalar.pyx @@ -30,7 +30,7 @@ from cudf.core.dtypes import ListDtype, StructDtype from cudf._lib.column cimport Column from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.cpp.table.table_view cimport table_view -from cudf._lib.table cimport Table, make_table_view +from cudf._lib.table cimport Table, table_view_from_columns from cudf._lib.types cimport dtype_from_column_view, underlying_type_t_type_id from cudf._lib.interop import from_arrow, to_arrow @@ -339,7 +339,7 @@ cdef _set_struct_from_pydict(unique_ptr[scalar]& s, ) data, _ = from_arrow(pyarrow_table, column_names=columns) - cdef table_view struct_view = make_table_view(data.values()) + cdef table_view struct_view = table_view_from_columns(data.values()) s.reset( new struct_scalar(struct_view, valid) diff --git a/python/cudf/cudf/_lib/table.pxd b/python/cudf/cudf/_lib/table.pxd index c5e2ce2eb82..09f6072953d 100644 --- a/python/cudf/cudf/_lib/table.pxd +++ b/python/cudf/cudf/_lib/table.pxd @@ -12,4 +12,4 @@ cdef class Table: cdef table_view view(self) except * cdef table_view data_view(self) except * -cdef table_view make_table_view(columns) except * +cdef table_view table_view_from_columns(columns) except * diff --git a/python/cudf/cudf/_lib/table.pyx b/python/cudf/cudf/_lib/table.pyx index 2b576f4eaad..926b87615d2 100644 --- a/python/cudf/cudf/_lib/table.pyx +++ b/python/cudf/cudf/_lib/table.pyx @@ -77,10 +77,10 @@ cdef class Table: of this Table. """ if self._index is None: - return make_table_view( + return table_view_from_columns( self._data.columns ) - return make_table_view( + return table_view_from_columns( itertools.chain( self._index._data.columns, self._data.columns, @@ -92,12 +92,12 @@ cdef class Table: Return a cudf::table_view of just the data columns of this Table. """ - return make_table_view( + return table_view_from_columns( self._data.columns ) -cdef table_view make_table_view(columns) except*: +cdef table_view table_view_from_columns(columns) except*: """ Helper function to create a cudf::table_view from a list of Columns From b28d74dba400b56a91da4409a72b743847037a04 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 8 Sep 2021 12:30:10 -0700 Subject: [PATCH 05/10] Inline the functions to make lists of table views for concat. --- python/cudf/cudf/_lib/concat.pyx | 17 +++++++---------- python/cudf/cudf/_lib/utils.pxd | 2 -- python/cudf/cudf/_lib/utils.pyx | 16 ---------------- 3 files changed, 7 insertions(+), 28 deletions(-) diff --git a/python/cudf/cudf/_lib/concat.pyx b/python/cudf/cudf/_lib/concat.pyx index 5266d0ac773..28593be6d89 100644 --- a/python/cudf/cudf/_lib/concat.pyx +++ b/python/cudf/cudf/_lib/concat.pyx @@ -14,12 +14,7 @@ from cudf._lib.cpp.concatenate cimport ( ) from cudf._lib.cpp.table.table cimport table, table_view from cudf._lib.table cimport Table -from cudf._lib.utils cimport ( - data_from_unique_ptr, - make_column_views, - make_table_data_views, - make_table_views, -) +from cudf._lib.utils cimport data_from_unique_ptr, make_column_views from cudf.core.buffer import Buffer @@ -47,10 +42,12 @@ cpdef concat_columns(object columns): cpdef concat_tables(object tables, bool ignore_index=False): cdef unique_ptr[table] c_result cdef vector[table_view] c_views - if ignore_index is False: - c_views = make_table_views(tables) - else: - c_views = make_table_data_views(tables) + c_views.reserve(len(tables)) + for tbl in tables: + if ignore_index: + c_views.push_back(( tbl).data_view()) + else: + c_views.push_back((
tbl).view()) with nogil: c_result = move(libcudf_concatenate_tables(c_views)) diff --git a/python/cudf/cudf/_lib/utils.pxd b/python/cudf/cudf/_lib/utils.pxd index f9b225a0b89..ef429dff1ed 100644 --- a/python/cudf/cudf/_lib/utils.pxd +++ b/python/cudf/cudf/_lib/utils.pxd @@ -10,8 +10,6 @@ from cudf._lib.table cimport Table cdef vector[column_view] make_column_views(object columns) except* -cdef vector[table_view] make_table_views(object tables) except* -cdef vector[table_view] make_table_data_views(object tables) except* cdef vector[string] get_column_names(Table table, object index) except* cdef data_from_unique_ptr( unique_ptr[table] c_tbl, column_names, index_names=*) diff --git a/python/cudf/cudf/_lib/utils.pyx b/python/cudf/cudf/_lib/utils.pyx index 2456aa334e9..f7607b6b22e 100644 --- a/python/cudf/cudf/_lib/utils.pyx +++ b/python/cudf/cudf/_lib/utils.pyx @@ -45,22 +45,6 @@ cdef vector[column_view] make_column_views(object columns): return views -cdef vector[table_view] make_table_views(object tables): - cdef vector[table_view] views - views.reserve(len(tables)) - for tbl in tables: - views.push_back((
tbl).view()) - return views - - -cdef vector[table_view] make_table_data_views(object tables): - cdef vector[table_view] views - views.reserve(len(tables)) - for tbl in tables: - views.push_back((
tbl).data_view()) - return views - - cdef vector[string] get_column_names(Table table, object index): cdef vector[string] column_names if index is not False: From 8a504f02963f8f282dec60cbfbd01c04f90efd6c Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 8 Sep 2021 15:05:38 -0700 Subject: [PATCH 06/10] Replace all usage of view and data_view. --- python/cudf/cudf/_lib/concat.pyx | 7 +-- python/cudf/cudf/_lib/copying.pyx | 54 +++++++++------------ python/cudf/cudf/_lib/csv.pyx | 11 +++-- python/cudf/cudf/_lib/filling.pyx | 6 +-- python/cudf/cudf/_lib/groupby.pyx | 10 ++-- python/cudf/cudf/_lib/hash.pyx | 12 ++--- python/cudf/cudf/_lib/interop.pyx | 8 +-- python/cudf/cudf/_lib/join.pyx | 10 ++-- python/cudf/cudf/_lib/lists.pyx | 8 ++- python/cudf/cudf/_lib/merge.pyx | 8 ++- python/cudf/cudf/_lib/orc.pyx | 17 +++---- python/cudf/cudf/_lib/parquet.pyx | 17 ++++--- python/cudf/cudf/_lib/partitioning.pyx | 11 ++--- python/cudf/cudf/_lib/quantiles.pyx | 4 +- python/cudf/cudf/_lib/reshape.pyx | 6 +-- python/cudf/cudf/_lib/scalar.pyx | 6 ++- python/cudf/cudf/_lib/search.pyx | 6 +-- python/cudf/cudf/_lib/sort.pyx | 18 ++++--- python/cudf/cudf/_lib/stream_compaction.pyx | 14 +++--- python/cudf/cudf/_lib/strings/combine.pyx | 4 +- python/cudf/cudf/_lib/table.pxd | 1 + python/cudf/cudf/_lib/table.pyx | 25 +++++++--- python/cudf/cudf/_lib/transform.pyx | 6 +-- python/cudf/cudf/_lib/transpose.pyx | 4 +- 24 files changed, 140 insertions(+), 133 deletions(-) diff --git a/python/cudf/cudf/_lib/concat.pyx b/python/cudf/cudf/_lib/concat.pyx index 28593be6d89..5969aeaee77 100644 --- a/python/cudf/cudf/_lib/concat.pyx +++ b/python/cudf/cudf/_lib/concat.pyx @@ -13,7 +13,7 @@ from cudf._lib.cpp.concatenate cimport ( concatenate_tables as libcudf_concatenate_tables, ) from cudf._lib.cpp.table.table cimport table, table_view -from cudf._lib.table cimport Table +from cudf._lib.table cimport Table, table_view_from_table from cudf._lib.utils cimport data_from_unique_ptr, make_column_views from cudf.core.buffer import Buffer @@ -44,10 +44,7 @@ cpdef concat_tables(object tables, bool ignore_index=False): cdef vector[table_view] c_views c_views.reserve(len(tables)) for tbl in tables: - if ignore_index: - c_views.push_back((
tbl).data_view()) - else: - c_views.push_back((
tbl).view()) + c_views.push_back(table_view_from_table(tbl, ignore_index)) with nogil: c_result = move(libcudf_concatenate_tables(c_views)) diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx index 85105d7d666..386409e4262 100644 --- a/python/cudf/cudf/_lib/copying.pyx +++ b/python/cudf/cudf/_lib/copying.pyx @@ -19,7 +19,11 @@ from cudf._lib.column cimport Column from cudf._lib.scalar import as_device_scalar from cudf._lib.scalar cimport DeviceScalar -from cudf._lib.table cimport Table, table_view_from_columns +from cudf._lib.table cimport ( + Table, + table_view_from_columns, + table_view_from_table, +) from cudf._lib.reduce import minmax from cudf.core.abc import Serializable @@ -160,11 +164,9 @@ def gather( f" rows.") cdef unique_ptr[table] c_result - cdef table_view source_table_view - if keep_index is True: - source_table_view = source_table.view() - else: - source_table_view = source_table.data_view() + cdef table_view source_table_view = table_view_from_table( + source_table, not keep_index + ) cdef column_view gather_map_view = gather_map.view() cdef cpp_copying.out_of_bounds_policy policy = ( cpp_copying.out_of_bounds_policy.NULLIFY if nullify @@ -264,7 +266,7 @@ def _reverse_column(Column source_column): def _reverse_table(Table source_table): - cdef table_view reverse_table_view = source_table.view() + cdef table_view reverse_table_view = table_view_from_columns(source_table) cdef unique_ptr[table] c_result with nogil: @@ -326,11 +328,9 @@ def column_allocate_like(Column input_column, size=None): def table_empty_like(Table input_table, bool keep_index=True): - cdef table_view input_table_view - if keep_index is True: - input_table_view = input_table.view() - else: - input_table_view = input_table.data_view() + cdef table_view input_table_view = table_view_from_table( + input_table, not keep_index + ) cdef unique_ptr[table] c_result @@ -377,11 +377,9 @@ def column_slice(Column input_column, object indices): def table_slice(Table input_table, object indices, bool keep_index=True): - cdef table_view input_table_view - if keep_index is True: - input_table_view = input_table.view() - else: - input_table_view = input_table.data_view() + cdef table_view input_table_view = table_view_from_table( + input_table, not keep_index + ) cdef vector[size_type] c_indices c_indices.reserve(len(indices)) @@ -446,11 +444,9 @@ def column_split(Column input_column, object splits): def table_split(Table input_table, object splits, bool keep_index=True): - cdef table_view input_table_view - if keep_index is True: - input_table_view = input_table.view() - else: - input_table_view = input_table.data_view() + cdef table_view input_table_view = table_view_from_table( + input_table, not keep_index + ) cdef vector[size_type] c_splits c_splits.reserve(len(splits)) @@ -589,8 +585,8 @@ def copy_if_else(object lhs, object rhs, Column boolean_mask): def _boolean_mask_scatter_table(Table input_table, Table target_table, Column boolean_mask): - cdef table_view input_table_view = input_table.view() - cdef table_view target_table_view = target_table.view() + cdef table_view input_table_view = table_view_from_columns(input_table) + cdef table_view target_table_view = table_view_from_columns(target_table) cdef column_view boolean_mask_view = boolean_mask.view() cdef unique_ptr[table] c_result @@ -620,7 +616,7 @@ def _boolean_mask_scatter_scalar(list input_scalars, Table target_table, for scl in input_scalars: input_scalar_vector.push_back(reference_wrapper[constscalar]( scl.get_raw_ptr()[0])) - cdef table_view target_table_view = target_table.view() + cdef table_view target_table_view = table_view_from_columns(target_table) cdef column_view boolean_mask_view = boolean_mask.view() cdef unique_ptr[table] c_result @@ -703,9 +699,7 @@ def get_element(Column input_column, size_type index): def sample(Table input, size_type n, bool replace, int64_t seed, bool keep_index=True): - cdef table_view tbl_view = ( - input.view() if keep_index else input.data_view() - ) + cdef table_view tbl_view = table_view_from_table(input, not keep_index) cdef cpp_copying.sample_with_replacement replacement if replace: @@ -765,10 +759,10 @@ cdef class _CPackedColumns: or input_table.index.stop != len(input_table) or input_table.index.step != 1 ): - input_table_view = input_table.view() + input_table_view = table_view_from_table(input_table) p.index_names = input_table._index_names else: - input_table_view = input_table.data_view() + input_table_view = table_view_from_table(input_table, True) p.column_names = input_table._column_names p.column_dtypes = {} diff --git a/python/cudf/cudf/_lib/csv.pyx b/python/cudf/cudf/_lib/csv.pyx index abfe4373e96..45d4a8a6073 100644 --- a/python/cudf/cudf/_lib/csv.pyx +++ b/python/cudf/cudf/_lib/csv.pyx @@ -44,7 +44,11 @@ from cudf._lib.cpp.io.types cimport ( ) from cudf._lib.cpp.table.table_view cimport table_view from cudf._lib.io.utils cimport make_sink_info, make_source_info -from cudf._lib.table cimport Table, table_view_from_columns +from cudf._lib.table cimport ( + Table, + table_view_from_columns, + table_view_from_table, +) from cudf._lib.utils cimport data_from_unique_ptr ctypedef int32_t underlying_type_t_compression @@ -458,8 +462,9 @@ cpdef write_csv( -------- cudf.to_csv """ - cdef table_view input_table_view = \ - table.view() if index is True else table.data_view() + cdef table_view input_table_view = table_view_from_table( + table, not index + ) cdef bool include_header_c = header cdef char delim_c = ord(sep) cdef string line_term_c = line_terminator.encode() diff --git a/python/cudf/cudf/_lib/filling.pyx b/python/cudf/cudf/_lib/filling.pyx index 99a3957006b..dfeb08832f4 100644 --- a/python/cudf/cudf/_lib/filling.pyx +++ b/python/cudf/cudf/_lib/filling.pyx @@ -15,7 +15,7 @@ from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view from cudf._lib.cpp.types cimport size_type from cudf._lib.scalar cimport DeviceScalar -from cudf._lib.table cimport Table +from cudf._lib.table cimport Table, table_view_from_table from cudf._lib.utils cimport data_from_unique_ptr @@ -59,7 +59,7 @@ def repeat(Table inp, object count, bool check_count=False): def _repeat_via_column(Table inp, Column count, bool check_count): - cdef table_view c_inp = inp.view() + cdef table_view c_inp = table_view_from_table(inp) cdef column_view c_count = count.view() cdef bool c_check_count = check_count cdef unique_ptr[table] c_result @@ -79,7 +79,7 @@ def _repeat_via_column(Table inp, Column count, bool check_count): def _repeat_via_size_type(Table inp, size_type count): - cdef table_view c_inp = inp.view() + cdef table_view c_inp = table_view_from_table(inp) cdef unique_ptr[table] c_result with nogil: diff --git a/python/cudf/cudf/_lib/groupby.pyx b/python/cudf/cudf/_lib/groupby.pyx index 153b116cd33..2aea8a57a0b 100644 --- a/python/cudf/cudf/_lib/groupby.pyx +++ b/python/cudf/cudf/_lib/groupby.pyx @@ -26,7 +26,7 @@ import cudf from cudf._lib.column cimport Column from cudf._lib.scalar cimport DeviceScalar -from cudf._lib.table cimport Table +from cudf._lib.table cimport Table, table_view_from_table from cudf._lib.scalar import as_device_scalar @@ -74,7 +74,7 @@ cdef class GroupBy: else: c_null_handling = libcudf_types.null_policy.INCLUDE - cdef table_view keys_view = keys.view() + cdef table_view keys_view = table_view_from_table(keys) with nogil: self.c_obj.reset( @@ -90,7 +90,7 @@ cdef class GroupBy: def groups(self, Table values): - cdef table_view values_view = values.view() + cdef table_view values_view = table_view_from_table(values) with nogil: c_groups = move(self.c_obj.get()[0].get_groups(values_view)) @@ -293,7 +293,7 @@ cdef class GroupBy: return self.aggregate_internal(values, aggregations) def shift(self, Table values, int periods, list fill_values): - cdef table_view view = values.view() + cdef table_view view = table_view_from_table(values) cdef size_type num_col = view.num_columns() cdef vector[size_type] offsets = vector[size_type](num_col, periods) @@ -329,7 +329,7 @@ cdef class GroupBy: return shifted, grouped_keys def replace_nulls(self, Table values, object method): - cdef table_view val_view = values.view() + cdef table_view val_view = table_view_from_table(values) cdef pair[unique_ptr[table], unique_ptr[table]] c_result cdef replace_policy policy = ( replace_policy.PRECEDING diff --git a/python/cudf/cudf/_lib/hash.pyx b/python/cudf/cudf/_lib/hash.pyx index 137b19ef69c..2d6197a46a2 100644 --- a/python/cudf/cudf/_lib/hash.pyx +++ b/python/cudf/cudf/_lib/hash.pyx @@ -14,7 +14,7 @@ from cudf._lib.cpp.hash cimport hash as cpp_hash from cudf._lib.cpp.partitioning cimport hash_partition as cpp_hash_partition from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view -from cudf._lib.table cimport Table +from cudf._lib.table cimport Table, table_view_from_table from cudf._lib.utils cimport data_from_unique_ptr @@ -22,11 +22,9 @@ def hash_partition(Table source_table, object columns_to_hash, int num_partitions, bool keep_index=True): cdef vector[libcudf_types.size_type] c_columns_to_hash = columns_to_hash cdef int c_num_partitions = num_partitions - cdef table_view c_source_view - if keep_index is True: - c_source_view = source_table.view() - else: - c_source_view = source_table.data_view() + cdef table_view c_source_view = table_view_from_table( + source_table, not keep_index + ) cdef pair[unique_ptr[table], vector[libcudf_types.size_type]] c_result with nogil: @@ -59,7 +57,7 @@ def hash_partition(Table source_table, object columns_to_hash, def hash(Table source_table, object initial_hash_values=None, int seed=0): cdef vector[uint32_t] c_initial_hash = initial_hash_values or [] - cdef table_view c_source_view = source_table.data_view() + cdef table_view c_source_view = table_view_from_table(source_table, True) cdef unique_ptr[column] c_result with nogil: diff --git a/python/cudf/cudf/_lib/interop.pyx b/python/cudf/cudf/_lib/interop.pyx index 234513733d1..11d20cf02d2 100644 --- a/python/cudf/cudf/_lib/interop.pyx +++ b/python/cudf/cudf/_lib/interop.pyx @@ -20,7 +20,7 @@ from cudf._lib.cpp.interop cimport ( ) from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view -from cudf._lib.table cimport Table +from cudf._lib.table cimport Table, table_view_from_table from cudf._lib.utils cimport data_from_unique_ptr @@ -63,7 +63,9 @@ def to_dlpack(Table source_table): ) cdef DLManagedTensor *dlpack_tensor - cdef table_view source_table_view = source_table.data_view() + cdef table_view source_table_view = table_view_from_table( + source_table, True + ) with nogil: dlpack_tensor = cpp_to_dlpack( @@ -128,7 +130,7 @@ def to_arrow(Table input_table, cdef vector[column_metadata] cpp_metadata = gather_metadata(metadata) cdef table_view input_table_view = ( - input_table.view() if keep_index else input_table.data_view() + table_view_from_table(input_table, not keep_index) ) cdef shared_ptr[CTable] cpp_arrow_table diff --git a/python/cudf/cudf/_lib/join.pyx b/python/cudf/cudf/_lib/join.pyx index bf9f5a2e831..dce9a4703c9 100644 --- a/python/cudf/cudf/_lib/join.pyx +++ b/python/cudf/cudf/_lib/join.pyx @@ -16,15 +16,15 @@ from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view from cudf._lib.cpp.types cimport data_type, size_type, type_id -from cudf._lib.table cimport Table +from cudf._lib.table cimport Table, table_view_from_table # The functions below return the *gathermaps* that represent # the join result when joining on the keys `lhs` and `rhs`. cpdef join(Table lhs, Table rhs, how=None): cdef pair[cpp_join.gather_map_type, cpp_join.gather_map_type] c_result - cdef table_view c_lhs = lhs.view() - cdef table_view c_rhs = rhs.view() + cdef table_view c_lhs = table_view_from_table(lhs) + cdef table_view c_rhs = table_view_from_table(rhs) if how == "inner": c_result = move(cpp_join.inner_join( @@ -52,8 +52,8 @@ cpdef join(Table lhs, Table rhs, how=None): cpdef semi_join(Table lhs, Table rhs, how=None): # left-semi and left-anti joins cdef cpp_join.gather_map_type c_result - cdef table_view c_lhs = lhs.view() - cdef table_view c_rhs = rhs.view() + cdef table_view c_lhs = table_view_from_table(lhs) + cdef table_view c_rhs = table_view_from_table(rhs) if how == "leftsemi": c_result = move(cpp_join.left_semi_join( diff --git a/python/cudf/cudf/_lib/lists.pyx b/python/cudf/cudf/_lib/lists.pyx index 59c3a4b89dc..882f2fbc9a0 100644 --- a/python/cudf/cudf/_lib/lists.pyx +++ b/python/cudf/cudf/_lib/lists.pyx @@ -33,7 +33,7 @@ from cudf._lib.cpp.types cimport ( size_type, ) from cudf._lib.scalar cimport DeviceScalar -from cudf._lib.table cimport Table +from cudf._lib.table cimport Table, table_view_from_table from cudf._lib.types cimport ( underlying_type_t_null_order, underlying_type_t_order, @@ -63,9 +63,7 @@ def count_elements(Column col): def explode_outer(Table tbl, int explode_column_idx, bool ignore_index=False): - cdef table_view c_table_view = ( - tbl.data_view() if ignore_index else tbl.view() - ) + cdef table_view c_table_view = table_view_from_table(tbl, ignore_index) cdef size_type c_explode_column_idx = explode_column_idx cdef unique_ptr[table] c_result @@ -168,7 +166,7 @@ def contains_scalar(Column col, object py_search_key): def concatenate_rows(Table tbl): cdef unique_ptr[column] c_result - cdef table_view c_table_view = tbl.view() + cdef table_view c_table_view = table_view_from_table(tbl) with nogil: c_result = move(cpp_concatenate_rows( diff --git a/python/cudf/cudf/_lib/merge.pyx b/python/cudf/cudf/_lib/merge.pyx index 83f088f4419..27f96cdbe60 100644 --- a/python/cudf/cudf/_lib/merge.pyx +++ b/python/cudf/cudf/_lib/merge.pyx @@ -10,7 +10,7 @@ from cudf._lib.column cimport Column from cudf._lib.cpp.merge cimport merge as cpp_merge from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view -from cudf._lib.table cimport Table +from cudf._lib.table cimport Table, table_view_from_table from cudf._lib.utils cimport data_from_unique_ptr @@ -34,10 +34,8 @@ def merge_sorted( # Use metadata from 0th table for names, etc c_input_tables.reserve(len(tables)) for source_table in tables: - if ignore_index: - c_input_tables.push_back(source_table.data_view()) - else: - c_input_tables.push_back(source_table.view()) + c_input_tables.push_back( + table_view_from_table(source_table, ignore_index)) source_table = tables[0] # Define sorting order and null precedence diff --git a/python/cudf/cudf/_lib/orc.pyx b/python/cudf/cudf/_lib/orc.pyx index b0cbfb33931..056f3156a29 100644 --- a/python/cudf/cudf/_lib/orc.pyx +++ b/python/cudf/cudf/_lib/orc.pyx @@ -40,7 +40,7 @@ from cudf._lib.io.utils cimport ( update_column_struct_field_names, update_struct_field_names, ) -from cudf._lib.table cimport Table +from cudf._lib.table cimport Table, table_view_from_table from cudf._lib.types import SUPPORTED_NUMPY_TO_LIBCUDF_TYPES @@ -154,8 +154,9 @@ cpdef write_orc(Table table, metadata_.column_names.push_back(str.encode(col_name)) cdef orc_writer_options c_orc_writer_options = move( - orc_writer_options.builder(sink_info_c, table.data_view()) - .metadata(&metadata_) + orc_writer_options.builder( + sink_info_c, table_view_from_table(table, True) + ).metadata(&metadata_) .compression(compression_) .enable_statistics( (True if enable_statistics else False)) .build() @@ -244,13 +245,11 @@ cdef class ORCWriter: if not self.initialized: self._initialize_chunked_state(table) - cdef table_view tv - if self.index is not False and ( + keep_index = self.index is not False and ( table._index.name is not None or - isinstance(table._index, cudf.core.multiindex.MultiIndex)): - tv = table.view() - else: - tv = table.data_view() + isinstance(table._index, cudf.core.multiindex.MultiIndex) + ) + tv = table_view_from_table(table, keep_index) with nogil: self.writer.get()[0].write(tv) diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx index e12a61f2a49..1247a852dc2 100644 --- a/python/cudf/cudf/_lib/parquet.pyx +++ b/python/cudf/cudf/_lib/parquet.pyx @@ -62,7 +62,7 @@ from cudf._lib.io.utils cimport ( make_source_info, update_struct_field_names, ) -from cudf._lib.table cimport Table +from cudf._lib.table cimport Table, table_view_from_table cdef class BufferArrayFromVector: @@ -284,7 +284,7 @@ cpdef write_parquet( if index is True or ( index is None and not isinstance(table._index, cudf.RangeIndex) ): - tv = table.view() + tv = table_view_from_table(table) tbl_meta = make_unique[table_input_metadata](tv) for level, idx_name in enumerate(table._index.names): tbl_meta.get().column_metadata[level].set_name( @@ -294,7 +294,7 @@ cpdef write_parquet( ) num_index_cols_meta = len(table._index.names) else: - tv = table.data_view() + tv = table_view_from_table(table, True) tbl_meta = make_unique[table_input_metadata](tv) num_index_cols_meta = 0 @@ -380,9 +380,9 @@ cdef class ParquetWriter: if self.index is not False and ( table._index.name is not None or isinstance(table._index, cudf.core.multiindex.MultiIndex)): - tv = table.view() + tv = table_view_from_table(table) else: - tv = table.data_view() + tv = table_view_from_table(table, True) with nogil: self.writer.get()[0].write(tv) @@ -420,10 +420,11 @@ cdef class ParquetWriter: # Set the table_metadata num_index_cols_meta = 0 - self.tbl_meta = make_unique[table_input_metadata](table.data_view()) + self.tbl_meta = make_unique[table_input_metadata]( + table_view_from_table(table, True)) if self.index is not False: if isinstance(table._index, cudf.core.multiindex.MultiIndex): - tv = table.view() + tv = table_view_from_table(table) self.tbl_meta = make_unique[table_input_metadata](tv) for level, idx_name in enumerate(table._index.names): self.tbl_meta.get().column_metadata[level].set_name( @@ -432,7 +433,7 @@ cdef class ParquetWriter: num_index_cols_meta = len(table._index.names) else: if table._index.name is not None: - tv = table.view() + tv = table_view_from_table(table) self.tbl_meta = make_unique[table_input_metadata](tv) self.tbl_meta.get().column_metadata[0].set_name( str.encode(table._index.name) diff --git a/python/cudf/cudf/_lib/partitioning.pyx b/python/cudf/cudf/_lib/partitioning.pyx index 90aa6bb0344..6d39c085186 100644 --- a/python/cudf/cudf/_lib/partitioning.pyx +++ b/python/cudf/cudf/_lib/partitioning.pyx @@ -11,7 +11,7 @@ from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.cpp.partitioning cimport partition as cpp_partition from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view -from cudf._lib.table cimport Table +from cudf._lib.table cimport Table, table_view_from_table from cudf._lib.stream_compaction import distinct_count as cpp_distinct_count @@ -25,12 +25,9 @@ def partition(Table source_table, Column partition_map, if num_partitions is None: num_partitions = cpp_distinct_count(partition_map, ignore_nulls=True) cdef int c_num_partitions = num_partitions - cdef table_view c_source_view - - if keep_index is True: - c_source_view = source_table.view() - else: - c_source_view = source_table.data_view() + cdef table_view c_source_view = table_view_from_table( + source_table, not keep_index + ) cdef column_view c_partition_map_view = partition_map.view() diff --git a/python/cudf/cudf/_lib/quantiles.pyx b/python/cudf/cudf/_lib/quantiles.pyx index 76bf587237c..4f78f6c4bf9 100644 --- a/python/cudf/cudf/_lib/quantiles.pyx +++ b/python/cudf/cudf/_lib/quantiles.pyx @@ -7,7 +7,7 @@ from libcpp.vector cimport vector from cudf._lib.column cimport Column from cudf._lib.scalar cimport DeviceScalar -from cudf._lib.table cimport Table +from cudf._lib.table cimport Table, table_view_from_table from cudf._lib.types cimport ( underlying_type_t_interpolation, underlying_type_t_null_order, @@ -81,7 +81,7 @@ def quantiles(Table source_table, object is_input_sorted, list column_order, list null_precedence): - cdef table_view c_input = source_table.data_view() + cdef table_view c_input = table_view_from_table(source_table, True) cdef vector[double] c_q = q cdef interpolation c_interp = ( interp diff --git a/python/cudf/cudf/_lib/reshape.pyx b/python/cudf/cudf/_lib/reshape.pyx index acca2694d10..a660f2f389f 100644 --- a/python/cudf/cudf/_lib/reshape.pyx +++ b/python/cudf/cudf/_lib/reshape.pyx @@ -12,12 +12,12 @@ from cudf._lib.cpp.reshape cimport ( from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view from cudf._lib.cpp.types cimport size_type -from cudf._lib.table cimport Table +from cudf._lib.table cimport Table, table_view_from_table from cudf._lib.utils cimport data_from_unique_ptr def interleave_columns(Table source_table): - cdef table_view c_view = source_table.data_view() + cdef table_view c_view = table_view_from_table(source_table, True) cdef unique_ptr[column] c_result with nogil: @@ -30,7 +30,7 @@ def interleave_columns(Table source_table): def tile(Table source_table, size_type count): cdef size_type c_count = count - cdef table_view c_view = source_table.view() + cdef table_view c_view = table_view_from_table(source_table) cdef unique_ptr[table] c_result with nogil: diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx index cd249e5c3cb..5b73cfd2231 100644 --- a/python/cudf/cudf/_lib/scalar.pyx +++ b/python/cudf/cudf/_lib/scalar.pyx @@ -30,7 +30,11 @@ from cudf.core.dtypes import ListDtype, StructDtype from cudf._lib.column cimport Column from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.cpp.table.table_view cimport table_view -from cudf._lib.table cimport Table, table_view_from_columns +from cudf._lib.table cimport ( + Table, + table_view_from_columns, + table_view_from_table, +) from cudf._lib.types cimport dtype_from_column_view, underlying_type_t_type_id from cudf._lib.interop import from_arrow, to_arrow diff --git a/python/cudf/cudf/_lib/search.pyx b/python/cudf/cudf/_lib/search.pyx index 33471028d66..23c33f926ed 100644 --- a/python/cudf/cudf/_lib/search.pyx +++ b/python/cudf/cudf/_lib/search.pyx @@ -10,7 +10,7 @@ from cudf._lib.column cimport Column from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.cpp.table.table_view cimport table_view -from cudf._lib.table cimport Table +from cudf._lib.table cimport Table, table_view_from_table def search_sorted( @@ -33,8 +33,8 @@ def search_sorted( cdef vector[libcudf_types.null_order] c_null_precedence cdef libcudf_types.order c_order cdef libcudf_types.null_order c_null_order - cdef table_view c_table_data = table.data_view() - cdef table_view c_values_data = values.data_view() + cdef table_view c_table_data = table_view_from_table(table, True) + cdef table_view c_values_data = table_view_from_table(values, True) # Note: We are ignoring index columns here c_order = (libcudf_types.order.ASCENDING diff --git a/python/cudf/cudf/_lib/sort.pyx b/python/cudf/cudf/_lib/sort.pyx index a07017ef796..093163b8388 100644 --- a/python/cudf/cudf/_lib/sort.pyx +++ b/python/cudf/cudf/_lib/sort.pyx @@ -23,7 +23,7 @@ from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view from cudf._lib.cpp.types cimport null_order, null_policy, order from cudf._lib.sort cimport underlying_type_t_rank_method -from cudf._lib.table cimport Table +from cudf._lib.table cimport Table, table_view_from_table from cudf._lib.utils cimport data_from_unique_ptr @@ -103,7 +103,7 @@ def is_sorted( ) cdef bool c_result - cdef table_view source_table_view = source_table.data_view() + cdef table_view source_table_view = table_view_from_table(source_table) with nogil: c_result = cpp_is_sorted( source_table_view, @@ -129,7 +129,9 @@ def order_by(Table source_table, object ascending, bool na_position): """ - cdef table_view source_table_view = source_table.data_view() + cdef table_view source_table_view = table_view_from_table( + source_table, True + ) cdef vector[order] column_order column_order.reserve(len(ascending)) cdef null_order pred = ( @@ -171,8 +173,10 @@ def digitize(Table source_values_table, Table bins, bool right=False): right or the left bin edge. """ - cdef table_view bins_view = bins.view() - cdef table_view source_values_table_view = source_values_table.view() + cdef table_view bins_view = table_view_from_table(bins) + cdef table_view source_values_table_view = table_view_from_table( + source_values_table + ) cdef vector[order] column_order = ( vector[order]( bins_view.num_columns(), @@ -221,7 +225,9 @@ def rank_columns(Table source_table, object method, str na_option, """ Compute numerical data ranks (1 through n) of each column in the dataframe """ - cdef table_view source_table_view = source_table.data_view() + cdef table_view source_table_view = table_view_from_table( + source_table, True + ) cdef rank_method c_rank_method = < rank_method > ( < underlying_type_t_rank_method > method diff --git a/python/cudf/cudf/_lib/stream_compaction.pyx b/python/cudf/cudf/_lib/stream_compaction.pyx index f1eca64bb87..6bb64e2fb04 100644 --- a/python/cudf/cudf/_lib/stream_compaction.pyx +++ b/python/cudf/cudf/_lib/stream_compaction.pyx @@ -24,7 +24,7 @@ from cudf._lib.cpp.types cimport ( null_policy, size_type, ) -from cudf._lib.table cimport Table +from cudf._lib.table cimport Table, table_view_from_table from cudf._lib.utils cimport data_from_unique_ptr @@ -68,7 +68,7 @@ def drop_nulls(Table source_table, how="any", keys=None, thresh=None): c_keep_threshold = 1 cdef unique_ptr[table] c_result - cdef table_view source_table_view = source_table.view() + cdef table_view source_table_view = table_view_from_table(source_table) with nogil: c_result = move( @@ -105,7 +105,7 @@ def apply_boolean_mask(Table source_table, Column boolean_mask): assert pd.api.types.is_bool_dtype(boolean_mask.dtype) cdef unique_ptr[table] c_result - cdef table_view source_table_view = source_table.view() + cdef table_view source_table_view = table_view_from_table(source_table) cdef column_view boolean_mask_view = boolean_mask.view() with nogil: @@ -177,11 +177,9 @@ def drop_duplicates(Table source_table, else null_equality.UNEQUAL ) cdef unique_ptr[table] c_result - cdef table_view source_table_view - if ignore_index: - source_table_view = source_table.data_view() - else: - source_table_view = source_table.view() + cdef table_view source_table_view = table_view_from_table( + source_table, ignore_index + ) with nogil: c_result = move( diff --git a/python/cudf/cudf/_lib/strings/combine.pyx b/python/cudf/cudf/_lib/strings/combine.pyx index 7129b1befe1..9c503cf9ad2 100644 --- a/python/cudf/cudf/_lib/strings/combine.pyx +++ b/python/cudf/cudf/_lib/strings/combine.pyx @@ -18,7 +18,7 @@ from cudf._lib.cpp.strings.combine cimport ( from cudf._lib.cpp.table.table_view cimport table_view from cudf._lib.cpp.types cimport size_type from cudf._lib.scalar cimport DeviceScalar -from cudf._lib.table cimport Table +from cudf._lib.table cimport Table, table_view_from_table def concatenate(Table source_strings, @@ -33,7 +33,7 @@ def concatenate(Table source_strings, cdef DeviceScalar narep = na_rep.device_value cdef unique_ptr[column] c_result - cdef table_view source_view = source_strings.data_view() + cdef table_view source_view = table_view_from_table(source_strings, True) cdef const string_scalar* scalar_separator = \ (separator.get_raw_ptr()) diff --git a/python/cudf/cudf/_lib/table.pxd b/python/cudf/cudf/_lib/table.pxd index 09f6072953d..182e6f45b7f 100644 --- a/python/cudf/cudf/_lib/table.pxd +++ b/python/cudf/cudf/_lib/table.pxd @@ -13,3 +13,4 @@ cdef class Table: cdef table_view data_view(self) except * cdef table_view table_view_from_columns(columns) except * +cdef table_view table_view_from_table(Table tbl, ignore_index=*) except* diff --git a/python/cudf/cudf/_lib/table.pyx b/python/cudf/cudf/_lib/table.pyx index 926b87615d2..9b80af74ad1 100644 --- a/python/cudf/cudf/_lib/table.pyx +++ b/python/cudf/cudf/_lib/table.pyx @@ -81,10 +81,7 @@ cdef class Table: self._data.columns ) return table_view_from_columns( - itertools.chain( - self._index._data.columns, - self._data.columns, - ) + self._index._data.columns + self._data.columns ) cdef table_view data_view(self) except *: @@ -98,10 +95,7 @@ cdef class Table: cdef table_view table_view_from_columns(columns) except*: - """ - Helper function to create a cudf::table_view from - a list of Columns - """ + """Create a cudf::table_view from an iterable of Columns.""" cdef vector[column_view] column_views cdef Column col @@ -109,3 +103,18 @@ cdef table_view table_view_from_columns(columns) except*: column_views.push_back(col.view()) return table_view(column_views) + + +cdef table_view table_view_from_table(Table tbl, ignore_index=False) except*: + """Create a cudf::table_view from a Table. + + Parameters + ---------- + ignore_index : bool, default False + If True, don't include the index in the columns. + """ + return table_view_from_columns( + tbl._index._data.columns + tbl._data.columns + if not ignore_index and tbl._index is not None + else tbl._data.columns + ) diff --git a/python/cudf/cudf/_lib/transform.pyx b/python/cudf/cudf/_lib/transform.pyx index 60e6132fe7f..7eebccd7b5b 100644 --- a/python/cudf/cudf/_lib/transform.pyx +++ b/python/cudf/cudf/_lib/transform.pyx @@ -14,7 +14,7 @@ from libcpp.utility cimport move from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer from cudf._lib.column cimport Column -from cudf._lib.table cimport Table +from cudf._lib.table cimport Table, table_view_from_table from cudf.core.buffer import Buffer @@ -127,7 +127,7 @@ def transform(Column input, op): def masked_udf(Table incols, op, output_type): - cdef table_view data_view = incols.data_view() + cdef table_view data_view = table_view_from_table(incols, True) cdef string c_str = op.encode("UTF-8") cdef type_id c_tid cdef data_type c_dtype @@ -150,7 +150,7 @@ def masked_udf(Table incols, op, output_type): def table_encode(Table input): - cdef table_view c_input = input.data_view() + cdef table_view c_input = table_view_from_table(input, True) cdef pair[unique_ptr[table], unique_ptr[column]] c_result with nogil: diff --git a/python/cudf/cudf/_lib/transpose.pyx b/python/cudf/cudf/_lib/transpose.pyx index 0f8f0b6ea14..d7acaa7270a 100644 --- a/python/cudf/cudf/_lib/transpose.pyx +++ b/python/cudf/cudf/_lib/transpose.pyx @@ -13,7 +13,7 @@ from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view from cudf._lib.cpp.transpose cimport transpose as cpp_transpose -from cudf._lib.table cimport Table +from cudf._lib.table cimport Table, table_view_from_table from cudf._lib.utils cimport data_from_table_view @@ -46,7 +46,7 @@ def transpose(Table source): raise ValueError('Columns must all have the same dtype') cdef pair[unique_ptr[column], table_view] c_result - cdef table_view c_input = source.data_view() + cdef table_view c_input = table_view_from_table(source, True) with nogil: c_result = move(cpp_transpose(c_input)) From 3e30b2905bf4bc33572b818df33faefb76aa22c1 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 8 Sep 2021 15:15:01 -0700 Subject: [PATCH 07/10] Remove now unnecessary view functions. --- python/cudf/cudf/_lib/table.pxd | 3 --- python/cudf/cudf/_lib/table.pyx | 22 ---------------------- 2 files changed, 25 deletions(-) diff --git a/python/cudf/cudf/_lib/table.pxd b/python/cudf/cudf/_lib/table.pxd index 182e6f45b7f..2299c924bbc 100644 --- a/python/cudf/cudf/_lib/table.pxd +++ b/python/cudf/cudf/_lib/table.pxd @@ -9,8 +9,5 @@ from cudf._lib.cpp.table.table_view cimport mutable_table_view, table_view cdef class Table: cdef dict __dict__ - cdef table_view view(self) except * - cdef table_view data_view(self) except * - cdef table_view table_view_from_columns(columns) except * cdef table_view table_view_from_table(Table tbl, ignore_index=*) except* diff --git a/python/cudf/cudf/_lib/table.pyx b/python/cudf/cudf/_lib/table.pyx index 9b80af74ad1..9cbae4c7bb3 100644 --- a/python/cudf/cudf/_lib/table.pyx +++ b/python/cudf/cudf/_lib/table.pyx @@ -71,28 +71,6 @@ cdef class Table: """ return self._data.columns - cdef table_view view(self) except *: - """ - Return a cudf::table_view of all columns (including index columns) - of this Table. - """ - if self._index is None: - return table_view_from_columns( - self._data.columns - ) - return table_view_from_columns( - self._index._data.columns + self._data.columns - ) - - cdef table_view data_view(self) except *: - """ - Return a cudf::table_view of just the data columns - of this Table. - """ - return table_view_from_columns( - self._data.columns - ) - cdef table_view table_view_from_columns(columns) except*: """Create a cudf::table_view from an iterable of Columns.""" From 582ef252ccdd20bd122cc565b4f636d885365f6e Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 10 Sep 2021 09:50:58 -0700 Subject: [PATCH 08/10] Fix bug in orc modification. --- python/cudf/cudf/_lib/orc.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/_lib/orc.pyx b/python/cudf/cudf/_lib/orc.pyx index 056f3156a29..4c3894fef74 100644 --- a/python/cudf/cudf/_lib/orc.pyx +++ b/python/cudf/cudf/_lib/orc.pyx @@ -249,7 +249,7 @@ cdef class ORCWriter: table._index.name is not None or isinstance(table._index, cudf.core.multiindex.MultiIndex) ) - tv = table_view_from_table(table, keep_index) + tv = table_view_from_table(table, not keep_index) with nogil: self.writer.get()[0].write(tv) From e7bf0207e46e71266c3aa4cc54901ddfd9e48d00 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 13 Sep 2021 11:15:49 -0700 Subject: [PATCH 09/10] Update python/cudf/cudf/_lib/sort.pyx Co-authored-by: Michael Wang --- python/cudf/cudf/_lib/sort.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/_lib/sort.pyx b/python/cudf/cudf/_lib/sort.pyx index 093163b8388..0362a800ab5 100644 --- a/python/cudf/cudf/_lib/sort.pyx +++ b/python/cudf/cudf/_lib/sort.pyx @@ -226,7 +226,7 @@ def rank_columns(Table source_table, object method, str na_option, Compute numerical data ranks (1 through n) of each column in the dataframe """ cdef table_view source_table_view = table_view_from_table( - source_table, True + source_table, ignore_index=True ) cdef rank_method c_rank_method = < rank_method > ( From 5b487c6b6421c3278acb5c9a1b0d06c130d09749 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 13 Sep 2021 14:35:33 -0700 Subject: [PATCH 10/10] Pass ignore_index by keyword everywhere. --- python/cudf/cudf/_lib/copying.pyx | 3 ++- python/cudf/cudf/_lib/hash.pyx | 3 ++- python/cudf/cudf/_lib/interop.pyx | 2 +- python/cudf/cudf/_lib/orc.pyx | 2 +- python/cudf/cudf/_lib/parquet.pyx | 6 +++--- python/cudf/cudf/_lib/quantiles.pyx | 3 ++- python/cudf/cudf/_lib/reshape.pyx | 3 ++- python/cudf/cudf/_lib/search.pyx | 6 ++++-- python/cudf/cudf/_lib/sort.pyx | 2 +- python/cudf/cudf/_lib/strings/combine.pyx | 3 ++- python/cudf/cudf/_lib/transform.pyx | 6 ++++-- python/cudf/cudf/_lib/transpose.pyx | 3 ++- 12 files changed, 26 insertions(+), 16 deletions(-) diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx index 386409e4262..7989e66790e 100644 --- a/python/cudf/cudf/_lib/copying.pyx +++ b/python/cudf/cudf/_lib/copying.pyx @@ -762,7 +762,8 @@ cdef class _CPackedColumns: input_table_view = table_view_from_table(input_table) p.index_names = input_table._index_names else: - input_table_view = table_view_from_table(input_table, True) + input_table_view = table_view_from_table( + input_table, ignore_index=True) p.column_names = input_table._column_names p.column_dtypes = {} diff --git a/python/cudf/cudf/_lib/hash.pyx b/python/cudf/cudf/_lib/hash.pyx index 2d6197a46a2..ffbd717e533 100644 --- a/python/cudf/cudf/_lib/hash.pyx +++ b/python/cudf/cudf/_lib/hash.pyx @@ -57,7 +57,8 @@ def hash_partition(Table source_table, object columns_to_hash, def hash(Table source_table, object initial_hash_values=None, int seed=0): cdef vector[uint32_t] c_initial_hash = initial_hash_values or [] - cdef table_view c_source_view = table_view_from_table(source_table, True) + cdef table_view c_source_view = table_view_from_table( + source_table, ignore_index=True) cdef unique_ptr[column] c_result with nogil: diff --git a/python/cudf/cudf/_lib/interop.pyx b/python/cudf/cudf/_lib/interop.pyx index 11d20cf02d2..5de313854d2 100644 --- a/python/cudf/cudf/_lib/interop.pyx +++ b/python/cudf/cudf/_lib/interop.pyx @@ -64,7 +64,7 @@ def to_dlpack(Table source_table): cdef DLManagedTensor *dlpack_tensor cdef table_view source_table_view = table_view_from_table( - source_table, True + source_table, ignore_index=True ) with nogil: diff --git a/python/cudf/cudf/_lib/orc.pyx b/python/cudf/cudf/_lib/orc.pyx index 4c3894fef74..bc4f4aee9cd 100644 --- a/python/cudf/cudf/_lib/orc.pyx +++ b/python/cudf/cudf/_lib/orc.pyx @@ -155,7 +155,7 @@ cpdef write_orc(Table table, cdef orc_writer_options c_orc_writer_options = move( orc_writer_options.builder( - sink_info_c, table_view_from_table(table, True) + sink_info_c, table_view_from_table(table, ignore_index=True) ).metadata(&metadata_) .compression(compression_) .enable_statistics( (True if enable_statistics else False)) diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx index 1247a852dc2..248be3a2d16 100644 --- a/python/cudf/cudf/_lib/parquet.pyx +++ b/python/cudf/cudf/_lib/parquet.pyx @@ -294,7 +294,7 @@ cpdef write_parquet( ) num_index_cols_meta = len(table._index.names) else: - tv = table_view_from_table(table, True) + tv = table_view_from_table(table, ignore_index=True) tbl_meta = make_unique[table_input_metadata](tv) num_index_cols_meta = 0 @@ -382,7 +382,7 @@ cdef class ParquetWriter: isinstance(table._index, cudf.core.multiindex.MultiIndex)): tv = table_view_from_table(table) else: - tv = table_view_from_table(table, True) + tv = table_view_from_table(table, ignore_index=True) with nogil: self.writer.get()[0].write(tv) @@ -421,7 +421,7 @@ cdef class ParquetWriter: # Set the table_metadata num_index_cols_meta = 0 self.tbl_meta = make_unique[table_input_metadata]( - table_view_from_table(table, True)) + table_view_from_table(table, ignore_index=True)) if self.index is not False: if isinstance(table._index, cudf.core.multiindex.MultiIndex): tv = table_view_from_table(table) diff --git a/python/cudf/cudf/_lib/quantiles.pyx b/python/cudf/cudf/_lib/quantiles.pyx index 4f78f6c4bf9..bb72a2bb046 100644 --- a/python/cudf/cudf/_lib/quantiles.pyx +++ b/python/cudf/cudf/_lib/quantiles.pyx @@ -81,7 +81,8 @@ def quantiles(Table source_table, object is_input_sorted, list column_order, list null_precedence): - cdef table_view c_input = table_view_from_table(source_table, True) + cdef table_view c_input = table_view_from_table( + source_table, ignore_index=True) cdef vector[double] c_q = q cdef interpolation c_interp = ( interp diff --git a/python/cudf/cudf/_lib/reshape.pyx b/python/cudf/cudf/_lib/reshape.pyx index a660f2f389f..5352fefbe70 100644 --- a/python/cudf/cudf/_lib/reshape.pyx +++ b/python/cudf/cudf/_lib/reshape.pyx @@ -17,7 +17,8 @@ from cudf._lib.utils cimport data_from_unique_ptr def interleave_columns(Table source_table): - cdef table_view c_view = table_view_from_table(source_table, True) + cdef table_view c_view = table_view_from_table( + source_table, ignore_index=True) cdef unique_ptr[column] c_result with nogil: diff --git a/python/cudf/cudf/_lib/search.pyx b/python/cudf/cudf/_lib/search.pyx index 23c33f926ed..e362062cac2 100644 --- a/python/cudf/cudf/_lib/search.pyx +++ b/python/cudf/cudf/_lib/search.pyx @@ -33,8 +33,10 @@ def search_sorted( cdef vector[libcudf_types.null_order] c_null_precedence cdef libcudf_types.order c_order cdef libcudf_types.null_order c_null_order - cdef table_view c_table_data = table_view_from_table(table, True) - cdef table_view c_values_data = table_view_from_table(values, True) + cdef table_view c_table_data = table_view_from_table( + table, ignore_index=True) + cdef table_view c_values_data = table_view_from_table( + values, ignore_index=True) # Note: We are ignoring index columns here c_order = (libcudf_types.order.ASCENDING diff --git a/python/cudf/cudf/_lib/sort.pyx b/python/cudf/cudf/_lib/sort.pyx index 0362a800ab5..c10ffea5d4b 100644 --- a/python/cudf/cudf/_lib/sort.pyx +++ b/python/cudf/cudf/_lib/sort.pyx @@ -130,7 +130,7 @@ def order_by(Table source_table, object ascending, bool na_position): """ cdef table_view source_table_view = table_view_from_table( - source_table, True + source_table, ignore_index=True ) cdef vector[order] column_order column_order.reserve(len(ascending)) diff --git a/python/cudf/cudf/_lib/strings/combine.pyx b/python/cudf/cudf/_lib/strings/combine.pyx index 9c503cf9ad2..48357403c21 100644 --- a/python/cudf/cudf/_lib/strings/combine.pyx +++ b/python/cudf/cudf/_lib/strings/combine.pyx @@ -33,7 +33,8 @@ def concatenate(Table source_strings, cdef DeviceScalar narep = na_rep.device_value cdef unique_ptr[column] c_result - cdef table_view source_view = table_view_from_table(source_strings, True) + cdef table_view source_view = table_view_from_table( + source_strings, ignore_index=True) cdef const string_scalar* scalar_separator = \ (separator.get_raw_ptr()) diff --git a/python/cudf/cudf/_lib/transform.pyx b/python/cudf/cudf/_lib/transform.pyx index 7eebccd7b5b..351d185e81a 100644 --- a/python/cudf/cudf/_lib/transform.pyx +++ b/python/cudf/cudf/_lib/transform.pyx @@ -127,7 +127,8 @@ def transform(Column input, op): def masked_udf(Table incols, op, output_type): - cdef table_view data_view = table_view_from_table(incols, True) + cdef table_view data_view = table_view_from_table( + incols, ignore_index=True) cdef string c_str = op.encode("UTF-8") cdef type_id c_tid cdef data_type c_dtype @@ -150,7 +151,8 @@ def masked_udf(Table incols, op, output_type): def table_encode(Table input): - cdef table_view c_input = table_view_from_table(input, True) + cdef table_view c_input = table_view_from_table( + input, ignore_index=True) cdef pair[unique_ptr[table], unique_ptr[column]] c_result with nogil: diff --git a/python/cudf/cudf/_lib/transpose.pyx b/python/cudf/cudf/_lib/transpose.pyx index d7acaa7270a..f6c1c82cbc4 100644 --- a/python/cudf/cudf/_lib/transpose.pyx +++ b/python/cudf/cudf/_lib/transpose.pyx @@ -46,7 +46,8 @@ def transpose(Table source): raise ValueError('Columns must all have the same dtype') cdef pair[unique_ptr[column], table_view] c_result - cdef table_view c_input = table_view_from_table(source, True) + cdef table_view c_input = table_view_from_table( + source, ignore_index=True) with nogil: c_result = move(cpp_transpose(c_input))