Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Remove Cython APIs for table view generation #9199

Merged
16 changes: 5 additions & 11 deletions python/cudf/cudf/_lib/concat.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,8 @@ from cudf._lib.cpp.concatenate cimport (
concatenate_tables as libcudf_concatenate_tables,
)
from cudf._lib.cpp.table.table cimport table, table_view
from cudf._lib.table cimport Table
from cudf._lib.utils cimport (
data_from_unique_ptr,
make_column_views,
make_table_data_views,
make_table_views,
)
from cudf._lib.table cimport Table, table_view_from_table
from cudf._lib.utils cimport data_from_unique_ptr, make_column_views

from cudf.core.buffer import Buffer

Expand Down Expand Up @@ -47,10 +42,9 @@ cpdef concat_columns(object columns):
cpdef concat_tables(object tables, bool ignore_index=False):
cdef unique_ptr[table] c_result
cdef vector[table_view] c_views
if ignore_index is False:
c_views = make_table_views(tables)
else:
c_views = make_table_data_views(tables)
c_views.reserve(len(tables))
for tbl in tables:
c_views.push_back(table_view_from_table(tbl, ignore_index))
with nogil:
c_result = move(libcudf_concatenate_tables(c_views))

Expand Down
60 changes: 28 additions & 32 deletions python/cudf/cudf/_lib/copying.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,11 @@ from cudf._lib.column cimport Column
from cudf._lib.scalar import as_device_scalar

from cudf._lib.scalar cimport DeviceScalar
from cudf._lib.table cimport Table, make_table_view
from cudf._lib.table cimport (
Table,
table_view_from_columns,
table_view_from_table,
)

from cudf._lib.reduce import minmax
from cudf.core.abc import Serializable
Expand Down Expand Up @@ -160,11 +164,9 @@ def gather(
f" rows.")

cdef unique_ptr[table] c_result
cdef table_view source_table_view
if keep_index is True:
source_table_view = source_table.view()
else:
source_table_view = source_table.data_view()
cdef table_view source_table_view = table_view_from_table(
source_table, not keep_index
)
cdef column_view gather_map_view = gather_map.view()
cdef cpp_copying.out_of_bounds_policy policy = (
cpp_copying.out_of_bounds_policy.NULLIFY if nullify
Expand Down Expand Up @@ -200,7 +202,8 @@ def scatter(object source, Column scatter_map, Column target_column,
"""

cdef column_view scatter_map_view = scatter_map.view()
cdef table_view target_table_view = make_table_view((target_column,))
cdef table_view target_table_view = table_view_from_columns(
(target_column,))
cdef bool c_bounds_check = bounds_check
cdef unique_ptr[table] c_result

Expand All @@ -212,7 +215,7 @@ def scatter(object source, Column scatter_map, Column target_column,
cdef DeviceScalar slr

if isinstance(source, Column):
source_table_view = make_table_view((<Column> source,))
source_table_view = table_view_from_columns((<Column> source,))

with nogil:
c_result = move(
Expand Down Expand Up @@ -263,7 +266,7 @@ def _reverse_column(Column source_column):


def _reverse_table(Table source_table):
cdef table_view reverse_table_view = source_table.view()
cdef table_view reverse_table_view = table_view_from_columns(source_table)

cdef unique_ptr[table] c_result
with nogil:
Expand Down Expand Up @@ -325,11 +328,9 @@ def column_allocate_like(Column input_column, size=None):

def table_empty_like(Table input_table, bool keep_index=True):

cdef table_view input_table_view
if keep_index is True:
input_table_view = input_table.view()
else:
input_table_view = input_table.data_view()
cdef table_view input_table_view = table_view_from_table(
input_table, not keep_index
)

cdef unique_ptr[table] c_result

Expand Down Expand Up @@ -376,11 +377,9 @@ def column_slice(Column input_column, object indices):

def table_slice(Table input_table, object indices, bool keep_index=True):

cdef table_view input_table_view
if keep_index is True:
input_table_view = input_table.view()
else:
input_table_view = input_table.data_view()
cdef table_view input_table_view = table_view_from_table(
input_table, not keep_index
)

cdef vector[size_type] c_indices
c_indices.reserve(len(indices))
Expand Down Expand Up @@ -445,11 +444,9 @@ def column_split(Column input_column, object splits):

def table_split(Table input_table, object splits, bool keep_index=True):

cdef table_view input_table_view
if keep_index is True:
input_table_view = input_table.view()
else:
input_table_view = input_table.data_view()
cdef table_view input_table_view = table_view_from_table(
input_table, not keep_index
)

cdef vector[size_type] c_splits
c_splits.reserve(len(splits))
Expand Down Expand Up @@ -588,8 +585,8 @@ def copy_if_else(object lhs, object rhs, Column boolean_mask):
def _boolean_mask_scatter_table(Table input_table, Table target_table,
Column boolean_mask):

cdef table_view input_table_view = input_table.view()
cdef table_view target_table_view = target_table.view()
cdef table_view input_table_view = table_view_from_columns(input_table)
cdef table_view target_table_view = table_view_from_columns(target_table)
cdef column_view boolean_mask_view = boolean_mask.view()

cdef unique_ptr[table] c_result
Expand Down Expand Up @@ -619,7 +616,7 @@ def _boolean_mask_scatter_scalar(list input_scalars, Table target_table,
for scl in input_scalars:
input_scalar_vector.push_back(reference_wrapper[constscalar](
scl.get_raw_ptr()[0]))
cdef table_view target_table_view = target_table.view()
cdef table_view target_table_view = table_view_from_columns(target_table)
cdef column_view boolean_mask_view = boolean_mask.view()

cdef unique_ptr[table] c_result
Expand Down Expand Up @@ -702,9 +699,7 @@ def get_element(Column input_column, size_type index):

def sample(Table input, size_type n,
bool replace, int64_t seed, bool keep_index=True):
cdef table_view tbl_view = (
input.view() if keep_index else input.data_view()
)
cdef table_view tbl_view = table_view_from_table(input, not keep_index)
cdef cpp_copying.sample_with_replacement replacement

if replace:
Expand Down Expand Up @@ -764,10 +759,11 @@ cdef class _CPackedColumns:
or input_table.index.stop != len(input_table)
or input_table.index.step != 1
):
input_table_view = input_table.view()
input_table_view = table_view_from_table(input_table)
p.index_names = input_table._index_names
else:
input_table_view = input_table.data_view()
input_table_view = table_view_from_table(
input_table, ignore_index=True)

p.column_names = input_table._column_names
p.column_dtypes = {}
Expand Down
11 changes: 8 additions & 3 deletions python/cudf/cudf/_lib/csv.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,11 @@ from cudf._lib.cpp.io.types cimport (
)
from cudf._lib.cpp.table.table_view cimport table_view
from cudf._lib.io.utils cimport make_sink_info, make_source_info
from cudf._lib.table cimport Table, make_table_view
from cudf._lib.table cimport (
Table,
table_view_from_columns,
table_view_from_table,
)
from cudf._lib.utils cimport data_from_unique_ptr

ctypedef int32_t underlying_type_t_compression
Expand Down Expand Up @@ -458,8 +462,9 @@ cpdef write_csv(
--------
cudf.to_csv
"""
cdef table_view input_table_view = \
table.view() if index is True else table.data_view()
cdef table_view input_table_view = table_view_from_table(
table, not index
)
cdef bool include_header_c = header
cdef char delim_c = ord(sep)
cdef string line_term_c = line_terminator.encode()
Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/_lib/filling.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ from cudf._lib.cpp.table.table cimport table
from cudf._lib.cpp.table.table_view cimport table_view
from cudf._lib.cpp.types cimport size_type
from cudf._lib.scalar cimport DeviceScalar
from cudf._lib.table cimport Table
from cudf._lib.table cimport Table, table_view_from_table
from cudf._lib.utils cimport data_from_unique_ptr


Expand Down Expand Up @@ -59,7 +59,7 @@ def repeat(Table inp, object count, bool check_count=False):


def _repeat_via_column(Table inp, Column count, bool check_count):
cdef table_view c_inp = inp.view()
cdef table_view c_inp = table_view_from_table(inp)
cdef column_view c_count = count.view()
cdef bool c_check_count = check_count
cdef unique_ptr[table] c_result
Expand All @@ -79,7 +79,7 @@ def _repeat_via_column(Table inp, Column count, bool check_count):


def _repeat_via_size_type(Table inp, size_type count):
cdef table_view c_inp = inp.view()
cdef table_view c_inp = table_view_from_table(inp)
cdef unique_ptr[table] c_result

with nogil:
Expand Down
10 changes: 5 additions & 5 deletions python/cudf/cudf/_lib/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import cudf

from cudf._lib.column cimport Column
from cudf._lib.scalar cimport DeviceScalar
from cudf._lib.table cimport Table
from cudf._lib.table cimport Table, table_view_from_table

from cudf._lib.scalar import as_device_scalar

Expand Down Expand Up @@ -74,7 +74,7 @@ cdef class GroupBy:
else:
c_null_handling = libcudf_types.null_policy.INCLUDE

cdef table_view keys_view = keys.view()
cdef table_view keys_view = table_view_from_table(keys)

with nogil:
self.c_obj.reset(
Expand All @@ -90,7 +90,7 @@ cdef class GroupBy:

def groups(self, Table values):

cdef table_view values_view = values.view()
cdef table_view values_view = table_view_from_table(values)

with nogil:
c_groups = move(self.c_obj.get()[0].get_groups(values_view))
Expand Down Expand Up @@ -293,7 +293,7 @@ cdef class GroupBy:
return self.aggregate_internal(values, aggregations)

def shift(self, Table values, int periods, list fill_values):
cdef table_view view = values.view()
cdef table_view view = table_view_from_table(values)
cdef size_type num_col = view.num_columns()
cdef vector[size_type] offsets = vector[size_type](num_col, periods)

Expand Down Expand Up @@ -329,7 +329,7 @@ cdef class GroupBy:
return shifted, grouped_keys

def replace_nulls(self, Table values, object method):
cdef table_view val_view = values.view()
cdef table_view val_view = table_view_from_table(values)
cdef pair[unique_ptr[table], unique_ptr[table]] c_result
cdef replace_policy policy = (
replace_policy.PRECEDING
Expand Down
13 changes: 6 additions & 7 deletions python/cudf/cudf/_lib/hash.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,17 @@ from cudf._lib.cpp.hash cimport hash as cpp_hash
from cudf._lib.cpp.partitioning cimport hash_partition as cpp_hash_partition
from cudf._lib.cpp.table.table cimport table
from cudf._lib.cpp.table.table_view cimport table_view
from cudf._lib.table cimport Table
from cudf._lib.table cimport Table, table_view_from_table
from cudf._lib.utils cimport data_from_unique_ptr


def hash_partition(Table source_table, object columns_to_hash,
int num_partitions, bool keep_index=True):
cdef vector[libcudf_types.size_type] c_columns_to_hash = columns_to_hash
cdef int c_num_partitions = num_partitions
cdef table_view c_source_view
if keep_index is True:
c_source_view = source_table.view()
else:
c_source_view = source_table.data_view()
cdef table_view c_source_view = table_view_from_table(
source_table, not keep_index
)

cdef pair[unique_ptr[table], vector[libcudf_types.size_type]] c_result
with nogil:
Expand Down Expand Up @@ -59,7 +57,8 @@ def hash_partition(Table source_table, object columns_to_hash,

def hash(Table source_table, object initial_hash_values=None, int seed=0):
cdef vector[uint32_t] c_initial_hash = initial_hash_values or []
cdef table_view c_source_view = source_table.data_view()
cdef table_view c_source_view = table_view_from_table(
source_table, ignore_index=True)

cdef unique_ptr[column] c_result
with nogil:
Expand Down
8 changes: 5 additions & 3 deletions python/cudf/cudf/_lib/interop.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ from cudf._lib.cpp.interop cimport (
)
from cudf._lib.cpp.table.table cimport table
from cudf._lib.cpp.table.table_view cimport table_view
from cudf._lib.table cimport Table
from cudf._lib.table cimport Table, table_view_from_table
from cudf._lib.utils cimport data_from_unique_ptr


Expand Down Expand Up @@ -63,7 +63,9 @@ def to_dlpack(Table source_table):
)

cdef DLManagedTensor *dlpack_tensor
cdef table_view source_table_view = source_table.data_view()
cdef table_view source_table_view = table_view_from_table(
source_table, ignore_index=True
)

with nogil:
dlpack_tensor = cpp_to_dlpack(
Expand Down Expand Up @@ -128,7 +130,7 @@ def to_arrow(Table input_table,

cdef vector[column_metadata] cpp_metadata = gather_metadata(metadata)
cdef table_view input_table_view = (
input_table.view() if keep_index else input_table.data_view()
table_view_from_table(input_table, not keep_index)
)

cdef shared_ptr[CTable] cpp_arrow_table
Expand Down
10 changes: 5 additions & 5 deletions python/cudf/cudf/_lib/join.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@ from cudf._lib.cpp.column.column cimport column
from cudf._lib.cpp.table.table cimport table
from cudf._lib.cpp.table.table_view cimport table_view
from cudf._lib.cpp.types cimport data_type, size_type, type_id
from cudf._lib.table cimport Table, columns_from_ptr
from cudf._lib.table cimport Table, table_view_from_table

# The functions below return the *gathermaps* that represent
# the join result when joining on the keys `lhs` and `rhs`.

cpdef join(Table lhs, Table rhs, how=None):
cdef pair[cpp_join.gather_map_type, cpp_join.gather_map_type] c_result
cdef table_view c_lhs = lhs.view()
cdef table_view c_rhs = rhs.view()
cdef table_view c_lhs = table_view_from_table(lhs)
cdef table_view c_rhs = table_view_from_table(rhs)

if how == "inner":
c_result = move(cpp_join.inner_join(
Expand Down Expand Up @@ -52,8 +52,8 @@ cpdef join(Table lhs, Table rhs, how=None):
cpdef semi_join(Table lhs, Table rhs, how=None):
# left-semi and left-anti joins
cdef cpp_join.gather_map_type c_result
cdef table_view c_lhs = lhs.view()
cdef table_view c_rhs = rhs.view()
cdef table_view c_lhs = table_view_from_table(lhs)
cdef table_view c_rhs = table_view_from_table(rhs)

if how == "leftsemi":
c_result = move(cpp_join.left_semi_join(
Expand Down
8 changes: 3 additions & 5 deletions python/cudf/cudf/_lib/lists.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ from cudf._lib.cpp.types cimport (
size_type,
)
from cudf._lib.scalar cimport DeviceScalar
from cudf._lib.table cimport Table
from cudf._lib.table cimport Table, table_view_from_table
from cudf._lib.types cimport (
underlying_type_t_null_order,
underlying_type_t_order,
Expand Down Expand Up @@ -63,9 +63,7 @@ def count_elements(Column col):


def explode_outer(Table tbl, int explode_column_idx, bool ignore_index=False):
cdef table_view c_table_view = (
tbl.data_view() if ignore_index else tbl.view()
)
cdef table_view c_table_view = table_view_from_table(tbl, ignore_index)
cdef size_type c_explode_column_idx = explode_column_idx

cdef unique_ptr[table] c_result
Expand Down Expand Up @@ -168,7 +166,7 @@ def contains_scalar(Column col, object py_search_key):
def concatenate_rows(Table tbl):
cdef unique_ptr[column] c_result

cdef table_view c_table_view = tbl.view()
cdef table_view c_table_view = table_view_from_table(tbl)

with nogil:
c_result = move(cpp_concatenate_rows(
Expand Down
Loading