forked from NVIDIA/spark-rapids
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This PR contributes to NVIDIA#10153 and refactors all Cython APIs in `transpose.pyx` and `sort.pyx` to accept a list of columns as input. This PR also includes several minor improvements to the code base; see the comments below for details. Authors: Michael Wang (https://github.com/isVoid). Approvers: Ashwin Srinath (https://github.com/shwina). URL: rapidsai/cudf#10675
- Loading branch information
Showing 9 changed files with 222 additions and 246 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,73 +1,27 @@ | ||
# Copyright (c) 2020, NVIDIA CORPORATION. | ||
|
||
import cudf | ||
from cudf.api.types import is_categorical_dtype | ||
# Copyright (c) 2020-2022, NVIDIA CORPORATION. | ||
|
||
from libcpp.memory cimport unique_ptr | ||
from libcpp.pair cimport pair | ||
from libcpp.utility cimport move | ||
|
||
from cudf._lib.column cimport Column | ||
from cudf._lib.cpp.column.column cimport column | ||
from cudf._lib.cpp.column.column_view cimport column_view | ||
from cudf._lib.cpp.table.table cimport table | ||
from cudf._lib.cpp.table.table_view cimport table_view | ||
from cudf._lib.cpp.transpose cimport transpose as cpp_transpose | ||
from cudf._lib.utils cimport data_from_table_view, table_view_from_table | ||
|
||
from cudf._lib.utils cimport columns_from_table_view, table_view_from_columns | ||
|
||
def transpose(source): | ||
"""Transpose index and columns. | ||
|
||
See Also | ||
-------- | ||
cudf.core.DataFrame.transpose | ||
def transpose(list source_columns): | ||
"""Transpose m n-row columns into n m-row columns | ||
""" | ||
|
||
if source._num_columns == 0: | ||
return source | ||
|
||
cats = None | ||
columns = source._columns | ||
dtype = columns[0].dtype | ||
|
||
if is_categorical_dtype(dtype): | ||
if any(not is_categorical_dtype(c.dtype) for c in columns): | ||
raise ValueError('Columns must all have the same dtype') | ||
cats = list(c.categories for c in columns) | ||
cats = cudf.core.column.concat_columns(cats).unique() | ||
source = cudf.core.frame.Frame(index=source._index, data=[ | ||
(name, col._set_categories(cats, is_unique=True).codes) | ||
for name, col in source._data.items() | ||
]) | ||
elif any(c.dtype != dtype for c in columns): | ||
raise ValueError('Columns must all have the same dtype') | ||
|
||
cdef pair[unique_ptr[column], table_view] c_result | ||
cdef table_view c_input = table_view_from_table( | ||
source, ignore_index=True) | ||
cdef table_view c_input = table_view_from_columns(source_columns) | ||
|
||
with nogil: | ||
c_result = move(cpp_transpose(c_input)) | ||
|
||
result_owner = Column.from_unique_ptr(move(c_result.first)) | ||
data, _ = data_from_table_view( | ||
return columns_from_table_view( | ||
c_result.second, | ||
owner=result_owner, | ||
column_names=range(c_input.num_rows()) | ||
owners=[result_owner] * c_result.second.num_columns() | ||
) | ||
|
||
if cats is not None: | ||
data= [ | ||
(name, cudf.core.column.column.build_categorical_column( | ||
codes=cudf.core.column.column.build_column( | ||
col.base_data, dtype=col.dtype), | ||
mask=col.base_mask, | ||
size=col.size, | ||
categories=cats, | ||
offset=col.offset, | ||
)) | ||
for name, col in data.items() | ||
] | ||
|
||
return data |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.