Skip to content

Commit

Permalink
Add _from_columns_like_self factory (#10022)
Browse files Browse the repository at this point in the history
Follow up to #9558

On a return trip from libcudf, it is a common pattern for a cudf frame to apply its own metadata to the returned columns. This PR generalizes that procedure into a new factory function, `_from_columns_like_self`.

Authors:
  - Michael Wang (https://github.com/isVoid)

Approvers:
  - Ram (Ramakrishna Prabhu) (https://github.com/rgsl888prabhu)
  - Paul Taylor (https://github.com/trxcllnt)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: #10022
  • Loading branch information
isVoid authored Jan 20, 2022
1 parent 690993c commit 2bd7320
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 26 deletions.
18 changes: 5 additions & 13 deletions python/cudf/cudf/core/_base_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1444,7 +1444,8 @@ def drop_duplicates(
"""

# This utilizes the fact that every `Index` is also a `Frame`.
result = self.__class__._from_columns(
# Except RangeIndex.
return self._from_columns_like_self(
drop_duplicates(
list(self._columns),
keys=range(len(self._data)),
Expand All @@ -1453,8 +1454,6 @@ def drop_duplicates(
),
self._column_names,
)
result._copy_type_metadata(self, include_index=False)
return result

def dropna(self, how="any"):
"""
Expand All @@ -1476,12 +1475,10 @@ def dropna(self, how="any"):
for col in self._columns
]

result = self.__class__._from_columns(
return self._from_columns_like_self(
drop_nulls(data_columns, how=how, keys=range(len(data_columns)),),
self._column_names,
)
result._copy_type_metadata(self, include_index=False)
return result

def _gather(self, gather_map, nullify=False, check_bounds=True):
"""Gather rows of index specified by indices in `gather_map`.
Expand All @@ -1501,14 +1498,11 @@ def _gather(self, gather_map, nullify=False, check_bounds=True):
):
raise IndexError("Gather map index is out of bounds.")

result = self.__class__._from_columns(
return self._from_columns_like_self(
gather(list(self._columns), gather_map, nullify=nullify),
self._column_names,
)

result._copy_type_metadata(self, include_index=False)
return result

def take(self, indices, axis=0, allow_fill=True, fill_value=None):
"""Return a new index containing the rows specified by *indices*
Expand Down Expand Up @@ -1561,12 +1555,10 @@ def _apply_boolean_mask(self, boolean_mask):
if not is_bool_dtype(boolean_mask.dtype):
raise ValueError("boolean_mask is not boolean type.")

result = self.__class__._from_columns(
return self._from_columns_like_self(
apply_boolean_mask(list(self._columns), boolean_mask),
column_names=self._column_names,
)
result._copy_type_metadata(self)
return result

def _split_columns_by_levels(self, levels):
if isinstance(levels, int) and levels > 0:
Expand Down
16 changes: 16 additions & 0 deletions python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,22 @@ def _from_columns(

return cls._from_data(data, index)

def _from_columns_like_self(
self,
columns: List[ColumnBase],
column_names: List[str],
index_names: Optional[List[str]] = None,
):
"""Construct a `Frame` from a list of columns with metadata from self.
If `index_names` is set, the first `len(index_names)` columns are
used to construct the index of the frame.
"""
frame = self.__class__._from_columns(
columns, column_names, index_names
)
return frame._copy_type_metadata(self, include_index=bool(index_names))

def _mimic_inplace(
self: T, result: Frame, inplace: bool = False
) -> Optional[Frame]:
Expand Down
17 changes: 4 additions & 13 deletions python/cudf/cudf/core/indexed_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,7 +556,7 @@ def _gather(
):
raise IndexError("Gather map index is out of bounds.")

result = self.__class__._from_columns(
return self._from_columns_like_self(
libcudf.copying.gather(
list(self._index._columns + self._columns)
if keep_index
Expand All @@ -568,9 +568,6 @@ def _gather(
self._index.names if keep_index else None,
)

result._copy_type_metadata(self, include_index=keep_index)
return result

def _positions_from_column_names(
self, column_names, offset_by_index_columns=False
):
Expand Down Expand Up @@ -628,7 +625,7 @@ def drop_duplicates(
keys = self._positions_from_column_names(
subset, offset_by_index_columns=not ignore_index
)
result = self.__class__._from_columns(
return self._from_columns_like_self(
libcudf.stream_compaction.drop_duplicates(
list(self._columns)
if ignore_index
Expand All @@ -640,8 +637,6 @@ def drop_duplicates(
self._column_names,
self._index.names if not ignore_index else None,
)
result._copy_type_metadata(self)
return result

def add_prefix(self, prefix):
"""
Expand Down Expand Up @@ -1354,7 +1349,7 @@ def _drop_na_rows(
for col in self._columns
]

result = self.__class__._from_columns(
return self._from_columns_like_self(
libcudf.stream_compaction.drop_nulls(
list(self._index._data.columns) + data_columns,
how=how,
Expand All @@ -1366,8 +1361,6 @@ def _drop_na_rows(
self._column_names,
self._index.names,
)
result._copy_type_metadata(self)
return result

def _apply_boolean_mask(self, boolean_mask):
"""Apply boolean mask to each row of `self`.
Expand All @@ -1378,15 +1371,13 @@ def _apply_boolean_mask(self, boolean_mask):
if not is_bool_dtype(boolean_mask.dtype):
raise ValueError("boolean_mask is not boolean type.")

result = self.__class__._from_columns(
return self._from_columns_like_self(
libcudf.stream_compaction.apply_boolean_mask(
list(self._index._columns + self._columns), boolean_mask
),
column_names=self._column_names,
index_names=self._index.names,
)
result._copy_type_metadata(self)
return result

def take(self, indices, axis=0):
"""Return a new frame containing the rows specified by *indices*.
Expand Down

0 comments on commit 2bd7320

Please sign in to comment.