Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add _from_columns_like_self factory #10022

Merged
18 changes: 5 additions & 13 deletions python/cudf/cudf/core/_base_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1444,7 +1444,8 @@ def drop_duplicates(
"""

# This utilizes the fact that all `Index` is also a `Frame`.
result = self.__class__._from_columns(
# Except RangeIndex.
vyasr marked this conversation as resolved.
Show resolved Hide resolved
return self._from_columns_like_self(
drop_duplicates(
list(self._columns),
keys=range(len(self._data)),
Expand All @@ -1453,8 +1454,6 @@ def drop_duplicates(
),
self._column_names,
)
result._copy_type_metadata(self, include_index=False)
return result

def dropna(self, how="any"):
"""
Expand All @@ -1476,12 +1475,10 @@ def dropna(self, how="any"):
for col in self._columns
]

result = self.__class__._from_columns(
return self._from_columns_like_self(
drop_nulls(data_columns, how=how, keys=range(len(data_columns)),),
self._column_names,
)
result._copy_type_metadata(self, include_index=False)
return result

def _gather(self, gather_map, nullify=False, check_bounds=True):
"""Gather rows of index specified by indices in `gather_map`.
Expand All @@ -1501,14 +1498,11 @@ def _gather(self, gather_map, nullify=False, check_bounds=True):
):
raise IndexError("Gather map index is out of bounds.")

result = self.__class__._from_columns(
return self._from_columns_like_self(
gather(list(self._columns), gather_map, nullify=nullify),
self._column_names,
)

result._copy_type_metadata(self, include_index=False)
return result

def take(self, indices, axis=0, allow_fill=True, fill_value=None):
"""Return a new index containing the rows specified by *indices*

Expand Down Expand Up @@ -1561,12 +1555,10 @@ def _apply_boolean_mask(self, boolean_mask):
if not is_bool_dtype(boolean_mask.dtype):
raise ValueError("boolean_mask is not boolean type.")

result = self.__class__._from_columns(
return self._from_columns_like_self(
apply_boolean_mask(list(self._columns), boolean_mask),
column_names=self._column_names,
)
result._copy_type_metadata(self)
return result

def _split_columns_by_levels(self, levels):
if isinstance(levels, int) and levels > 0:
Expand Down
10 changes: 10 additions & 0 deletions python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,16 @@ def _from_columns(

return cls._from_data(data, index)

def _from_columns_like_self(
self,
columns: List[ColumnBase],
column_names: List[str],
index_names: Optional[List[str]] = None,
vyasr marked this conversation as resolved.
Show resolved Hide resolved
):
isVoid marked this conversation as resolved.
Show resolved Hide resolved
col = self.__class__._from_columns(columns, column_names, index_names)
col._copy_type_metadata(self, include_index=bool(index_names))
return col
isVoid marked this conversation as resolved.
Show resolved Hide resolved

def _mimic_inplace(
self: T, result: Frame, inplace: bool = False
) -> Optional[Frame]:
Expand Down
17 changes: 4 additions & 13 deletions python/cudf/cudf/core/indexed_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,7 +556,7 @@ def _gather(
):
raise IndexError("Gather map index is out of bounds.")

result = self.__class__._from_columns(
return self._from_columns_like_self(
libcudf.copying.gather(
list(self._index._columns + self._columns)
if keep_index
Expand All @@ -568,9 +568,6 @@ def _gather(
self._index.names if keep_index else None,
)

result._copy_type_metadata(self, include_index=keep_index)
return result

def _positions_from_column_names(
self, column_names, offset_by_index_columns=False
):
Expand Down Expand Up @@ -628,7 +625,7 @@ def drop_duplicates(
keys = self._positions_from_column_names(
subset, offset_by_index_columns=not ignore_index
)
result = self.__class__._from_columns(
return self._from_columns_like_self(
libcudf.stream_compaction.drop_duplicates(
list(self._columns)
if ignore_index
Expand All @@ -640,8 +637,6 @@ def drop_duplicates(
self._column_names,
self._index.names if not ignore_index else None,
)
result._copy_type_metadata(self)
return result

def add_prefix(self, prefix):
"""
Expand Down Expand Up @@ -1354,7 +1349,7 @@ def _drop_na_rows(
for col in self._columns
]

result = self.__class__._from_columns(
return self._from_columns_like_self(
libcudf.stream_compaction.drop_nulls(
list(self._index._data.columns) + data_columns,
how=how,
Expand All @@ -1366,8 +1361,6 @@ def _drop_na_rows(
self._column_names,
self._index.names,
)
result._copy_type_metadata(self)
return result

def _apply_boolean_mask(self, boolean_mask):
"""Apply boolean mask to each row of `self`.
Expand All @@ -1378,15 +1371,13 @@ def _apply_boolean_mask(self, boolean_mask):
if not is_bool_dtype(boolean_mask.dtype):
raise ValueError("boolean_mask is not boolean type.")

result = self.__class__._from_columns(
return self._from_columns_like_self(
libcudf.stream_compaction.apply_boolean_mask(
list(self._index._columns + self._columns), boolean_mask
),
column_names=self._column_names,
index_names=self._index.names,
)
result._copy_type_metadata(self)
return result

def take(self, indices, axis=0):
"""Return a new frame containing the rows specified by *indices*.
Expand Down