Skip to content

Commit

Permalink
Remove unused masked keyword in column_empty (#17530)
Browse files Browse the repository at this point in the history
Follow up to #16715.

Now that the usages of the `masked` keyword in RAPIDS have been addressed (rapidsai/cuspatial#1496 is the only one I could find), I think we can remove this keyword altogether from this method.

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: #17530
  • Loading branch information
mroeschke authored Dec 13, 2024
1 parent 62669e0 commit 4d6925c
Show file tree
Hide file tree
Showing 12 changed files with 28 additions and 52 deletions.
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/column/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1193,7 +1193,7 @@ def _concat(
f"size > {libcudf.MAX_COLUMN_SIZE_STR}"
)
elif newsize == 0:
codes_col = column.column_empty(0, head.codes.dtype, masked=True)
codes_col = column.column_empty(0, head.codes.dtype)
else:
codes_col = column.concat_columns(codes) # type: ignore[arg-type]

Expand Down
12 changes: 4 additions & 8 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,7 +551,7 @@ def slice(self, start: int, stop: int, stride: int | None = None) -> Self:
if stop < 0 and not (stride < 0 and stop == -1):
stop = stop + len(self)
if (stride > 0 and start >= stop) or (stride < 0 and start <= stop):
return cast(Self, column_empty(0, self.dtype, masked=True))
return cast(Self, column_empty(0, self.dtype))
# compute mask slice
if stride == 1:
return libcudf.copying.column_slice(self, [start, stop])[
Expand Down Expand Up @@ -1054,7 +1054,7 @@ def astype(self, dtype: Dtype, copy: bool = False) -> ColumnBase:
if self.dtype == dtype:
result = self
else:
result = column_empty(0, dtype=dtype, masked=self.nullable)
result = column_empty(0, dtype=dtype)
elif dtype == "category":
# TODO: Figure out why `cudf.dtype("category")`
# astype's different than just the string
Expand Down Expand Up @@ -1625,7 +1625,6 @@ def _has_any_nan(arbitrary: pd.Series | np.ndarray) -> bool:
def column_empty(
row_count: int,
dtype: Dtype = "object",
masked: bool = False,
for_numba: bool = False,
) -> ColumnBase:
"""
Expand All @@ -1642,9 +1641,6 @@ def column_empty(
dtype : Dtype
Type of the column.
masked : bool
Unused.
for_numba : bool, default False
If True, don't allocate a mask as it's not supported by numba.
"""
Expand Down Expand Up @@ -2420,7 +2416,7 @@ def concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase:
"""Concatenate a sequence of columns."""
if len(objs) == 0:
dtype = cudf.dtype(None)
return column_empty(0, dtype=dtype, masked=True)
return column_empty(0, dtype=dtype)

# If all columns are `NumericalColumn` with different dtypes,
# we cast them to a common dtype.
Expand Down Expand Up @@ -2467,7 +2463,7 @@ def concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase:
f"size > {libcudf.MAX_COLUMN_SIZE_STR}"
)
elif newsize == 0:
return column_empty(0, head.dtype, masked=True)
return column_empty(0, head.dtype)

# Filter out inputs that have 0 length, then concatenate.
objs_with_len = [o for o in objs if len(o)]
Expand Down
6 changes: 2 additions & 4 deletions python/cudf/cudf/core/column/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -598,14 +598,12 @@ def strftime(self, format: str) -> cudf.core.column.StringColumn:
if len(self) == 0:
return cast(
cudf.core.column.StringColumn,
column.column_empty(0, dtype="object", masked=False),
column.column_empty(0, dtype="object"),
)
if format in _DATETIME_SPECIAL_FORMATS:
names = as_column(_DATETIME_NAMES)
else:
names = cudf.core.column.column_empty(
0, dtype="object", masked=False
)
names = column.column_empty(0, dtype="object")
return string._datetime_to_str_typecast_functions[self.dtype](
self, format, names
)
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/column/numerical_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ def quantile(
result = cast(
NumericalBaseColumn,
cudf.core.column.column_empty(
row_count=len(q), dtype=self.dtype, masked=True
row_count=len(q), dtype=self.dtype
),
)
else:
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/column/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -5855,7 +5855,7 @@ def strptime(
f"dtype must be datetime or timedelta type, not {dtype}"
)
elif self.null_count == len(self):
return column.column_empty(len(self), dtype=dtype, masked=True) # type: ignore[return-value]
return column.column_empty(len(self), dtype=dtype) # type: ignore[return-value]
elif (self == "None").any():
raise ValueError(
"Cannot convert `None` value to datetime or timedelta."
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/column/timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ def strftime(self, format: str) -> cudf.core.column.StringColumn:
if len(self) == 0:
return cast(
cudf.core.column.StringColumn,
column.column_empty(0, dtype="object", masked=False),
column.column_empty(0, dtype="object"),
)
else:
return string._timedelta_to_str_typecast_functions[self.dtype](
Expand Down
39 changes: 13 additions & 26 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -774,9 +774,7 @@ def __init__(
label_dtype = getattr(columns, "dtype", None)
self._data = ColumnAccessor(
{
k: column.column_empty(
len(self), dtype="object", masked=True
)
k: column_empty(len(self), dtype="object")
for k in columns
},
level_names=tuple(columns.names)
Expand Down Expand Up @@ -979,8 +977,8 @@ def _init_from_series_list(self, data, columns, index):
if columns is not None:
for col_name in columns:
if col_name not in self._data:
self._data[col_name] = column.column_empty(
row_count=len(self), dtype=None, masked=True
self._data[col_name] = column_empty(
row_count=len(self), dtype=None
)
self._data._level_names = (
tuple(columns.names)
Expand Down Expand Up @@ -1031,11 +1029,7 @@ def _init_from_list_like(self, data, index=None, columns=None):
data = list(itertools.zip_longest(*data))

if columns is not None and len(data) == 0:
data = [
cudf.core.column.column_empty(row_count=0, dtype=None)
for _ in columns
]

data = [column_empty(row_count=0, dtype=None) for _ in columns]
for col_name, col in enumerate(data):
self._data[col_name] = column.as_column(col)
self._data.rangeindex = True
Expand Down Expand Up @@ -1074,9 +1068,8 @@ def _init_from_dict_like(
# the provided index, so we need to return a masked
# array of nulls if an index is given.
empty_column = functools.partial(
cudf.core.column.column_empty,
row_count=(0 if index is None else len(index)),
masked=index is not None,
column_empty,
row_count=0 if index is None else len(index),
)

data = {
Expand Down Expand Up @@ -1421,7 +1414,7 @@ def __setitem__(self, arg, value):
new_columns = (
value
if key == arg
else column.column_empty(
else column_empty(
row_count=length, dtype=col.dtype
)
for key, col in self._column_labels_and_values
Expand Down Expand Up @@ -3373,7 +3366,7 @@ def _insert(self, loc, name, value, nan_as_null=None, ignore_index=True):
if num_cols != 0:
ca = self._data._from_columns_like_self(
(
column.column_empty(row_count=length, dtype=dtype)
column_empty(row_count=length, dtype=dtype)
for _, dtype in self._dtypes
),
verify=False,
Expand Down Expand Up @@ -3479,7 +3472,7 @@ def diff(self, periods=1, axis=0):
if abs(periods) > len(self):
df = cudf.DataFrame._from_data(
{
name: column_empty(len(self), dtype=dtype, masked=True)
name: column_empty(len(self), dtype=dtype)
for name, dtype in zip(self._column_names, self.dtypes)
}
)
Expand Down Expand Up @@ -3859,9 +3852,7 @@ def agg(self, aggs, axis=None):
result = DataFrame(index=idxs, columns=cols)
for key in aggs.keys():
col = self[key]
col_empty = column_empty(
len(idxs), dtype=col.dtype, masked=True
)
col_empty = column_empty(len(idxs), dtype=col.dtype)
ans = cudf.Series._from_column(
col_empty, index=cudf.Index(idxs)
)
Expand Down Expand Up @@ -6177,9 +6168,7 @@ def quantile(
quant_index=False,
)._column
if len(res) == 0:
res = column.column_empty(
row_count=len(qs), dtype=ser.dtype
)
res = column_empty(row_count=len(qs), dtype=ser.dtype)
result[k] = res
result = DataFrame._from_data(result)

Expand Down Expand Up @@ -7333,9 +7322,7 @@ def unnamed_group_generator():
)

all_nulls = functools.cache(
functools.partial(
column_empty, self.shape[0], common_type, masked=True
)
functools.partial(column_empty, self.shape[0], common_type)
)

# homogenize the dtypes of the columns
Expand Down Expand Up @@ -8582,7 +8569,7 @@ def _cast_cols_to_common_dtypes(col_idxs, list_of_columns, dtypes, categories):
# If column not in this df, fill with an all-null column
if idx >= len(cols) or cols[idx] is None:
n = len(next(x for x in cols if x is not None))
cols[idx] = column_empty(row_count=n, dtype=dtype, masked=True)
cols[idx] = column_empty(row_count=n, dtype=dtype)
else:
# If column is categorical, rebase the codes with the
# combined categories, and cast the new codes to the
Expand Down
4 changes: 1 addition & 3 deletions python/cudf/cudf/core/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,9 +189,7 @@ def categories(self) -> cudf.Index:
Index(['b', 'a'], dtype='object')
"""
if self._categories is None:
col = cudf.core.column.column_empty(
0, dtype="object", masked=False
)
col = cudf.core.column.column_empty(0, dtype="object")
else:
col = self._categories
return cudf.Index._from_column(col)
Expand Down
7 changes: 3 additions & 4 deletions python/cudf/cudf/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,9 +493,7 @@ def size(self):
"""
Return the size of each group.
"""
col = cudf.core.column.column_empty(
len(self.obj), "int8", masked=False
)
col = cudf.core.column.column_empty(len(self.obj), "int8")
result = (
cudf.Series._from_column(col, name=getattr(self.obj, "name", None))
.groupby(self.grouping, sort=self._sort, dropna=self._dropna)
Expand Down Expand Up @@ -523,7 +521,8 @@ def cumcount(self, ascending: bool = True):
return (
cudf.Series._from_column(
cudf.core.column.column_empty(
len(self.obj), "int8", masked=False
len(self.obj),
"int8",
),
index=self.obj.index,
)
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ def _values(self) -> ColumnBase:
if len(self) > 0:
return column.as_column(self._range, dtype=self.dtype)
else:
return column.column_empty(0, masked=False, dtype=self.dtype)
return column.column_empty(0, dtype=self.dtype)

def _clean_nulls_from_index(self) -> Self:
return self
Expand Down
1 change: 0 additions & 1 deletion python/cudf/cudf/core/indexed_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3851,7 +3851,6 @@ def _reindex(
if name in df._data
else cudf.core.column.column.column_empty(
dtype=dtypes.get(name, np.float64),
masked=True,
row_count=len(index),
)
)
Expand Down
1 change: 0 additions & 1 deletion python/cudf/cudf/io/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -1139,7 +1139,6 @@ def _parquet_to_frame(
dfs[-1][name] = column_empty(
row_count=_len,
dtype=_dtype,
masked=True,
)
else:
dfs[-1][name] = as_column(
Expand Down

0 comments on commit 4d6925c

Please sign in to comment.