Skip to content

Commit

Permalink
Import NA from missing rather than using cudf.NA everywhere (#10821)
Browse files Browse the repository at this point in the history

This PR changes cuDF so that `NA` is no longer referenced throughout the codebase via the top-level `cudf` namespace (`cudf.NA`); instead, it is imported directly from `cudf.core.missing`. This is part of #10820 and is a follow-up to a review comment on #10791.

Authors:
  - https://github.com/brandon-b-miller

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #10821
  • Loading branch information
brandon-b-miller authored May 10, 2022
1 parent dc0c3cd commit 366206d
Show file tree
Hide file tree
Showing 9 changed files with 32 additions and 27 deletions.
23 changes: 12 additions & 11 deletions python/cudf/cudf/_lib/scalar.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ from cudf._lib.types import (
duration_unit_map,
)
from cudf.core.dtypes import ListDtype, StructDtype
from cudf.core.missing import NA

from cudf._lib.column cimport Column
from cudf._lib.cpp.column.column_view cimport column_view
Expand Down Expand Up @@ -170,7 +171,7 @@ cdef class DeviceScalar:
return self.get_raw_ptr()[0].is_valid()

def __repr__(self):
if self.value is cudf.NA:
if self.value is NA:
return (
f"{self.__class__.__name__}"
f"({self.value}, {repr(self.dtype)})"
Expand Down Expand Up @@ -356,7 +357,7 @@ cdef _set_struct_from_pydict(unique_ptr[scalar]& s,
else:
pyarrow_table = pa.Table.from_arrays(
[
pa.array([cudf.NA], from_pandas=True, type=f.type)
pa.array([NA], from_pandas=True, type=f.type)
for f in arrow_schema
],
names=columns
Expand All @@ -371,7 +372,7 @@ cdef _set_struct_from_pydict(unique_ptr[scalar]& s,

cdef _get_py_dict_from_struct(unique_ptr[scalar]& s):
if not s.get()[0].is_valid():
return cudf.NA
return NA

cdef table_view struct_table_view = (<struct_scalar*>s.get()).view()
column_names = [str(i) for i in range(struct_table_view.num_columns())]
Expand All @@ -386,7 +387,7 @@ cdef _set_list_from_pylist(unique_ptr[scalar]& s,
object dtype,
bool valid=True):

value = value if valid else [cudf.NA]
value = value if valid else [NA]
cdef Column col
if isinstance(dtype.element_type, ListDtype):
pa_type = dtype.element_type.to_arrow()
Expand All @@ -404,7 +405,7 @@ cdef _set_list_from_pylist(unique_ptr[scalar]& s,
cdef _get_py_list_from_list(unique_ptr[scalar]& s):

if not s.get()[0].is_valid():
return cudf.NA
return NA

cdef column_view list_col_view = (<list_scalar*>s.get()).view()
cdef Column list_col = Column.from_column_view(list_col_view, None)
Expand All @@ -416,14 +417,14 @@ cdef _get_py_list_from_list(unique_ptr[scalar]& s):

cdef _get_py_string_from_string(unique_ptr[scalar]& s):
if not s.get()[0].is_valid():
return cudf.NA
return NA
return (<string_scalar*>s.get())[0].to_string().decode()


cdef _get_np_scalar_from_numeric(unique_ptr[scalar]& s):
cdef scalar* s_ptr = s.get()
if not s_ptr[0].is_valid():
return cudf.NA
return NA

cdef libcudf_types.data_type cdtype = s_ptr[0].type()

Expand Down Expand Up @@ -456,7 +457,7 @@ cdef _get_np_scalar_from_numeric(unique_ptr[scalar]& s):
cdef _get_py_decimal_from_fixed_point(unique_ptr[scalar]& s):
cdef scalar* s_ptr = s.get()
if not s_ptr[0].is_valid():
return cudf.NA
return NA

cdef libcudf_types.data_type cdtype = s_ptr[0].type()

Expand All @@ -480,7 +481,7 @@ cdef _get_np_scalar_from_timestamp64(unique_ptr[scalar]& s):
cdef scalar* s_ptr = s.get()

if not s_ptr[0].is_valid():
return cudf.NA
return NA

cdef libcudf_types.data_type cdtype = s_ptr[0].type()

Expand Down Expand Up @@ -571,7 +572,7 @@ def as_device_scalar(val, dtype=None):


def _is_null_host_scalar(slr):
if slr is None or slr is cudf.NA:
if slr is None or slr is NA:
return True
elif isinstance(slr, (np.datetime64, np.timedelta64)) and np.isnat(slr):
return True
Expand Down Expand Up @@ -603,5 +604,5 @@ def _nested_na_replace(input_list):
if isinstance(value, list):
_nested_na_replace(value)
elif value is None:
input_list[idx] = cudf.NA
input_list[idx] = NA
return input_list
5 changes: 2 additions & 3 deletions python/cudf/cudf/core/_internals/where.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from cudf.core.dataframe import DataFrame
from cudf.core.frame import Frame
from cudf.core.index import Index
from cudf.core.missing import NA
from cudf.core.series import Series
from cudf.core.single_column_frame import SingleColumnFrame

Expand All @@ -28,9 +29,7 @@ def _normalize_scalars(col: ColumnBase, other: ScalarLike) -> ScalarLike:
f"{type(other).__name__} to {col.dtype.name}"
)

return cudf.Scalar(
other, dtype=col.dtype if other in {None, cudf.NA} else None
)
return cudf.Scalar(other, dtype=col.dtype if other in {None, NA} else None)


def _check_and_cast_columns_with_other(
Expand Down
3 changes: 2 additions & 1 deletion python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
ListDtype,
StructDtype,
)
from cudf.core.missing import NA
from cudf.core.mixins import BinaryOperand, Reducible
from cudf.utils.dtypes import (
cudf_dtype_from_pa_type,
Expand Down Expand Up @@ -499,7 +500,7 @@ def __setitem__(self, key: Any, value: Any):
self._mimic_inplace(out, inplace=True)

def _wrap_binop_normalization(self, other):
if other is cudf.NA or other is None:
if other is NA or other is None:
return cudf.Scalar(other, dtype=self.dtype)
if isinstance(other, np.ndarray) and other.ndim == 0:
other = other.item()
Expand Down
5 changes: 3 additions & 2 deletions python/cudf/cudf/core/column/lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from cudf.core.column import ColumnBase, as_column, column
from cudf.core.column.methods import ColumnMethods, ParentType
from cudf.core.dtypes import ListDtype
from cudf.core.missing import NA


class ListColumn(ColumnBase):
Expand Down Expand Up @@ -91,7 +92,7 @@ def __setitem__(self, key, value):
if isinstance(value, cudf.Scalar):
if value.dtype != self.dtype:
raise TypeError("list nesting level mismatch")
elif value is cudf.NA:
elif value is NA:
value = cudf.Scalar(value, dtype=self.dtype)
else:
raise ValueError(f"Can not set {value} into ListColumn")
Expand Down Expand Up @@ -354,7 +355,7 @@ def get(
index = as_column(index)
out = extract_element_column(self._column, as_column(index))

if not (default is None or default is cudf.NA):
if not (default is None or default is NA):
# determine rows for which `index` is out-of-bounds
lengths = count_elements(self._column)
out_of_bounds_mask = (np.negative(index) > lengths) | (
Expand Down
3 changes: 2 additions & 1 deletion python/cudf/cudf/core/column/numerical_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from cudf import _lib as libcudf
from cudf._typing import ScalarLike
from cudf.core.column import ColumnBase
from cudf.core.missing import NA
from cudf.core.mixins import Scannable


Expand Down Expand Up @@ -116,7 +117,7 @@ def quantile(
scalar_result = result.element_indexing(0)
return (
cudf.utils.dtypes._get_nan_for_dtype(self.dtype)
if scalar_result is cudf.NA
if scalar_result is NA
else scalar_result
)
return result
Expand Down
3 changes: 2 additions & 1 deletion python/cudf/cudf/core/column/struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from cudf.core.column import ColumnBase, build_struct_column
from cudf.core.column.methods import ColumnMethods
from cudf.core.dtypes import StructDtype
from cudf.core.missing import NA


class StructColumn(ColumnBase):
Expand Down Expand Up @@ -102,7 +103,7 @@ def __setitem__(self, key, value):
if isinstance(value, dict):
# filling in fields not in dict
for field in self.dtype.fields:
value[field] = value.get(field, cudf.NA)
value[field] = value.get(field, NA)

value = cudf.Scalar(value, self.dtype)
super().__setitem__(key, value)
Expand Down
11 changes: 5 additions & 6 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@
_indices_from_labels,
doc_reset_index_template,
)
from cudf.core.missing import NA
from cudf.core.multiindex import MultiIndex
from cudf.core.resample import DataFrameResampler
from cudf.core.series import Series
Expand Down Expand Up @@ -364,9 +365,7 @@ def _setitem_tuple_arg(self, key, value):
scatter_map = _indices_from_labels(self._frame, key[0])
for col in columns_df._column_names:
columns_df[col][scatter_map] = (
value._data[col]
if col in value_column_names
else cudf.NA
value._data[col] if col in value_column_names else NA
)

else:
Expand Down Expand Up @@ -479,7 +478,7 @@ def _setitem_tuple_arg(self, key, value):
value_column_names = set(value._column_names)
for col in columns_df._column_names:
columns_df[col][key[0]] = (
value._data[col] if col in value_column_names else cudf.NA
value._data[col] if col in value_column_names else NA
)

else:
Expand Down Expand Up @@ -3867,8 +3866,8 @@ def applymap(
# bytecode to generate the equivalent PTX
# as a null-ignoring version of the function
def _func(x): # pragma: no cover
if x is cudf.NA:
return cudf.NA
if x is NA:
return NA
else:
return devfunc(x)

Expand Down
3 changes: 2 additions & 1 deletion python/cudf/cudf/testing/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
is_struct_dtype,
)
from cudf.core._compat import PANDAS_GE_110
from cudf.core.missing import NA


def dtype_can_compare_equal_to_other(dtype):
Expand Down Expand Up @@ -290,7 +291,7 @@ def assert_column_equal(


def null_safe_scalar_equals(left, right):
if left in {cudf.NA, np.nan} or right in {cudf.NA, np.nan}:
if left in {NA, np.nan} or right in {NA, np.nan}:
return left is right
return left == right

Expand Down
3 changes: 2 additions & 1 deletion python/cudf/cudf/utils/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

import cudf
from cudf.core._compat import PANDAS_GE_120
from cudf.core.missing import NA

_NA_REP = "<NA>"

Expand Down Expand Up @@ -591,7 +592,7 @@ def _can_cast(from_dtype, to_dtype):
`np.can_cast` but with some special handling around
cudf specific dtypes.
"""
if from_dtype in {None, cudf.NA}:
if from_dtype in {None, NA}:
return True
if isinstance(from_dtype, type):
from_dtype = cudf.dtype(from_dtype)
Expand Down

0 comments on commit 366206d

Please sign in to comment.