From b5337d7dc695275b3a72e19c7bc99a69c54d2d2c Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Thu, 3 Mar 2022 09:42:37 -0600 Subject: [PATCH] Add `nvtx` annotations for `Series` and `Index` (#10374) This PR adds some missing `nvtx` annotations for `Series` and `Index`. Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/10374 --- python/cudf/cudf/core/frame.py | 6 +- python/cudf/cudf/core/index.py | 190 ++++++++++++++++--- python/cudf/cudf/core/multiindex.py | 109 +++++++++-- python/cudf/cudf/core/series.py | 186 ++++++++++++++---- python/cudf/cudf/core/single_column_frame.py | 91 +++++++-- 5 files changed, 485 insertions(+), 97 deletions(-) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 3d36d3bd893..87f4ed0bbc4 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -6921,12 +6921,12 @@ def _drop_rows_by_labels( raise KeyError("One or more values not found in axis") key_df = cudf.DataFrame(index=labels) - if isinstance(obj, cudf.Series): + if isinstance(obj, cudf.DataFrame): + return obj.join(key_df, how="leftanti") + else: res = obj.to_frame(name="tmp").join(key_df, how="leftanti")["tmp"] res.name = obj.name return res - else: - return obj.join(key_df, how="leftanti") def _apply_inverse_column(col: ColumnBase) -> ColumnBase: diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 343ba33ece1..cf58a453bb6 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -160,6 +160,7 @@ class RangeIndex(BaseIndex, BinaryOperand): _range: range + @annotate("RANGEINDEX_INIT", color="green", domain="cudf_python") def __init__( self, start, stop=None, step=1, dtype=None, copy=False, name=None ): @@ -190,43 +191,50 @@ def _copy_type_metadata( # have an underlying column. 
return self - @property + @property # type: ignore + @annotate("RANGEINDEX_NAME", color="green", domain="cudf_python") def name(self): """ Returns the name of the Index. """ return self._name - @name.setter + @name.setter # type: ignore + @annotate("RANGEINDEX_NAME_SETTER", color="green", domain="cudf_python") def name(self, value): self._name = value - @property + @property # type: ignore + @annotate("RANGEINDEX_START", color="green", domain="cudf_python") def start(self): """ The value of the `start` parameter (0 if this was not supplied). """ return self._start - @property + @property # type: ignore + @annotate("RANGEINDEX_STOP", color="green", domain="cudf_python") def stop(self): """ The value of the stop parameter. """ return self._stop - @property + @property # type: ignore + @annotate("RANGEINDEX_STEP", color="green", domain="cudf_python") def step(self): """ The value of the step parameter. """ return self._step - @property + @property # type: ignore + @annotate("RANGEINDEX_NUM_ROWS", color="green", domain="cudf_python") def _num_rows(self): return len(self) @cached_property + @annotate("RANGEINDEX_VALUES", color="green", domain="cudf_python") def _values(self): if len(self) > 0: return column.arange( @@ -256,12 +264,14 @@ def is_categorical(self): def is_interval(self): return False - @property + @property # type: ignore + @annotate("RANGEINDEX_DATA", color="green", domain="cudf_python") def _data(self): return cudf.core.column_accessor.ColumnAccessor( {self.name: self._values} ) + @annotate("RANGEINDEX_CONTAINS", color="green", domain="cudf_python") def __contains__(self, item): if not isinstance( item, tuple(np.sctypes["int"] + np.sctypes["float"] + [int, float]) @@ -271,6 +281,7 @@ def __contains__(self, item): return False return item in range(self._start, self._stop, self._step) + @annotate("RANGEINDEX_COPY", color="green", domain="cudf_python") def copy(self, name=None, deep=False, dtype=None, names=None): """ Make a copy of this object. 
@@ -301,9 +312,13 @@ def copy(self, name=None, deep=False, dtype=None, names=None): start=self._start, stop=self._stop, step=self._step, name=name ) + @annotate( + "RANGEINDEX_DROP_DUPLICATES", color="green", domain="cudf_python" + ) def drop_duplicates(self, keep="first"): return self + @annotate("RANGEINDEX_REPR", color="green", domain="cudf_python") def __repr__(self): return ( f"{self.__class__.__name__}(start={self._start}, stop={self._stop}" @@ -316,9 +331,11 @@ def __repr__(self): + ")" ) + @annotate("RANGEINDEX_LEN", color="green", domain="cudf_python") def __len__(self): return len(range(self._start, self._stop, self._step)) + @annotate("RANGEINDEX_GETITEM", color="green", domain="cudf_python") def __getitem__(self, index): len_self = len(self) if isinstance(index, slice): @@ -344,6 +361,7 @@ def __getitem__(self, index): return as_index(self._values[index], name=self.name) + @annotate("RANGEINDEX_EQUALS", color="green", domain="cudf_python") def equals(self, other): if isinstance(other, RangeIndex): if (self._start, self._stop, self._step) == ( @@ -354,6 +372,7 @@ def equals(self, other): return True return Int64Index._from_data(self._data).equals(other) + @annotate("RANGEINDEX_SERIALIZE", color="green", domain="cudf_python") def serialize(self): header = {} header["index_column"] = {} @@ -374,6 +393,7 @@ def serialize(self): return header, frames @classmethod + @annotate("RANGEINDEX_DESERIALIZE", color="green", domain="cudf_python") def deserialize(cls, header, frames): h = header["index_column"] name = pickle.loads(header["name"]) @@ -382,13 +402,17 @@ def deserialize(cls, header, frames): step = h.get("step", 1) return RangeIndex(start=start, stop=stop, step=step, name=name) - @property + @property # type: ignore + @annotate("RANGEINDEX_DTYPE", color="green", domain="cudf_python") def dtype(self): """ `dtype` of the range of values in RangeIndex. 
""" return cudf.dtype(np.int64) + @annotate( + "RANGEINDEX_FIND_LABEL_RANGE", color="green", domain="cudf_python" + ) def find_label_range(self, first=None, last=None): """Find subrange in the ``RangeIndex``, marked by their positions, that starts greater or equal to ``first`` and ends less or equal to ``last`` @@ -428,6 +452,7 @@ def find_label_range(self, first=None, last=None): return begin, end + @annotate("RANGEINDEX_TO_PANDAS", color="green", domain="cudf_python") def to_pandas(self): return pd.RangeIndex( start=self._start, @@ -444,14 +469,27 @@ def is_unique(self): """ return True - @property + @property # type: ignore + @annotate( + "RANGEINDEX_IS_MONOTONIC_INCREASING", + color="green", + domain="cudf_python", + ) def is_monotonic_increasing(self): return self._step > 0 or len(self) <= 1 - @property + @property # type: ignore + @annotate( + "RANGEINDEX_IS_MONOTONIC_DECREASING", + color="green", + domain="cudf_python", + ) def is_monotonic_decreasing(self): return self._step < 0 or len(self) <= 1 + @annotate( + "RANGEINDEX_GET_SLICE_BOUND", color="green", domain="cudf_python" + ) def get_slice_bound(self, label, side, kind=None): """ Calculate slice bound that corresponds to given label. @@ -486,6 +524,7 @@ def get_slice_bound(self, label, side, kind=None): pos = search_range(start, stop, label, step, side=side) return pos + @annotate("RANGEINDEX_MEMORY_USAGE", color="green", domain="cudf_python") def memory_usage(self, deep=False): if deep: warnings.warn( @@ -498,6 +537,7 @@ def unique(self): # RangeIndex always has unique values return self + @annotate("RANGEINDEX_MUL", color="green", domain="cudf_python") def __mul__(self, other): # Multiplication by raw ints must return a RangeIndex to match pandas. 
if isinstance(other, cudf.Scalar) and other.dtype.kind in "iu": @@ -514,20 +554,24 @@ def __mul__(self, other): ) return self._as_int64().__mul__(other) + @annotate("RANGEINDEX_RMUL", color="green", domain="cudf_python") def __rmul__(self, other): # Multiplication is commutative. return self.__mul__(other) + @annotate("RANGEINDEX_AS_INT64", color="green", domain="cudf_python") def _as_int64(self): # Convert self to an Int64Index. This method is used to perform ops # that are not defined directly on RangeIndex. return Int64Index._from_data(self._data) + @annotate("RANGEINDEX_ARRAY_UFUNC", color="green", domain="cudf_python") def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): return self._as_int64().__array_ufunc__( ufunc, method, *inputs, **kwargs ) + @annotate("RANGEINDEX_GETATTR", color="green", domain="cudf_python") def __getattr__(self, key): # For methods that are not defined for RangeIndex we attempt to operate # on the corresponding integer index if possible. @@ -538,6 +582,7 @@ def __getattr__(self, key): f"'{type(self)}' object has no attribute {key}" ) + @annotate("RANGEINDEX_GET_LOC", color="green", domain="cudf_python") def get_loc(self, key, method=None, tolerance=None): # Given an actual integer, idx = (key - self._start) / self._step @@ -571,6 +616,7 @@ def get_loc(self, key, method=None, tolerance=None): raise KeyError(key) return np.clip(round_method(idx), 0, idx_int_upper_bound, dtype=int) + @annotate("RANGEINDEX_UNION_INTERNAL", color="green", domain="cudf_python") def _union(self, other, sort=None): if isinstance(other, RangeIndex): # Variable suffixes are of the @@ -645,6 +691,9 @@ def _union(self, other, sort=None): # then perform `union`. 
return Int64Index(self._values)._union(other, sort=sort) + @annotate( + "RANGEINDEX_INTERSECTION_INTERNAL", color="green", domain="cudf_python" + ) def _intersection(self, other, sort=False): if not isinstance(other, RangeIndex): return super()._intersection(other, sort=sort) @@ -690,12 +739,18 @@ def _intersection(self, other, sort=False): return new_index + @annotate( + "RANGEINDEX_GATHER_INTERNAL", color="green", domain="cudf_python" + ) def _gather(self, gather_map, nullify=False, check_bounds=True): gather_map = cudf.core.column.as_column(gather_map) return Int64Index._from_columns( [self._values.take(gather_map, nullify, check_bounds)], [self.name] ) + @annotate( + "RANGEINDEX_APPLY_BOOLEAN_MASK", color="green", domain="cudf_python" + ) def _apply_boolean_mask(self, boolean_mask): return Int64Index._from_columns( [self._values.apply_boolean_mask(boolean_mask)], [self.name] @@ -734,6 +789,7 @@ class GenericIndex(SingleColumnFrame, BaseIndex): Column's, the data Column will be cloned to adopt this name. 
""" + @annotate("GENERICINDEX_INIT", color="green", domain="cudf_python") def __init__(self, data, **kwargs): kwargs = _setdefault_name(data, **kwargs) @@ -754,6 +810,7 @@ def __init__(self, data, **kwargs): name = kwargs.get("name") super().__init__({name: data}) + @annotate("GENERICINDEX_ARRAY_UFUNC", color="green", domain="cudf_python") def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): ret = super().__array_ufunc__(ufunc, method, *inputs, **kwargs) @@ -789,6 +846,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): return NotImplemented + @annotate("GENERICINDEX_BINARYOP", color="green", domain="cudf_python") def _binaryop( self, other: T, op: str, fill_value: Any = None, *args, **kwargs, ) -> SingleColumnFrame: @@ -807,6 +865,9 @@ def _binaryop( return ret.values return ret + @annotate( + "GENERICINDEX_COPY_TYPE_METADATA", color="green", domain="cudf_python" + ) def _copy_type_metadata( self, other: Frame, include_index: bool = True ) -> GenericIndex: @@ -823,11 +884,13 @@ def _copy_type_metadata( ) return self - @property + @property # type: ignore + @annotate("GENERICINDEX_VALUES", color="green", domain="cudf_python") def _values(self): return self._column @classmethod + @annotate("GENERICINDEX_CONCAT", color="green", domain="cudf_python") def _concat(cls, objs): if all(isinstance(obj, RangeIndex) for obj in objs): result = _concat_range_index(objs) @@ -844,6 +907,7 @@ def _concat(cls, objs): result.name = name return result + @annotate("GENERICINDEX_MEMORY_USAGE", color="green", domain="cudf_python") def memory_usage(self, deep=False): return sum(super().memory_usage(deep=deep).values()) @@ -877,6 +941,7 @@ def equals(self, other, **kwargs): except TypeError: return False + @annotate("GENERICINDEX_COPY", color="green", domain="cudf_python") def copy(self, name=None, deep=False, dtype=None, names=None): """ Make a copy of this object. 
@@ -904,6 +969,7 @@ def copy(self, name=None, deep=False, dtype=None, names=None): col = self._values.astype(dtype) return _index_from_data({name: col.copy(True) if deep else col}) + @annotate("GENERICINDEX_GET_LOC", color="green", domain="cudf_python") def get_loc(self, key, method=None, tolerance=None): """Get integer location, slice or boolean mask for requested label. @@ -1022,6 +1088,7 @@ def get_loc(self, key, method=None, tolerance=None): mask[true_inds] = True return mask + @annotate("GENERICINDEX_REPR", color="green", domain="cudf_python") def __repr__(self): max_seq_items = get_option("max_seq_items") or len(self) mr = 0 @@ -1098,6 +1165,7 @@ def __repr__(self): return "\n".join(lines) + @annotate("GENERICINDEX_GETITEM", color="green", domain="cudf_python") def __getitem__(self, index): if type(self) == IntervalIndex: raise NotImplementedError( @@ -1109,13 +1177,17 @@ def __getitem__(self, index): res.name = self.name return res - @property + @property # type: ignore + @annotate("GENERICINDEX_DTYPE", color="green", domain="cudf_python") def dtype(self): """ `dtype` of the underlying values in GenericIndex. """ return self._values.dtype + @annotate( + "GENERICINDEX_FIND_LABEL_RANGE", color="green", domain="cudf_python" + ) def find_label_range(self, first, last): """Find range that starts with *first* and ends with *last*, inclusively. @@ -1135,6 +1207,9 @@ def find_label_range(self, first, last): end += 1 return begin, end + @annotate( + "GENERICINDEX_GET_SLICE_BOUND", color="green", domain="cudf_python" + ) def get_slice_bound(self, label, side, kind=None): return self._values.get_slice_bound(label, side, kind) @@ -1159,6 +1234,7 @@ def is_categorical(self): def is_interval(self): return False + @annotate("GENERICINDEX_ARGSORT", color="green", domain="cudf_python") def argsort( self, axis=0, @@ -1220,6 +1296,7 @@ class NumericIndex(GenericIndex): # Subclasses must define the dtype they are associated with. 
_dtype: Union[None, Type[np.number]] = None + @annotate("NUMERICINDEX_INIT", color="green", domain="cudf_python") def __init__(self, data=None, dtype=None, copy=False, name=None): dtype = type(self)._dtype @@ -1557,6 +1634,7 @@ class DatetimeIndex(GenericIndex): dtype='datetime64[ns]', name='a') """ + @annotate("DATETIMEINDEX_INIT", color="green", domain="cudf_python") def __init__( self, data=None, @@ -1611,7 +1689,8 @@ def __init__( data = column.as_column(np.array(data, dtype=dtype)) super().__init__(data, **kwargs) - @property + @property # type: ignore + @annotate("DATETIMEINDEX_YEAR", color="green", domain="cudf_python") def year(self): """ The year of the datetime. @@ -1629,7 +1708,8 @@ def year(self): """ # noqa: E501 return self._get_dt_field("year") - @property + @property # type: ignore + @annotate("DATETIMEINDEX_MONTH", color="green", domain="cudf_python") def month(self): """ The month as January=1, December=12. @@ -1647,7 +1727,8 @@ def month(self): """ # noqa: E501 return self._get_dt_field("month") - @property + @property # type: ignore + @annotate("DATETIMEINDEX_DAY", color="green", domain="cudf_python") def day(self): """ The day of the datetime. @@ -1665,7 +1746,8 @@ def day(self): """ # noqa: E501 return self._get_dt_field("day") - @property + @property # type: ignore + @annotate("DATETIMEINDEX_HOUR", color="green", domain="cudf_python") def hour(self): """ The hours of the datetime. @@ -1685,7 +1767,8 @@ def hour(self): """ return self._get_dt_field("hour") - @property + @property # type: ignore + @annotate("DATETIMEINDEX_MINUTE", color="green", domain="cudf_python") def minute(self): """ The minutes of the datetime. @@ -1705,7 +1788,8 @@ def minute(self): """ return self._get_dt_field("minute") - @property + @property # type: ignore + @annotate("DATETIMEINDEX_SECOND", color="green", domain="cudf_python") def second(self): """ The seconds of the datetime. 
@@ -1725,7 +1809,8 @@ def second(self): """ return self._get_dt_field("second") - @property + @property # type: ignore + @annotate("DATETIMEINDEX_WEEKDAY", color="green", domain="cudf_python") def weekday(self): """ The day of the week with Monday=0, Sunday=6. @@ -1746,7 +1831,8 @@ def weekday(self): """ return self._get_dt_field("weekday") - @property + @property # type: ignore + @annotate("DATETIMEINDEX_DAYOFWEEK", color="green", domain="cudf_python") def dayofweek(self): """ The day of the week with Monday=0, Sunday=6. @@ -1767,7 +1853,8 @@ def dayofweek(self): """ return self._get_dt_field("weekday") - @property + @property # type: ignore + @annotate("DATETIMEINDEX_DAYOFYEAR", color="green", domain="cudf_python") def dayofyear(self): """ The day of the year, from 1-365 in non-leap years and @@ -1789,7 +1876,8 @@ def dayofyear(self): """ return self._get_dt_field("day_of_year") - @property + @property # type: ignore + @annotate("DATETIMEINDEX_DAY_OF_YEAR", color="green", domain="cudf_python") def day_of_year(self): """ The day of the year, from 1-365 in non-leap years and @@ -1811,7 +1899,10 @@ def day_of_year(self): """ return self._get_dt_field("day_of_year") - @property + @property # type: ignore + @annotate( + "DATETIMEINDEX_IS_LEAP_YEAR", color="green", domain="cudf_python" + ) def is_leap_year(self): """ Boolean indicator if the date belongs to a leap year. @@ -1829,7 +1920,8 @@ def is_leap_year(self): res = is_leap_year(self._values).fillna(False) return cupy.asarray(res) - @property + @property # type: ignore + @annotate("DATETIMEINDEX_QUARTER", color="green", domain="cudf_python") def quarter(self): """ Integer indicator for which quarter of the year the date belongs in. 
@@ -1854,6 +1946,7 @@ def quarter(self): res = extract_quarter(self._values) return Int8Index(res, dtype="int8") + @annotate("DATETIMEINDEX_ISOCALENDAR", color="green", domain="cudf_python") def isocalendar(self): """ Returns a DataFrame with the year, week, and day @@ -1875,10 +1968,14 @@ def isocalendar(self): """ return cudf.core.tools.datetimes._to_iso_calendar(self) + @annotate("DATETIMEINDEX_TO_PANDAS", color="green", domain="cudf_python") def to_pandas(self): nanos = self._values.astype("datetime64[ns]") return pd.DatetimeIndex(nanos.to_pandas(), name=self.name) + @annotate( + "DATETIMEINDEX_GET_DT_FIELD", color="green", domain="cudf_python" + ) def _get_dt_field(self, field): out_column = self._values.get_dt_field(field) # column.column_empty_like always returns a Column object @@ -1895,6 +1992,7 @@ def _get_dt_field(self, field): def is_boolean(self): return False + @annotate("DATETIMEINDEX_CEIL", color="green", domain="cudf_python") def ceil(self, freq): """ Perform ceil operation on the data to the specified freq. @@ -1927,6 +2025,7 @@ def ceil(self, freq): return self.__class__._from_data({self.name: out_column}) + @annotate("DATETIMEINDEX_FLOOR", color="green", domain="cudf_python") def floor(self, freq): """ Perform floor operation on the data to the specified freq. @@ -1959,6 +2058,7 @@ def floor(self, freq): return self.__class__._from_data({self.name: out_column}) + @annotate("DATETIMEINDEX_ROUND", color="green", domain="cudf_python") def round(self, freq): """ Perform round operation on the data to the specified freq. 
@@ -2041,6 +2141,7 @@ class TimedeltaIndex(GenericIndex): dtype='timedelta64[s]', name='delta-index') """ + @annotate("TIMEDELTAINDEX_INIT", color="green", domain="cudf_python") def __init__( self, data=None, @@ -2072,6 +2173,7 @@ def __init__( data = column.as_column(np.array(data, dtype=dtype)) super().__init__(data, **kwargs) + @annotate("TIMEDELTAINDEX_TO_PANDAS", color="green", domain="cudf_python") def to_pandas(self): return pd.TimedeltaIndex( self._values.to_pandas(), @@ -2079,28 +2181,36 @@ def to_pandas(self): unit=self._values.time_unit, ) - @property + @property # type: ignore + @annotate("TIMEDELTAINDEX_DAYS", color="green", domain="cudf_python") def days(self): """ Number of days for each element. """ return as_index(arbitrary=self._values.days, name=self.name) - @property + @property # type: ignore + @annotate("TIMEDELTAINDEX_SECONDS", color="green", domain="cudf_python") def seconds(self): """ Number of seconds (>= 0 and less than 1 day) for each element. """ return as_index(arbitrary=self._values.seconds, name=self.name) - @property + @property # type: ignore + @annotate( + "TIMEDELTAINDEX_MICROSECONDS", color="green", domain="cudf_python" + ) def microseconds(self): """ Number of microseconds (>= 0 and less than 1 second) for each element. 
""" return as_index(arbitrary=self._values.microseconds, name=self.name) - @property + @property # type: ignore + @annotate( + "TIMEDELTAINDEX_NANOSECONDS", color="green", domain="cudf_python" + ) def nanoseconds(self): """ Number of nanoseconds (>= 0 and less than 1 microsecond) for each @@ -2108,7 +2218,8 @@ def nanoseconds(self): """ return as_index(arbitrary=self._values.nanoseconds, name=self.name) - @property + @property # type: ignore + @annotate("TIMEDELTAINDEX_COMPONENTS", color="green", domain="cudf_python") def components(self): """ Return a dataframe of the components (days, hours, minutes, @@ -2174,6 +2285,7 @@ class CategoricalIndex(GenericIndex): CategoricalIndex([1, 2, 3, ], categories=[1, 2, 3], ordered=False, dtype='category', name='a') """ # noqa: E501 + @annotate("CATEGORICALINDEX_INIT", color="green", domain="cudf_python") def __init__( self, data=None, @@ -2228,14 +2340,18 @@ def __init__( super().__init__(data, **kwargs) - @property + @property # type: ignore + @annotate("CATEGORICALINDEX_CODES", color="green", domain="cudf_python") def codes(self): """ The category codes of this categorical. """ return as_index(self._values.codes) - @property + @property # type: ignore + @annotate( + "CATEGORICALINDEX_CATEGORIES", color="green", domain="cudf_python" + ) def categories(self): """ The categories of this categorical. 
@@ -2249,6 +2365,7 @@ def is_categorical(self): return True +@annotate("INDEX_INTERVAL_RANGE", color="green", domain="cudf_python") def interval_range( start=None, end=None, periods=None, freq=None, name=None, closed="right", ) -> "IntervalIndex": @@ -2411,6 +2528,7 @@ class IntervalIndex(GenericIndex): IntervalIndex """ + @annotate("INTERVALINDEX_INIT", color="green", domain="cudf_python") def __init__( self, data, closed=None, dtype=None, copy=False, name=None, ): @@ -2435,6 +2553,7 @@ def __init__( self.closed = closed super().__init__(data, **kwargs) + @annotate("INTERVALINDEX_FROM_BREAKS", color="green", domain="cudf_python") def from_breaks(breaks, closed="right", name=None, copy=False, dtype=None): """ Construct an IntervalIndex from an array of splits. @@ -2491,6 +2610,7 @@ class StringIndex(GenericIndex): name: A string """ + @annotate("STRINGINDEX_INIT", color="green", domain="cudf_python") def __init__(self, values, copy=False, **kwargs): kwargs = _setdefault_name(values, **kwargs) if isinstance(values, StringColumn): @@ -2506,11 +2626,13 @@ def __init__(self, values, copy=False, **kwargs): super().__init__(values, **kwargs) + @annotate("STRINGINDEX_TO_PANDAS", color="green", domain="cudf_python") def to_pandas(self): return pd.Index( self.to_numpy(na_value=None), name=self.name, dtype="object" ) + @annotate("STRINGINDEX_REPR", color="green", domain="cudf_python") def __repr__(self): return ( f"{self.__class__.__name__}({self._values.values_host}," @@ -2525,6 +2647,7 @@ def __repr__(self): @copy_docstring(StringMethods) # type: ignore @property + @annotate("STRINGINDEX_STR", color="green", domain="cudf_python") def str(self): return StringMethods(parent=self) @@ -2545,6 +2668,7 @@ def is_object(self): return True +@annotate("INDEX_AS_INDEX", color="green", domain="cudf_python") def as_index(arbitrary, nan_as_null=None, **kwargs) -> BaseIndex: """Create an Index from an arbitrary object @@ -2673,6 +2797,7 @@ class Index(BaseIndex, metaclass=IndexMeta): 
names=['a', 'b']) """ + @annotate("INDEX_INIT", color="green", domain="cudf_python") def __new__( cls, data=None, @@ -2701,6 +2826,7 @@ def __new__( ) @classmethod + @annotate("INDEX_FROM_ARROW", color="green", domain="cudf_python") def from_arrow(cls, obj): try: return cls(ColumnBase.from_arrow(obj)) @@ -2709,6 +2835,7 @@ def from_arrow(cls, obj): return cudf.MultiIndex.from_arrow(obj) +@annotate("INDEX_CONCAT_RANGE_INDEX", color="green", domain="cudf_python") def _concat_range_index(indexes: List[RangeIndex]) -> BaseIndex: """ An internal Utility function to concat RangeIndex objects. @@ -2749,6 +2876,7 @@ def _concat_range_index(indexes: List[RangeIndex]) -> BaseIndex: return RangeIndex(start, stop, step) +@annotate("INDEX_EXTENDED_GCD", color="green", domain="cudf_python") def _extended_gcd(a: int, b: int) -> Tuple[int, int, int]: """ Extended Euclidean algorithms to solve Bezout's identity: diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index b09a2d39c14..4864ca5bae1 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -13,6 +13,7 @@ import cupy import numpy as np import pandas as pd +from nvtx import annotate from pandas._config import get_option import cudf @@ -63,6 +64,7 @@ class MultiIndex(Frame, BaseIndex, NotIterable): ) """ + @annotate("MULTIINDEX_INIT", color="green", domain="cudf_python") def __init__( self, levels=None, @@ -147,11 +149,13 @@ def __init__( self._name = None self.names = names - @property + @property # type: ignore + @annotate("MULTIINDEX_NAMES_GETTER", color="green", domain="cudf_python") def names(self): return self._names - @names.setter + @names.setter # type: ignore + @annotate("MULTIINDEX_NAMES_SETTER", color="green", domain="cudf_python") def names(self, value): value = [None] * self.nlevels if value is None else value @@ -169,6 +173,7 @@ def names(self, value): ) self._names = pd.core.indexes.frozen.FrozenList(value) + 
@annotate("MULTIINDEX_RENAME", color="green", domain="cudf_python") def rename(self, names, inplace=False): """ Alter MultiIndex level names @@ -215,6 +220,7 @@ def rename(self, names, inplace=False): """ return self.set_names(names, level=None, inplace=inplace) + @annotate("MULTIINDEX_SET_NAMES", color="green", domain="cudf_python") def set_names(self, names, level=None, inplace=False): names_is_list_like = is_list_like(names) level_is_list_like = is_list_like(level) @@ -252,6 +258,7 @@ def set_names(self, names, level=None, inplace=False): return self._set_names(names=names, inplace=inplace) @classmethod + @annotate("MULTIINDEX_FROM_DATA", color="green", domain="cudf_python") def _from_data( cls, data: MutableMapping, @@ -264,14 +271,17 @@ def _from_data( obj.name = name return obj - @property + @property # type: ignore + @annotate("MULTIINDEX_NAME_GETTER", color="green", domain="cudf_python") def name(self): return self._name - @name.setter + @name.setter # type: ignore + @annotate("MULTIINDEX_NAME_SETTER", color="green", domain="cudf_python") def name(self, value): self._name = value + @annotate("MULTIINDEX_COPY", color="green", domain="cudf_python") def copy( self, names=None, @@ -367,6 +377,7 @@ def copy( return mi + @annotate("MULTIINDEX_REPR", color="green", domain="cudf_python") def __repr__(self): max_seq_items = get_option("display.max_seq_items") or len(self) @@ -443,7 +454,8 @@ def __repr__(self): data_output = "\n".join(lines) return output_prefix + data_output - @property + @property # type: ignore + @annotate("MULTIINDEX_CODES", color="green", domain="cudf_python") def codes(self): """ Returns the codes of the underlying MultiIndex. 
@@ -473,12 +485,14 @@ def codes(self): self._compute_levels_and_codes() return self._codes - @property + @property # type: ignore + @annotate("MULTIINDEX_NLEVELS", color="green", domain="cudf_python") def nlevels(self): """Integer number of levels in this MultiIndex.""" return len(self._data) - @property + @property # type: ignore + @annotate("MULTIINDEX_LEVELS", color="green", domain="cudf_python") def levels(self): """ Returns list of levels in the MultiIndex @@ -515,11 +529,15 @@ def levels(self): self._compute_levels_and_codes() return self._levels - @property + @property # type: ignore + @annotate("MULTIINDEX_NDIM", color="green", domain="cudf_python") def ndim(self): """Dimension of the data. For MultiIndex ndim is always 2.""" return 2 + @annotate( + "MULTIINDEX_GET_LEVEL_LABEL", color="green", domain="cudf_python" + ) def _get_level_label(self, level): """Get name of the level. @@ -536,6 +554,7 @@ def _get_level_label(self, level): else: return self._data.names[level] + @annotate("MULTIINDEX_ISIN", color="green", domain="cudf_python") def isin(self, values, level=None): """Return a boolean array where the index values are in values. 
@@ -640,6 +659,11 @@ def where(self, cond, other=None, inplace=False): ".where is not supported for MultiIndex operations" ) + @annotate( + "MULTIINDEX_COMPUTE_LEVELS_AND_CODES", + color="green", + domain="cudf_python", + ) def _compute_levels_and_codes(self): levels = [] @@ -652,6 +676,9 @@ def _compute_levels_and_codes(self): self._levels = levels self._codes = cudf.DataFrame._from_data(codes) + @annotate( + "MULTIINDEX_COMPUTE_VALIDITY_MASK", color="green", domain="cudf_python" + ) def _compute_validity_mask(self, index, row_tuple, max_length): """Computes the valid set of indices of values in the lookup""" lookup = cudf.DataFrame() @@ -680,6 +707,11 @@ def _compute_validity_mask(self, index, row_tuple, max_length): raise KeyError(row) return result + @annotate( + "MULTIINDEX_GET_VALID_INDICES_BY_TUPLE", + color="green", + domain="cudf_python", + ) def _get_valid_indices_by_tuple(self, index, row_tuple, max_length): # Instructions for Slicing # if tuple, get first and last elements of tuple @@ -707,6 +739,9 @@ def _get_valid_indices_by_tuple(self, index, row_tuple, max_length): return row_tuple return self._compute_validity_mask(index, row_tuple, max_length) + @annotate( + "MULTIINDEX_INDEX_AND_DOWNCAST", color="green", domain="cudf_python" + ) def _index_and_downcast(self, result, index, index_key): if isinstance(index_key, (numbers.Number, slice)): @@ -775,6 +810,7 @@ def _index_and_downcast(self, result, index, index_key): result.index = index return result + @annotate("MULTIINDEX_GET_ROW_MAJOR", color="green", domain="cudf_python") def _get_row_major( self, df: DataFrameOrSeries, @@ -800,6 +836,9 @@ def _get_row_major( final = self._index_and_downcast(result, result.index, row_tuple) return final + @annotate( + "MULTIINDEX_VALIDATE_INDEXER", color="green", domain="cudf_python" + ) def _validate_indexer( self, indexer: Union[ @@ -826,6 +865,7 @@ def _validate_indexer( for i in indexer: self._validate_indexer(i) + @annotate("MULTIINDEX_EQ", color="green", 
domain="cudf_python") def __eq__(self, other): if isinstance(other, MultiIndex): for self_col, other_col in zip( @@ -836,11 +876,13 @@ def __eq__(self, other): return self.names == other.names return NotImplemented - @property + @property # type: ignore + @annotate("MULTIINDEX_SIZE", color="green", domain="cudf_python") def size(self): # The size of a MultiIndex is only dependent on the number of rows. return self._num_rows + @annotate("MULTIINDEX_TAKE", color="green", domain="cudf_python") def take(self, indices): if isinstance(indices, cudf.Series) and indices.has_nulls: raise ValueError("Column must have no nulls.") @@ -848,6 +890,7 @@ def take(self, indices): obj.names = self.names return obj + @annotate("MULTIINDEX_SERIALIZE", color="green", domain="cudf_python") def serialize(self): header, frames = super().serialize() # Overwrite the names in _data with the true names. @@ -855,6 +898,7 @@ def serialize(self): return header, frames @classmethod + @annotate("MULTIINDEX_DESERIALIZE", color="green", domain="cudf_python") def deserialize(cls, header, frames): # Spoof the column names to construct the frame, then set manually. column_names = pickle.loads(header["column_names"]) @@ -862,6 +906,7 @@ def deserialize(cls, header, frames): obj = super().deserialize(header, frames) return obj._set_names(column_names) + @annotate("MULTIINDEX_GETITEM", color="green", domain="cudf_python") def __getitem__(self, index): flatten = isinstance(index, int) @@ -884,6 +929,7 @@ def __getitem__(self, index): result.names = self.names return result + @annotate("MULTIINDEX_TO_FRAME", color="green", domain="cudf_python") def to_frame(self, index=True, name=None): # TODO: Currently this function makes a shallow copy, which is # incorrect. 
We want to make a deep copy, otherwise further @@ -900,6 +946,9 @@ def to_frame(self, index=True, name=None): df.columns = name return df + @annotate( + "MULTIINDEX_GET_LEVEL_VALUES", color="green", domain="cudf_python" + ) def get_level_values(self, level): """ Return the values at the requested level @@ -953,6 +1002,7 @@ def is_interval(self): return False @classmethod + @annotate("MULTIINDEX_CONCAT", color="green", domain="cudf_python") def _concat(cls, objs): source_data = [o.to_frame(index=False) for o in objs] @@ -973,6 +1023,7 @@ def _concat(cls, objs): return cudf.MultiIndex.from_frame(source_data, names=names) @classmethod + @annotate("MULTIINDEX_FROM_TUPLES", color="green", domain="cudf_python") def from_tuples(cls, tuples, names=None): """ Convert list of tuples to MultiIndex. @@ -1009,7 +1060,8 @@ def from_tuples(cls, tuples, names=None): pdi = pd.MultiIndex.from_tuples(tuples, names=names) return cls.from_pandas(pdi) - @property + @property # type: ignore + @annotate("MULTIINDEX_VALUES_HOST", color="green", domain="cudf_python") def values_host(self): """ Return a numpy representation of the MultiIndex. @@ -1036,7 +1088,8 @@ def values_host(self): """ return self.to_pandas().values - @property + @property # type: ignore + @annotate("MULTIINDEX_VALUES", color="green", domain="cudf_python") def values(self): """ Return a CuPy representation of the MultiIndex. @@ -1068,6 +1121,7 @@ def values(self): return self.to_frame(index=False).values @classmethod + @annotate("MULTIINDEX_FROM_FRAME", color="green", domain="cudf_python") def from_frame(cls, df, names=None): """ Make a MultiIndex from a DataFrame. @@ -1141,6 +1195,7 @@ def from_frame(cls, df, names=None): return obj @classmethod + @annotate("MULTIINDEX_FROM_PRODUCT", color="green", domain="cudf_python") def from_product(cls, arrays, names=None): """ Make a MultiIndex from the cartesian product of multiple iterables. 
@@ -1181,6 +1236,7 @@ def from_product(cls, arrays, names=None): pdi = pd.MultiIndex.from_product(arrays, names=names) return cls.from_pandas(pdi) + @annotate("MULTIINDEX_POP_LEVELS", color="green", domain="cudf_python") def _poplevels(self, level): """ Remove and return the specified levels from self. @@ -1231,6 +1287,7 @@ def _poplevels(self, level): return popped + @annotate("MULTIINDEX_DROP_LEVEL", color="green", domain="cudf_python") def droplevel(self, level=-1): """ Removes the specified levels from the MultiIndex. @@ -1293,11 +1350,13 @@ def droplevel(self, level=-1): else: return mi + @annotate("MULTIINDEX_TO_PANDAS", color="green", domain="cudf_python") def to_pandas(self, nullable=False, **kwargs): result = self.to_frame(index=False).to_pandas(nullable=nullable) return pd.MultiIndex.from_frame(result, names=self.names) @classmethod + @annotate("MULTIINDEX_FROM_PANDAS", color="green", domain="cudf_python") def from_pandas(cls, multiindex, nan_as_null=None): """ Convert from a Pandas MultiIndex @@ -1334,10 +1393,16 @@ def from_pandas(cls, multiindex, nan_as_null=None): return cls.from_frame(df, names=multiindex.names) @cached_property + @annotate("MULTIINDEX_IS_UNIQUE", color="green", domain="cudf_python") def is_unique(self): return len(self) == len(self.unique()) - @property + @property # type: ignore + @annotate( + "MULTIINDEX_IS_MONOTONIC_INCREASING", + color="green", + domain="cudf_python", + ) def is_monotonic_increasing(self): """ Return if the index is monotonic increasing @@ -1345,7 +1410,12 @@ def is_monotonic_increasing(self): """ return self._is_sorted(ascending=None, null_position=None) - @property + @property # type: ignore + @annotate( + "MULTIINDEX_IS_MONOTONIC_DECREASING", + color="green", + domain="cudf_python", + ) def is_monotonic_decreasing(self): """ Return if the index is monotonic decreasing @@ -1355,6 +1425,7 @@ def is_monotonic_decreasing(self): ascending=[False] * len(self.levels), null_position=None ) + 
@annotate("MULTIINDEX_FILLNA", color="green", domain="cudf_python") def fillna(self, value): """ Fill null values with the specified value. @@ -1395,6 +1466,7 @@ def fillna(self, value): return super().fillna(value=value) + @annotate("MULTIINDEX_UNIQUE", color="green", domain="cudf_python") def unique(self): return self.drop_duplicates(keep="first") @@ -1408,6 +1480,7 @@ def _clean_nulls_from_index(self): index_df._clean_nulls_from_dataframe(index_df), names=self.names ) + @annotate("MULTIINDEX_MEMORY_USAGE", color="green", domain="cudf_python") def memory_usage(self, deep=False): usage = sum(super().memory_usage(deep=deep).values()) if self.levels: @@ -1418,11 +1491,13 @@ def memory_usage(self, deep=False): usage += col.memory_usage return usage + @annotate("MULTIINDEX_DIFFERENCE", color="green", domain="cudf_python") def difference(self, other, sort=None): if hasattr(other, "to_pandas"): other = other.to_pandas() return self.to_pandas().difference(other, sort) + @annotate("MULTIINDEX_APPEND", color="green", domain="cudf_python") def append(self, other): """ Append a collection of MultiIndex objects together @@ -1485,6 +1560,7 @@ def append(self, other): return MultiIndex._concat(to_concat) + @annotate("MULTIINDEX_ARRAY_FUNCTION", color="green", domain="cudf_python") def __array_function__(self, func, types, args, kwargs): cudf_df_module = MultiIndex @@ -1531,6 +1607,7 @@ def _level_index_from_level(self, level): ) from None return level + @annotate("MULTIINDEX_GET_LOC", color="green", domain="cudf_python") def get_loc(self, key, method=None, tolerance=None): """ Get location for a label or a tuple of labels. 
@@ -1667,6 +1744,7 @@ def _maybe_match_names(self, other): for self_name, other_name in zip(self.names, other.names) ] + @annotate("MULTIINDEX_UNION", color="green", domain="cudf_python") def _union(self, other, sort=None): # TODO: When to_frame is refactored to return a # deep copy in future, we should push most of the common @@ -1692,6 +1770,7 @@ def _union(self, other, sort=None): return midx.sort_values() return midx + @annotate("MULTIINDEX_INTERSECTION", color="green", domain="cudf_python") def _intersection(self, other, sort=None): if self.names != other.names: deep = True @@ -1714,6 +1793,9 @@ def _intersection(self, other, sort=None): return midx.sort_values() return midx + @annotate( + "MULTIINDEX_COPY_TYPE_METADATA", color="green", domain="cudf_python" + ) def _copy_type_metadata( self, other: Frame, include_index: bool = True ) -> Frame: @@ -1721,6 +1803,7 @@ def _copy_type_metadata( res._names = other._names return res + @annotate("MULTIINDEX_SPLIT_LEVELS", color="green", domain="cudf_python") def _split_columns_by_levels(self, levels): # This function assumes that for levels with duplicate names, they are # specified by indices, not name by ``levels``. E.g. [None, None] can diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 740be91eb9d..e315b24851a 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -14,6 +14,7 @@ import cupy import numpy as np import pandas as pd +from nvtx import annotate from pandas._config import get_option import cudf @@ -95,6 +96,7 @@ class _SeriesIlocIndexer(_FrameIndexer): For integer-location based selection. 
""" + @annotate("SERIESILOC_GETITEM", color="green", domain="cudf_python") def __getitem__(self, arg): if isinstance(arg, tuple): arg = list(arg) @@ -110,6 +112,7 @@ def __getitem__(self, arg): {self._frame.name: data}, index=cudf.Index(self._frame.index[arg]), ) + @annotate("SERIESILOC_SETITEM", color="green", domain="cudf_python") def __setitem__(self, key, value): from cudf.core.column import column @@ -153,6 +156,7 @@ class _SeriesLocIndexer(_FrameIndexer): Label-based selection """ + @annotate("SERIESLOC_GETITEM", color="green", domain="cudf_python") def __getitem__(self, arg: Any) -> Union[ScalarLike, DataFrameOrSeries]: if isinstance(arg, pd.MultiIndex): arg = cudf.from_pandas(arg) @@ -175,6 +179,7 @@ def __getitem__(self, arg: Any) -> Union[ScalarLike, DataFrameOrSeries]: return self._frame.iloc[arg] + @annotate("SERIESLOC_SETITEM", color="green", domain="cudf_python") def __setitem__(self, key, value): try: key = self._loc_to_iloc(key) @@ -298,6 +303,7 @@ def _constructor_expanddim(self): return cudf.DataFrame @classmethod + @annotate("SERIES_FROM_CATEGORICAL", color="green", domain="cudf_python") def from_categorical(cls, categorical, codes=None): """Creates from a pandas.Categorical @@ -338,6 +344,7 @@ def from_categorical(cls, categorical, codes=None): return Series(data=col) @classmethod + @annotate("SERIES_FROM_MASKED_ARRAY", color="green", domain="cudf_python") def from_masked_array(cls, data, mask, null_count=None): """Create a Series with null-mask. 
This is equivalent to: @@ -386,6 +393,7 @@ def from_masked_array(cls, data, mask, null_count=None): col = column.as_column(data).set_mask(mask) return cls(data=col) + @annotate("SERIES_INIT", color="green", domain="cudf_python") def __init__( self, data=None, index=None, dtype=None, name=None, nan_as_null=True, ): @@ -447,6 +455,7 @@ def __init__( self._index = RangeIndex(len(data)) if index is None else index @classmethod + @annotate("SERIES_FROM_DATA", color="green", domain="cudf_python") def _from_data( cls, data: MutableMapping, @@ -461,10 +470,12 @@ def _from_data( out._index = RangeIndex(out._data.nrows) return out + @annotate("SERIES_CONTAINS", color="green", domain="cudf_python") def __contains__(self, item): return item in self._index @classmethod + @annotate("SERIES_FROM_PANDAS", color="green", domain="cudf_python") def from_pandas(cls, s, nan_as_null=None): """ Convert from a Pandas Series. @@ -505,7 +516,8 @@ def from_pandas(cls, s, nan_as_null=None): """ return cls(s, nan_as_null=nan_as_null) - @property + @property # type: ignore + @annotate("SERIES_DT", color="green", domain="cudf_python") def dt(self): """ Accessor object for datetime-like properties of the Series values. 
@@ -546,6 +558,7 @@ def dt(self): "Can only use .dt accessor with datetimelike values" ) + @annotate("SERIES_SERIALIZE", color="green", domain="cudf_python") def serialize(self): header, frames = super().serialize() @@ -558,6 +571,7 @@ def serialize(self): return header, frames @classmethod + @annotate("SERIES_DESERIALIZE", color="green", domain="cudf_python") def deserialize(cls, header, frames): index_nframes = header["index_frame_count"] obj = super().deserialize( @@ -584,6 +598,7 @@ def _get_columns_by_label(self, labels, downcast=False): else self.__class__(dtype=self.dtype, name=self.name) ) + @annotate("SERIES_DROP", color="green", domain="cudf_python") def drop( self, labels=None, @@ -708,6 +723,7 @@ def drop( if not inplace: return out + @annotate("SERIES_APPEND", color="green", domain="cudf_python") def append(self, to_append, ignore_index=False, verify_integrity=False): """Append values from another ``Series`` or array-like object. If ``ignore_index=True``, the index is reset. 
@@ -794,6 +810,7 @@ def append(self, to_append, ignore_index=False, verify_integrity=False): return cudf.concat(to_concat, ignore_index=ignore_index) + @annotate("SERIES_REINDEX", color="green", domain="cudf_python") def reindex(self, index=None, copy=True): """Return a Series that conforms to a new index @@ -829,6 +846,7 @@ def reindex(self, index=None, copy=True): series.name = self.name return series + @annotate("SERIES_RESET_INDEX", color="green", domain="cudf_python") @docutils.doc_apply( doc_reset_index_template.format( klass="Series", @@ -910,6 +928,7 @@ def reset_index(self, level=None, drop=False, name=None, inplace=False): inplace=inplace, ) + @annotate("SERIES_TO_FRAME", color="green", domain="cudf_python") def to_frame(self, name=None): """Convert Series into a DataFrame @@ -952,9 +971,11 @@ def to_frame(self, name=None): return cudf.DataFrame({col: self._column}, index=self.index) + @annotate("SERIES_MEMORY_USAGE", color="green", domain="cudf_python") def memory_usage(self, index=True, deep=False): return sum(super().memory_usage(index, deep).values()) + @annotate("SERIES_ARRAY_FUNCTION", color="green", domain="cudf_python") def __array_function__(self, func, types, args, kwargs): if "out" in kwargs or not all(issubclass(t, Series) for t in types): return NotImplemented @@ -1010,6 +1031,7 @@ def __array_function__(self, func, types, args, kwargs): return NotImplemented + @annotate("SERIES_MAP", color="green", domain="cudf_python") def map(self, arg, na_action=None) -> "Series": """ Map values of Series according to input correspondence. 
@@ -1111,6 +1133,7 @@ def map(self, arg, na_action=None) -> "Series": result = self.applymap(arg) return result + @annotate("SERIES_GETITEM", color="green", domain="cudf_python") def __getitem__(self, arg): if isinstance(arg, slice): return self.iloc[arg] @@ -1121,6 +1144,7 @@ def __getitem__(self, arg): items = SingleColumnFrame.__iter__ + @annotate("SERIES_SETITEM", color="green", domain="cudf_python") def __setitem__(self, key, value): if isinstance(key, slice): self.iloc[key] = value @@ -1272,6 +1296,7 @@ def _make_operands_and_index_for_binop( operands = lhs._make_operands_for_binop(other, fill_value, reflect) return operands, lhs._index + @annotate("SERIES_LOGICAL_AND", color="green", domain="cudf_python") def logical_and(self, other): warnings.warn( "Series.logical_and is deprecated and will be removed.", @@ -1279,6 +1304,7 @@ def logical_and(self, other): ) return self._binaryop(other, "__l_and__").astype(np.bool_) + @annotate("SERIES_REMAINDER", color="green", domain="cudf_python") def remainder(self, other): warnings.warn( "Series.remainder is deprecated and will be removed.", @@ -1286,6 +1312,7 @@ def remainder(self, other): ) return self._binaryop(other, "__mod__") + @annotate("SERIES_LOGICAL_OR", color="green", domain="cudf_python") def logical_or(self, other): warnings.warn( "Series.logical_or is deprecated and will be removed.", @@ -1293,6 +1320,7 @@ def logical_or(self, other): ) return self._binaryop(other, "__l_or__").astype(np.bool_) + @annotate("SERIES_LOGICAL_NOT", color="green", domain="cudf_python") def logical_not(self): warnings.warn( "Series.logical_not is deprecated and will be removed.", @@ -1302,30 +1330,36 @@ def logical_not(self): @copy_docstring(CategoricalAccessor) # type: ignore @property + @annotate("SERIES_CAT", color="green", domain="cudf_python") def cat(self): return CategoricalAccessor(parent=self) @copy_docstring(StringMethods) # type: ignore @property + @annotate("SERIES_STR", color="green", domain="cudf_python") def 
str(self): return StringMethods(parent=self) @copy_docstring(ListMethods) # type: ignore @property + @annotate("SERIES_LIST", color="green", domain="cudf_python") def list(self): return ListMethods(parent=self) @copy_docstring(StructMethods) # type: ignore @property + @annotate("SERIES_STRUCT", color="green", domain="cudf_python") def struct(self): return StructMethods(parent=self) - @property + @property # type: ignore + @annotate("SERIES_DTYPE", color="green", domain="cudf_python") def dtype(self): """dtype of the Series""" return self._column.dtype @classmethod + @annotate("SERIES_CONCAT", color="green", domain="cudf_python") def _concat(cls, objs, axis=0, index=True): # Concatenate index if not provided if index is True: @@ -1395,22 +1429,26 @@ def _concat(cls, objs, axis=0, index=True): return cls(data=col, index=index, name=name) - @property + @property # type: ignore + @annotate("SERIES_VALID_COUNT", color="green", domain="cudf_python") def valid_count(self): """Number of non-null values""" return self._column.valid_count - @property + @property # type: ignore + @annotate("SERIES_NULL_COUNT", color="green", domain="cudf_python") def null_count(self): """Number of null values""" return self._column.null_count - @property + @property # type: ignore + @annotate("SERIES_NULLABLE", color="green", domain="cudf_python") def nullable(self): """A boolean indicating whether a null-mask is needed""" return self._column.nullable - @property + @property # type: ignore + @annotate("SERIES_HAS_NULLS", color="green", domain="cudf_python") def has_nulls(self): """ Indicator whether Series contains null values. @@ -1439,13 +1477,14 @@ def has_nulls(self): """ return self._column.has_nulls() + @annotate("SERIES_DROPNA", color="green", domain="cudf_python") def dropna(self, axis=0, inplace=False, how=None): """ Return a Series with null values removed. 
Parameters ---------- - axis : {0 or ‘index’}, default 0 + axis : {0 or 'index'}, default 0 There is only one axis to drop values from. inplace : bool, default False If True, do operation inplace and return None. @@ -1518,6 +1557,7 @@ def dropna(self, axis=0, inplace=False, how=None): return self._mimic_inplace(result, inplace=inplace) + @annotate("SERIES_DROP_DUPLICATES", color="green", domain="cudf_python") def drop_duplicates(self, keep="first", inplace=False, ignore_index=False): """ Return Series with duplicate values removed. @@ -1553,9 +1593,9 @@ def drop_duplicates(self, keep="first", inplace=False, ignore_index=False): Name: animal, dtype: object With the `keep` parameter, the selection behaviour of duplicated - values can be changed. The value ‘first’ keeps the first + values can be changed. The value 'first' keeps the first occurrence for each set of duplicated entries. - The default value of keep is ‘first’. Note that order of + The default value of keep is 'first'. Note that order of the rows being returned is not guaranteed to be sorted. @@ -1566,7 +1606,7 @@ def drop_duplicates(self, keep="first", inplace=False, ignore_index=False): 0 lama Name: animal, dtype: object - The value ‘last’ for parameter `keep` keeps the last occurrence + The value 'last' for parameter `keep` keeps the last occurrence for each set of duplicated entries. 
>>> s.drop_duplicates(keep='last') @@ -1591,6 +1631,7 @@ def drop_duplicates(self, keep="first", inplace=False, ignore_index=False): return self._mimic_inplace(result, inplace=inplace) + @annotate("SERIES_FILLNA", color="green", domain="cudf_python") def fillna( self, value=None, method=None, axis=None, inplace=False, limit=None ): @@ -1614,6 +1655,7 @@ def fillna( value=value, method=method, axis=axis, inplace=inplace, limit=limit ) + @annotate("SERIES_ALL", color="green", domain="cudf_python") def all(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): if bool_only not in (None, True): raise NotImplementedError( @@ -1621,6 +1663,7 @@ def all(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): ) return super().all(axis, skipna, level, **kwargs) + @annotate("SERIES_ANY", color="green", domain="cudf_python") def any(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): if bool_only not in (None, True): raise NotImplementedError( @@ -1628,6 +1671,7 @@ def any(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): ) return super().any(axis, skipna, level, **kwargs) + @annotate("SERIES_TO_PANDAS", color="green", domain="cudf_python") def to_pandas(self, index=True, nullable=False, **kwargs): """ Convert to a Pandas Series. 
@@ -1691,7 +1735,8 @@ def to_pandas(self, index=True, nullable=False, **kwargs): s.name = self.name return s - @property + @property # type: ignore + @annotate("SERIES_DATA", color="green", domain="cudf_python") def data(self): """The gpu buffer for the data @@ -1717,11 +1762,13 @@ def data(self): """ # noqa: E501 return self._column.data - @property + @property # type: ignore + @annotate("SERIES_NULLMASK", color="green", domain="cudf_python") def nullmask(self): """The gpu buffer for the null-mask""" return cudf.Series(self._column.nullmask) + @annotate("SERIES_ASTYPE", color="green", domain="cudf_python") def astype(self, dtype, copy=False, errors="raise"): """ Cast the Series to the given dtype @@ -1828,11 +1875,13 @@ def astype(self, dtype, copy=False, errors="raise"): pass return self + @annotate("SERIES_SORT_INDEX", color="green", domain="cudf_python") def sort_index(self, axis=0, *args, **kwargs): if axis not in (0, "index"): raise ValueError("Only axis=0 is valid for Series.") return super().sort_index(axis=axis, *args, **kwargs) + @annotate("SERIES_SORT_VALUES", color="green", domain="cudf_python") def sort_values( self, axis=0, @@ -1887,6 +1936,7 @@ def sort_values( ignore_index=ignore_index, ) + @annotate("SERIES_NLARGEST", color="green", domain="cudf_python") def nlargest(self, n=5, keep="first"): """Returns a new Series of the *n* largest element. @@ -1949,6 +1999,7 @@ def nlargest(self, n=5, keep="first"): """ return self._n_largest_or_smallest(True, n, [self.name], keep) + @annotate("SERIES_NSMALLEST", color="green", domain="cudf_python") def nsmallest(self, n=5, keep="first"): """ Returns a new Series of the *n* smallest element. 
@@ -2024,6 +2075,7 @@ def nsmallest(self, n=5, keep="first"): """ return self._n_largest_or_smallest(False, n, [self.name], keep) + @annotate("SERIES_ARGSORT", color="green", domain="cudf_python") def argsort( self, axis=0, @@ -2046,6 +2098,7 @@ def argsort( obj.name = self.name return obj + @annotate("SERIES_REPLACE", color="green", domain="cudf_python") def replace(self, to_replace=None, value=None, *args, **kwargs): if is_dict_like(to_replace) and value is not None: raise ValueError( @@ -2055,6 +2108,7 @@ def replace(self, to_replace=None, value=None, *args, **kwargs): return super().replace(to_replace, value, *args, **kwargs) + @annotate("SERIES_UPDATE", color="green", domain="cudf_python") def update(self, other): """ Modify Series in place using values from passed Series. @@ -2159,6 +2213,7 @@ def update(self, other): self.mask(mask, other, inplace=True) + @annotate("SERIES_LABEL_ENCODING", color="green", domain="cudf_python") def _label_encoding(self, cats, dtype=None, na_sentinel=-1): # Private implementation of deprecated public label_encoding method def _return_sentinel_series(): @@ -2202,6 +2257,7 @@ def _return_sentinel_series(): return codes # UDF related + @annotate("SERIES_APPLY", color="green", domain="cudf_python") def apply(self, func, convert_dtype=True, args=(), **kwargs): """ Apply a scalar function to the values of a Series. @@ -2290,6 +2346,7 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs): raise ValueError("Series.apply only supports convert_dtype=True") return self._apply(func, _get_scalar_kernel, *args, **kwargs) + @annotate("SERIES_APPLY_MAP", color="green", domain="cudf_python") def applymap(self, udf, out_dtype=None): """Apply an elementwise function to transform the values in the Column. 
@@ -2405,6 +2462,7 @@ def applymap(self, udf, out_dtype=None): # # Stats # + @annotate("SERIES_COUNT", color="green", domain="cudf_python") def count(self, level=None, **kwargs): """ Return number of non-NA/null observations in the Series @@ -2431,6 +2489,7 @@ def count(self, level=None, **kwargs): return self.valid_count + @annotate("SERIES_MODE", color="green", domain="cudf_python") def mode(self, dropna=True): """ Return the mode(s) of the dataset. @@ -2499,6 +2558,7 @@ def mode(self, dropna=True): return Series(val_counts.index.sort_values(), name=self.name) + @annotate("SERIES_ROUND", color="green", domain="cudf_python") def round(self, decimals=0, how="half_even"): if not is_integer(decimals): raise ValueError( @@ -2507,6 +2567,7 @@ def round(self, decimals=0, how="half_even"): decimals = int(decimals) return super().round(decimals, how) + @annotate("SERIES_COV", color="green", domain="cudf_python") def cov(self, other, min_periods=None): """ Compute covariance with Series, excluding missing values. @@ -2556,6 +2617,7 @@ def cov(self, other, min_periods=None): f"{other.dtype}" ) + @annotate("SERIES_TRANSPOSE", color="green", domain="cudf_python") def transpose(self): """Return the transpose, which is by definition self. """ @@ -2564,6 +2626,7 @@ def transpose(self): T = property(transpose, doc=transpose.__doc__) + @annotate("SERIES_CORR", color="green", domain="cudf_python") def corr(self, other, method="pearson", min_periods=None): """Calculates the sample correlation between two Series, excluding missing values. @@ -2597,6 +2660,7 @@ def corr(self, other, method="pearson", min_periods=None): f"cannot perform corr with types {self.dtype}, {other.dtype}" ) + @annotate("SERIES_AUTOCORR", color="green", domain="cudf_python") def autocorr(self, lag=1): """Compute the lag-N autocorrelation. This method computes the Pearson correlation between the Series and its shifted self. 
@@ -2622,6 +2686,7 @@ def autocorr(self, lag=1): """ return self.corr(self.shift(lag)) + @annotate("SERIES_ISIN", color="green", domain="cudf_python") def isin(self, values): """Check whether values are contained in Series. @@ -2691,6 +2756,7 @@ def isin(self, values): {self.name: self._column.isin(values)}, index=self.index ) + @annotate("SERIES_UNIQUE", color="green", domain="cudf_python") def unique(self): """ Returns unique values of this Series. @@ -2723,6 +2789,7 @@ def unique(self): res = self._column.unique() return Series(res, name=self.name) + @annotate("SERIES_VALUE_COUNTS", color="green", domain="cudf_python") def value_counts( self, normalize=False, @@ -2845,6 +2912,7 @@ def value_counts( res = res / float(res._column.sum()) return res + @annotate("SERIES_QUANTILE", color="green", domain="cudf_python") def quantile( self, q=0.5, interpolation="linear", exact=True, quant_index=True ): @@ -2909,6 +2977,7 @@ def quantile( return Series(result, index=index, name=self.name) @docutils.doc_describe() + @annotate("SERIES_DESCRIBE", color="green", domain="cudf_python") def describe( self, percentiles=None, @@ -3064,6 +3133,7 @@ def _describe_timestamp(self): else: return _describe_categorical(self) + @annotate("SERIES_DIGITIZE", color="green", domain="cudf_python") def digitize(self, bins, right=False): """Return the indices of the bins to which each value in series belongs. @@ -3099,6 +3169,7 @@ def digitize(self, bins, right=False): cudf.core.column.numerical.digitize(self._column, bins, right) ) + @annotate("SERIES_DIFF", color="green", domain="cudf_python") def diff(self, periods=1): """Calculate the difference between values at positions i and i - N in an array and store the output in a new array. 
@@ -3187,6 +3258,7 @@ def diff(self, periods=1): return Series(output_col, name=self.name, index=self.index) @copy_docstring(SeriesGroupBy) + @annotate("SERIES_GROUPBY", color="green", domain="cudf_python") def groupby( self, by=None, @@ -3232,6 +3304,7 @@ def groupby( ) ) + @annotate("SERIES_RENAME", color="green", domain="cudf_python") def rename(self, index=None, copy=True): """ Alter Series name @@ -3277,6 +3350,7 @@ def rename(self, index=None, copy=True): out_data = self._data.copy(deep=copy) return Series._from_data(out_data, self.index, name=index) + @annotate("SERIES_MERGE", color="green", domain="cudf_python") def merge( self, other, @@ -3328,18 +3402,21 @@ def merge( return result + @annotate("SERIES_ADD_PREFIX", color="green", domain="cudf_python") def add_prefix(self, prefix): return Series._from_data( data=self._data.copy(deep=True), index=prefix + self.index.astype(str), ) + @annotate("SERIES_ADD_SUFFIX", color="green", domain="cudf_python") def add_suffix(self, suffix): return Series._from_data( data=self._data.copy(deep=True), index=self.index.astype(str) + suffix, ) + @annotate("SERIES_KEYS", color="green", domain="cudf_python") def keys(self): """ Return alias for index. @@ -3383,6 +3460,7 @@ def keys(self): """ return self.index + @annotate("SERIES_EXPLODE", color="green", domain="cudf_python") def explode(self, ignore_index=False): """ Transform each element of a list-like to a row, replicating index @@ -3424,6 +3502,7 @@ def explode(self, ignore_index=False): return super()._explode(self._column_names[0], ignore_index) + @annotate("SERIES_PCT_CHANGE", color="green", domain="cudf_python") def pct_change( self, periods=1, fill_method="ffill", limit=None, freq=None ): @@ -3575,7 +3654,8 @@ class DatetimeProperties: def __init__(self, series): self.series = series - @property + @property # type: ignore + @annotate("SERIES_DT_YEAR", color="green", domain="cudf_python") def year(self): """ The year of the datetime. 
@@ -3599,7 +3679,8 @@ def year(self): """ return self._get_dt_field("year") - @property + @property # type: ignore + @annotate("SERIES_DT_MONTH", color="green", domain="cudf_python") def month(self): """ The month as January=1, December=12. @@ -3623,7 +3704,8 @@ def month(self): """ return self._get_dt_field("month") - @property + @property # type: ignore + @annotate("SERIES_DT_DAY", color="green", domain="cudf_python") def day(self): """ The day of the datetime. @@ -3647,7 +3729,8 @@ def day(self): """ return self._get_dt_field("day") - @property + @property # type: ignore + @annotate("SERIES_DT_HOUR", color="green", domain="cudf_python") def hour(self): """ The hours of the datetime. @@ -3671,7 +3754,8 @@ def hour(self): """ return self._get_dt_field("hour") - @property + @property # type: ignore + @annotate("SERIES_DT_MINUTE", color="green", domain="cudf_python") def minute(self): """ The minutes of the datetime. @@ -3695,7 +3779,8 @@ def minute(self): """ return self._get_dt_field("minute") - @property + @property # type: ignore + @annotate("SERIES_DT_SECOND", color="green", domain="cudf_python") def second(self): """ The seconds of the datetime. @@ -3719,7 +3804,8 @@ def second(self): """ return self._get_dt_field("second") - @property + @property # type: ignore + @annotate("SERIES_DT_WEEKDAY", color="green", domain="cudf_python") def weekday(self): """ The day of the week with Monday=0, Sunday=6. @@ -3755,7 +3841,8 @@ def weekday(self): """ return self._get_dt_field("weekday") - @property + @property # type: ignore + @annotate("SERIES_DT_DAYOFWEEK", color="green", domain="cudf_python") def dayofweek(self): """ The day of the week with Monday=0, Sunday=6. 
@@ -3791,7 +3878,8 @@ def dayofweek(self): """ return self._get_dt_field("weekday") - @property + @property # type: ignore + @annotate("SERIES_DT_DAYOFYEAR", color="green", domain="cudf_python") def dayofyear(self): """ The day of the year, from 1-365 in non-leap years and @@ -3828,7 +3916,8 @@ def dayofyear(self): """ return self._get_dt_field("day_of_year") - @property + @property # type: ignore + @annotate("SERIES_DT_DAY_OF_YEAR", color="green", domain="cudf_python") def day_of_year(self): """ The day of the year, from 1-365 in non-leap years and @@ -3865,7 +3954,8 @@ def day_of_year(self): """ return self._get_dt_field("day_of_year") - @property + @property # type: ignore + @annotate("SERIES_DT_IS_LEAP_YEAR", color="green", domain="cudf_python") def is_leap_year(self): """ Boolean indicator if the date belongs to a leap year. @@ -3923,7 +4013,8 @@ def is_leap_year(self): name=self.series.name, ) - @property + @property # type: ignore + @annotate("SERIES_DT_QUARTER", color="green", domain="cudf_python") def quarter(self): """ Integer indicator for which quarter of the year the date belongs in. @@ -3954,6 +4045,7 @@ def quarter(self): {None: res}, index=self.series._index, name=self.series.name, ) + @annotate("SERIES_DT_ISOCALENDAR", color="green", domain="cudf_python") def isocalendar(self): """ Returns a DataFrame with the year, week, and day @@ -3997,14 +4089,16 @@ def isocalendar(self): """ return cudf.core.tools.datetimes._to_iso_calendar(self) - @property + @property # type: ignore + @annotate("SERIES_DT_IS_MONTH_START", color="green", domain="cudf_python") def is_month_start(self): """ Booleans indicating if dates are the first day of the month. """ return (self.day == 1).fillna(False) - @property + @property # type: ignore + @annotate("SERIES_DT_DAYS_IN_MONTH", color="green", domain="cudf_python") def days_in_month(self): """ Get the total number of days in the month that the date falls on. 
@@ -4055,7 +4149,8 @@ def days_in_month(self): name=self.series.name, ) - @property + @property # type: ignore + @annotate("SERIES_DT_IS_MONTH_END", color="green", domain="cudf_python") def is_month_end(self): """ Boolean indicator if the date is the last day of the month. @@ -4101,7 +4196,10 @@ def is_month_end(self): ) return (self.day == last_day.dt.day).fillna(False) - @property + @property # type: ignore + @annotate( + "SERIES_DT_IS_QUARTER_START", color="green", domain="cudf_python" + ) def is_quarter_start(self): """ Boolean indicator if the date is the first day of a quarter. @@ -4147,7 +4245,8 @@ def is_quarter_start(self): {None: result}, index=self.series._index, name=self.series.name, ) - @property + @property # type: ignore + @annotate("SERIES_DT_IS_QUARTER_END", color="green", domain="cudf_python") def is_quarter_end(self): """ Boolean indicator if the date is the last day of a quarter. @@ -4195,7 +4294,8 @@ def is_quarter_end(self): {None: result}, index=self.series._index, name=self.series.name, ) - @property + @property # type: ignore + @annotate("SERIES_DT_IS_YEAR_START", color="green", domain="cudf_python") def is_year_start(self): """ Boolean indicator if the date is the first day of the year. @@ -4229,7 +4329,8 @@ def is_year_start(self): name=self.series.name, ) - @property + @property # type: ignore + @annotate("SERIES_DT_IS_YEAR_END", color="green", domain="cudf_python") def is_year_end(self): """ Boolean indicator if the date is the last day of the year.
@@ -4265,12 +4366,14 @@ def is_year_end(self): {None: result}, index=self.series._index, name=self.series.name, ) + @annotate("SERIES_DT_GET_DT_FIELD", color="green", domain="cudf_python") def _get_dt_field(self, field): out_column = self.series._column.get_dt_field(field) return Series( data=out_column, index=self.series._index, name=self.series.name ) + @annotate("SERIES_DT_CEIL", color="green", domain="cudf_python") def ceil(self, freq): """ Perform ceil operation on the data to the specified freq. @@ -4307,6 +4410,7 @@ def ceil(self, freq): data={self.series.name: out_column}, index=self.series._index ) + @annotate("SERIES_DT_FLOOR", color="green", domain="cudf_python") def floor(self, freq): """ Perform floor operation on the data to the specified freq. @@ -4343,6 +4447,7 @@ def floor(self, freq): data={self.series.name: out_column}, index=self.series._index ) + @annotate("SERIES_DT_ROUND", color="green", domain="cudf_python") def round(self, freq): """ Perform round operation on the data to the specified freq. @@ -4382,6 +4487,7 @@ def round(self, freq): data={self.series.name: out_column}, index=self.series._index ) + @annotate("SERIES_DT_STRFTIME", color="green", domain="cudf_python") def strftime(self, date_format, *args, **kwargs): """ Convert to Series using specified ``date_format``. @@ -4535,7 +4641,8 @@ class TimedeltaProperties: def __init__(self, series): self.series = series - @property + @property # type: ignore + @annotate("SERIES_TD_DAYS", color="green", domain="cudf_python") def days(self): """ Number of days. @@ -4566,7 +4673,8 @@ def days(self): """ return self._get_td_field("days") - @property + @property # type: ignore + @annotate("SERIES_TD_SECONDS", color="green", domain="cudf_python") def seconds(self): """ Number of seconds (>= 0 and less than 1 day). 
@@ -4604,7 +4712,8 @@ def seconds(self): """ return self._get_td_field("seconds") - @property + @property # type: ignore + @annotate("SERIES_TD_MICROSECONDS", color="green", domain="cudf_python") def microseconds(self): """ Number of microseconds (>= 0 and less than 1 second). @@ -4635,7 +4744,8 @@ def microseconds(self): """ return self._get_td_field("microseconds") - @property + @property # type: ignore + @annotate("SERIES_TD_NANOSECONDS", color="green", domain="cudf_python") def nanoseconds(self): """ Return the number of nanoseconds (n), where 0 <= n < 1 microsecond. @@ -4666,7 +4776,8 @@ def nanoseconds(self): """ return self._get_td_field("nanoseconds") - @property + @property # type: ignore + @annotate("SERIES_TD_COMPONENTS", color="green", domain="cudf_python") def components(self): """ Return a Dataframe of the components of the Timedeltas. @@ -4695,6 +4806,7 @@ def components(self): """ # noqa: E501 return self.series._column.components(index=self.series._index) + @annotate("SERIES_TD_GET_TD_FIELD", color="green", domain="cudf_python") def _get_td_field(self, field): out_column = getattr(self.series._column, field) return Series( @@ -4702,6 +4814,7 @@ def _get_td_field(self, field): ) +@annotate("SERIES__ALIGN_INDICES", color="green", domain="cudf_python") def _align_indices(series_list, how="outer", allow_non_unique=False): """ Internal util to align the indices of a list of Series objects @@ -4769,6 +4882,7 @@ def _align_indices(series_list, how="outer", allow_non_unique=False): return result +@annotate("CUDF_ISCLOSE", color="green", domain="cudf_python") def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False): """Returns a boolean array where two arrays are equal within a tolerance. 
diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py index 508bbfb3a9a..666b743f7ef 100644 --- a/python/cudf/cudf/core/single_column_frame.py +++ b/python/cudf/cudf/core/single_column_frame.py @@ -18,6 +18,7 @@ import cupy import numpy as np import pandas as pd +from nvtx import annotate import cudf from cudf._typing import Dtype @@ -42,6 +43,7 @@ class SingleColumnFrame(Frame, NotIterable): "index": 0, } + @annotate("SINGLECOLUMNFRAME_REDUCE", color="green", domain="cudf_python") def _reduce( self, op, axis=None, level=None, numeric_only=None, **kwargs, ): @@ -60,6 +62,7 @@ def _reduce( except AttributeError: raise TypeError(f"cannot perform {op} with type {self.dtype}") + @annotate("SINGLECOLUMNFRAME_SCAN", color="green", domain="cudf_python") def _scan(self, op, axis=None, *args, **kwargs): if axis not in (None, 0): raise NotImplementedError("axis parameter is not implemented yet") @@ -67,6 +70,9 @@ def _scan(self, op, axis=None, *args, **kwargs): return super()._scan(op, axis=axis, *args, **kwargs) @classmethod + @annotate( + "SINGLECOLUMNFRAME_FROM_DATA", color="green", domain="cudf_python" + ) def _from_data( cls, data: MutableMapping, @@ -79,21 +85,27 @@ def _from_data( out.name = name return out - @property + @property # type: ignore + @annotate("SINGLECOLUMNFRAME_NAME", color="green", domain="cudf_python") def name(self): """Get the name of this object.""" return next(iter(self._data.names)) - @name.setter + @name.setter # type: ignore + @annotate( + "SINGLECOLUMNFRAME_NAME_SETTER", color="green", domain="cudf_python" + ) def name(self, value): self._data[value] = self._data.pop(self.name) - @property + @property # type: ignore + @annotate("SINGLECOLUMNFRAME_NDIM", color="green", domain="cudf_python") def ndim(self): """Get the dimensionality (always 1 for single-columned frames).""" return 1 - @property + @property # type: ignore + @annotate("SINGLECOLUMNFRAME_SHAPE", color="green", domain="cudf_python") 
def shape(self): """Get a tuple representing the dimensionality of the Index.""" return (len(self),) @@ -104,26 +116,38 @@ def __bool__(self): "a.empty, a.bool(), a.item(), a.any() or a.all()." ) - @property + @property # type: ignore + @annotate( + "SINGLECOLUMNFRAME_NUM_COLUMNS", color="green", domain="cudf_python" + ) def _num_columns(self): return 1 - @property + @property # type: ignore + @annotate("SINGLECOLUMNFRAME_COLUMN", color="green", domain="cudf_python") def _column(self): return self._data[self.name] - @_column.setter + @_column.setter # type: ignore + @annotate( + "SINGLECOLUMNFRAME_COLUMN_SETTER", color="green", domain="cudf_python" + ) def _column(self, value): self._data[self.name] = value - @property + @property # type: ignore + @annotate("SINGLECOLUMNFRAME_VALUES", color="green", domain="cudf_python") def values(self): # noqa: D102 return self._column.values - @property + @property # type: ignore + @annotate( + "SINGLECOLUMNFRAME_VALUES_HOST", color="green", domain="cudf_python" + ) def values_host(self): # noqa: D102 return self._column.values_host + @annotate("SINGLECOLUMNFRAME_TO_CUPY", color="green", domain="cudf_python") def to_cupy( self, dtype: Union[Dtype, None] = None, @@ -132,6 +156,9 @@ def to_cupy( ) -> cupy.ndarray: # noqa: D102 return super().to_cupy(dtype, copy, na_value).flatten() + @annotate( + "SINGLECOLUMNFRAME_TO_NUMPY", color="green", domain="cudf_python" + ) def to_numpy( self, dtype: Union[Dtype, None] = None, @@ -151,6 +178,9 @@ def tolist(self): # noqa: D102 to_list = tolist @classmethod + @annotate( + "SINGLECOLUMNFRAME_FROM_ARROW", color="green", domain="cudf_python" + ) def from_arrow(cls, array): """Create from PyArrow Array/ChunkedArray. @@ -181,6 +211,9 @@ def from_arrow(cls, array): """ return cls(ColumnBase.from_arrow(array)) + @annotate( + "SINGLECOLUMNFRAME_TO_ARROW", color="green", domain="cudf_python" + ) def to_arrow(self): """ Convert to a PyArrow Array. 
@@ -211,7 +244,10 @@ def to_arrow(self): """ return self._column.to_arrow() - @property + @property # type: ignore + @annotate( + "SINGLECOLUMNFRAME_IS_UNIQUE", color="green", domain="cudf_python" + ) def is_unique(self): """Return boolean if values in the object are unique. @@ -221,7 +257,10 @@ def is_unique(self): """ return self._column.is_unique - @property + @property # type: ignore + @annotate( + "SINGLECOLUMNFRAME_IS_MONOTONIC", color="green", domain="cudf_python" + ) def is_monotonic(self): """Return boolean if values in the object are monotonically increasing. @@ -233,7 +272,12 @@ def is_monotonic(self): """ return self.is_monotonic_increasing - @property + @property # type: ignore + @annotate( + "SINGLECOLUMNFRAME_IS_MONOTONIC_INCREASING", + color="green", + domain="cudf_python", + ) def is_monotonic_increasing(self): """Return boolean if values in the object are monotonically increasing. @@ -243,7 +287,12 @@ def is_monotonic_increasing(self): """ return self._column.is_monotonic_increasing - @property + @property # type: ignore + @annotate( + "SINGLECOLUMNFRAME_IS_MONOTONIC_DECREASING", + color="green", + domain="cudf_python", + ) def is_monotonic_decreasing(self): """Return boolean if values in the object are monotonically decreasing. @@ -253,10 +302,18 @@ def is_monotonic_decreasing(self): """ return self._column.is_monotonic_decreasing - @property + @property # type: ignore + @annotate( + "SINGLECOLUMNFRAME_CUDA_ARRAY_INTERFACE", + color="green", + domain="cudf_python", + ) def __cuda_array_interface__(self): return self._column.__cuda_array_interface__ + @annotate( + "SINGLECOLUMNFRAME_FACTORIZE", color="green", domain="cudf_python" + ) def factorize(self, na_sentinel=-1): """Encode the input values as integer labels. 
@@ -284,6 +341,11 @@ def factorize(self, na_sentinel=-1): """ return cudf.core.algorithms.factorize(self, na_sentinel=na_sentinel) + @annotate( + "SINGLECOLUMNFRAME_MAKE_OPERANDS_FOR_BINOP", + color="green", + domain="cudf_python", + ) def _make_operands_for_binop( self, other: Any, @@ -337,6 +399,7 @@ def _make_operands_for_binop( return {result_name: (self._column, other, reflect, fill_value)} + @annotate("SINGLECOLUMNFRAME_NUNIQUE", color="green", domain="cudf_python") def nunique(self, method: builtins.str = "sort", dropna: bool = True): """ Return count of unique values for the column.