From 60f7a43e94db9209e0e696dcde39f15b0e078bd7 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Fri, 11 Feb 2022 13:33:42 -0500 Subject: [PATCH 1/3] Replace custom `cached_property` implementation with functools --- python/cudf/cudf/core/_base_index.py | 2 +- python/cudf/cudf/core/column_accessor.py | 7 +++---- python/cudf/cudf/core/frame.py | 4 ++-- python/cudf/cudf/core/groupby/groupby.py | 3 ++- python/cudf/cudf/core/index.py | 3 ++- python/cudf/cudf/core/indexed_frame.py | 2 +- python/cudf/cudf/core/join/join.py | 4 ++-- python/cudf/cudf/core/multiindex.py | 7 ++----- python/cudf/cudf/utils/utils.py | 22 ---------------------- 9 files changed, 15 insertions(+), 39 deletions(-) diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py index 2e6f138d2e3..1d8d04e897b 100644 --- a/python/cudf/cudf/core/_base_index.py +++ b/python/cudf/cudf/core/_base_index.py @@ -3,6 +3,7 @@ from __future__ import annotations import pickle +from functools import cached_property from typing import Any, Set import pandas as pd @@ -31,7 +32,6 @@ is_mixed_with_object_dtype, numeric_normalize_types, ) -from cudf.utils.utils import cached_property class BaseIndex(Serializable): diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py index 67976ac27d4..9cb86ca1cd2 100644 --- a/python/cudf/cudf/core/column_accessor.py +++ b/python/cudf/cudf/core/column_accessor.py @@ -4,7 +4,7 @@ import itertools from collections.abc import MutableMapping -from functools import reduce +from functools import cached_property, reduce from typing import ( TYPE_CHECKING, Any, @@ -20,7 +20,6 @@ import cudf from cudf.core import column -from cudf.utils.utils import cached_property if TYPE_CHECKING: from cudf.core.column import ColumnBase @@ -360,9 +359,9 @@ def select_by_index(self, index: Any) -> ColumnAccessor: start, stop, step = index.indices(len(self._data)) keys = self.names[start:stop:step] elif pd.api.types.is_integer(index): - keys = [self.names[index]] + keys = (self.names[index],) else: - keys = (self.names[i] for i in index) + keys = tuple(self.names[i] for i in index) data = {k: self._data[k] for k in keys} return self.__class__( data, multiindex=self.multiindex, level_names=self.level_names, diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index a9e425f2012..1d207c7df6b 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -91,7 +91,7 @@ def _num_rows(self) -> int: @property def _column_names(self) -> List[Any]: # TODO: List[str]? - return self._data.names + return list(self._data.names) @property def _index_names(self) -> List[Any]: # TODO: List[str]? @@ -105,7 +105,7 @@ def _index_names(self) -> List[Any]: # TODO: List[str]? @property def _columns(self) -> List[Any]: # TODO: List[Column]? - return self._data.columns + return list(self._data.columns) def serialize(self): header = { diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index ff700144bed..4bd14a2c47b 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -4,6 +4,7 @@ import itertools import pickle import warnings +from functools import cached_property import numpy as np import pandas as pd @@ -16,7 +17,7 @@ from cudf.core.abc import Serializable from cudf.core.column.column import arange, as_column from cudf.core.multiindex import MultiIndex -from cudf.utils.utils import GetAttrGetItemMixin, cached_property +from cudf.utils.utils import GetAttrGetItemMixin # The three functions below return the quantiles [25%, 50%, 75%] diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index f71f930a21c..b776c0217b9 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -5,6 +5,7 @@ import math import pickle import warnings +from functools import cached_property from numbers import Number from typing import ( Any, @@ -54,7 +55,7 @@ from cudf.core.single_column_frame import SingleColumnFrame from cudf.utils.docutils import copy_docstring from cudf.utils.dtypes import find_common_type -from cudf.utils.utils import cached_property, search_range +from cudf.utils.utils import search_range T = TypeVar("T", bound="Frame") diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index fab5d75f62b..1496808a4d1 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -6,6 +6,7 @@ import operator import warnings from collections import Counter, abc +from functools import cached_property from typing import Callable, Type, TypeVar from uuid import uuid4 @@ -30,7 +31,6 @@ from cudf.core.index import Index, RangeIndex, _index_from_columns from cudf.core.multiindex import MultiIndex from cudf.core.udf.utils import _compile_or_get, _supported_cols_from_frame -from cudf.utils.utils import cached_property doc_reset_index_template = """ Reset the index of the {klass}, or a level of it. diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py index 39ff4718550..960bfa155fb 100644 --- a/python/cudf/cudf/core/join/join.py +++ b/python/cudf/cudf/core/join/join.py @@ -1,7 +1,7 @@ # Copyright (c) 2020-2021, NVIDIA CORPORATION. from __future__ import annotations -from typing import TYPE_CHECKING, Callable, cast +from typing import TYPE_CHECKING, Any, Callable, List, cast import cudf from cudf import _lib as libcudf @@ -320,7 +320,7 @@ def _sort_result(self, result: Frame) -> Frame: # same order as given in 'on'. If the indices are used as # keys, the index will be sorted. If one index is specified, # the key columns on the other side will be used to sort. - by = [] + by: List[Any] = [] if self._using_left_index and self._using_right_index: if result._index is not None: by.extend(result._index._data.columns) diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 8581b97c217..56826b18d45 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -6,6 +6,7 @@ import numbers import pickle from collections.abc import Sequence +from functools import cached_property from numbers import Integral from typing import Any, List, MutableMapping, Optional, Tuple, Union @@ -22,11 +23,7 @@ from cudf.core._compat import PANDAS_GE_120 from cudf.core.frame import Frame from cudf.core.index import BaseIndex, _lexsorted_equal_range, as_index -from cudf.utils.utils import ( - NotIterable, - _maybe_indices_to_slice, - cached_property, -) +from cudf.utils.utils import NotIterable, _maybe_indices_to_slice class MultiIndex(Frame, BaseIndex, NotIterable): diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py index 8571d9ffed5..4143cbd1d66 100644 --- a/python/cudf/cudf/utils/utils.py +++ b/python/cudf/cudf/utils/utils.py @@ -144,28 +144,6 @@ def set_allocator( IS_NEP18_ACTIVE = _is_nep18_active() -class cached_property: - """ - Like @property, but only evaluated upon first invocation. - To force re-evaluation of a cached_property, simply delete - it with `del`. - """ - - # TODO: Can be replaced with functools.cached_property when we drop support - # for Python 3.7. - - def __init__(self, func): - self.func = func - - def __get__(self, instance, cls): - if instance is None: - return self - else: - value = self.func(instance) - object.__setattr__(instance, self.func.__name__, value) - return value - - class GetAttrGetItemMixin: """This mixin changes `__getattr__` to attempt a `__getitem__` call. From 1c73cc0bc7c88735210b58b7e6acc477d391786b Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Fri, 11 Feb 2022 16:29:22 -0500 Subject: [PATCH 2/3] Update copyright years --- python/cudf/cudf/core/_base_index.py | 2 +- python/cudf/cudf/core/index.py | 2 +- python/cudf/cudf/core/indexed_frame.py | 2 +- python/cudf/cudf/core/join/join.py | 2 +- python/cudf/cudf/core/multiindex.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py index 1d8d04e897b..08ef159ec54 100644 --- a/python/cudf/cudf/core/_base_index.py +++ b/python/cudf/cudf/core/_base_index.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. +# Copyright (c) 2021-2022, NVIDIA CORPORATION. from __future__ import annotations diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index b776c0217b9..5b60e8dbd1c 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2021, NVIDIA CORPORATION. +# Copyright (c) 2018-2022, NVIDIA CORPORATION. from __future__ import annotations diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 1496808a4d1..6d1f375b68c 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. +# Copyright (c) 2021-2022, NVIDIA CORPORATION. """Base class for Frame types that have an index.""" from __future__ import annotations diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py index 960bfa155fb..c7e46cf0165 100644 --- a/python/cudf/cudf/core/join/join.py +++ b/python/cudf/cudf/core/join/join.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. from __future__ import annotations from typing import TYPE_CHECKING, Any, Callable, List, cast diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 56826b18d45..5e0cd2ca8cb 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. from __future__ import annotations From 67688259cb3302483ba70af3262fb4fc399b10c9 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Mon, 14 Feb 2022 14:32:01 -0500 Subject: [PATCH 3/3] Return a Tuple, not a List --- python/cudf/cudf/core/frame.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index dad41712679..be556d005ea 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -90,22 +90,22 @@ def _num_rows(self) -> int: return len(self._data.columns[0]) @property - def _column_names(self) -> List[Any]: # TODO: List[str]? - return list(self._data.names) + def _column_names(self) -> Tuple[Any, ...]: # TODO: Tuple[str]? + return tuple(self._data.names) @property - def _index_names(self) -> List[Any]: # TODO: List[str]? + def _index_names(self) -> Optional[Tuple[Any, ...]]: # TODO: Tuple[str]? # TODO: Temporarily suppressing mypy warnings to avoid introducing bugs # by returning an empty list where one is not expected. return ( None # type: ignore if self._index is None - else self._index._data.names + else tuple(self._index._data.names) ) @property - def _columns(self) -> List[Any]: # TODO: List[Column]? - return list(self._data.columns) + def _columns(self) -> Tuple[Any, ...]: # TODO: Tuple[Column]? + return tuple(self._data.columns) def serialize(self): header = {