rapidsai · rapids-bot · Mar 21, 2022 · Mar 14, 2022 · Mar 14, 2022 · Mar 14, 2022
@@ -675,7 +675,11 @@ def append(self, other: ColumnBase) -> ColumnBase:
         return concat_columns([self, as_column(other)])
 
     def quantile(
-        self, q: Union[float, Sequence[float]], interpolation: str, exact: bool
+        self,
+        q: np.ndarray,
+        interpolation: str,
+        exact: bool,
+        return_scalar: bool,
     ) -> ColumnBase:
         raise TypeError(f"cannot perform quantile with type {self.dtype}")
 

@@ -6,7 +6,6 @@
 import locale
 import re
 from locale import nl_langinfo
-from numbers import Number
 from types import SimpleNamespace
 from typing import Any, Mapping, Sequence, Union, cast
 
@@ -373,12 +372,19 @@ def median(self, skipna: bool = None) -> pd.Timestamp:
         )
 
     def quantile(
-        self, q: Union[float, Sequence[float]], interpolation: str, exact: bool
+        self,
+        q: np.ndarray,
+        interpolation: str,
+        exact: bool,
+        return_scalar: bool,
     ) -> ColumnBase:
         result = self.as_numerical.quantile(
-            q=q, interpolation=interpolation, exact=exact
+            q=q,
+            interpolation=interpolation,
+            exact=exact,
+            return_scalar=return_scalar,
         )
-        if isinstance(q, Number):
+        if return_scalar:
             return pd.Timestamp(result, unit=self.time_unit)
         return result.astype(self.dtype)
 

@@ -3,8 +3,7 @@
 
 from __future__ import annotations
 
-from numbers import Number
-from typing import Sequence, Union
+from typing import cast
 
 import numpy as np
 
@@ -92,18 +91,28 @@ def skew(self, skipna: bool = None) -> ScalarLike:
         return skew
 
     def quantile(
-        self, q: Union[float, Sequence[float]], interpolation: str, exact: bool
+        self,
+        q: np.ndarray,
+        interpolation: str,
+        exact: bool,
+        return_scalar: bool,
     ) -> NumericalBaseColumn:
-        if isinstance(q, Number) or cudf.api.types.is_list_like(q):
-            np_array_q = np.asarray(q)
-            if np.logical_or(np_array_q < 0, np_array_q > 1).any():
-                raise ValueError(
-                    "percentiles should all be in the interval [0, 1]"
-                )
+        if np.logical_or(q < 0, q > 1).any():
+            raise ValueError(
+                "percentiles should all be in the interval [0, 1]"
+            )
-        # Beyond this point, q either being scalar or list-like
-        # will only have values in range [0, 1]
+        # Beyond this point, every value in the q array is
+        # guaranteed to lie in the range [0, 1]
-        result = self._numeric_quantile(q, interpolation, exact)
-        if isinstance(q, Number):
+        if len(self) == 0:
+            result = cast(
+                NumericalBaseColumn,
+                cudf.core.column.column_empty(
+                    row_count=len(q), dtype=self.dtype, masked=True
+                ),
+            )
+        else:
+            result = self._numeric_quantile(q, interpolation, exact)
+        if return_scalar:
             return (
                 cudf.utils.dtypes._get_nan_for_dtype(self.dtype)
                 if result[0] is cudf.NA
@@ -137,20 +146,24 @@ def median(self, skipna: bool = None) -> NumericalBaseColumn:
             return cudf.utils.dtypes._get_nan_for_dtype(self.dtype)
 
         # enforce linear in case the default ever changes
-        return self.quantile(0.5, interpolation="linear", exact=True)
+        return self.quantile(
+            np.array([0.5]),
+            interpolation="linear",
+            exact=True,
+            return_scalar=True,
+        )
 
     def _numeric_quantile(
-        self, q: Union[float, Sequence[float]], interpolation: str, exact: bool
+        self, q: np.ndarray, interpolation: str, exact: bool
     ) -> NumericalBaseColumn:
-        quant = [float(q)] if not isinstance(q, (Sequence, np.ndarray)) else q
         # get sorted indices and exclude nulls
         sorted_indices = self.as_frame()._get_sorted_inds(
             ascending=True, na_position="first"
         )
         sorted_indices = sorted_indices[self.null_count :]
 
         return libcudf.quantiles.quantile(
-            self, quant, interpolation, sorted_indices, exact
+            self, q, interpolation, sorted_indices, exact
         )
 
     def cov(self, other: NumericalBaseColumn) -> float:

@@ -3,8 +3,7 @@
 from __future__ import annotations
 
 import datetime as dt
-from numbers import Number
-from typing import Any, Sequence, Tuple, Union, cast
+from typing import Any, Sequence, Tuple, cast
 
 import numpy as np
 import pandas as pd
@@ -330,12 +329,19 @@ def isin(self, values: Sequence) -> ColumnBase:
         return cudf.core.tools.datetimes._isin_datetimelike(self, values)
 
     def quantile(
-        self, q: Union[float, Sequence[float]], interpolation: str, exact: bool
+        self,
+        q: np.ndarray,
+        interpolation: str,
+        exact: bool,
+        return_scalar: bool,
     ) -> "column.ColumnBase":
         result = self.as_numerical.quantile(
-            q=q, interpolation=interpolation, exact=exact
+            q=q,
+            interpolation=interpolation,
+            exact=exact,
+            return_scalar=return_scalar,
         )
-        if isinstance(q, Number):
+        if return_scalar:
             return pd.Timedelta(result, unit=self.time_unit)
         return result.astype(self.dtype)
 

@@ -7,7 +7,6 @@
 import pickle
 import warnings
 from collections import abc as abc
-from numbers import Number
 from shutil import get_terminal_size
 from typing import Any, Dict, MutableMapping, Optional, Set, Tuple, Type, Union
 
@@ -2746,21 +2745,30 @@ def quantile(
         dtype: float64
         """
 
-        result = self._column.quantile(q, interpolation, exact)
+        return_scalar = is_scalar(q)
+        if return_scalar:
+            np_array_q = np.asarray([float(q)])
+        else:
+            try:
+                np_array_q = np.asarray(q)
+            except TypeError:
+                try:
+                    np_array_q = cudf.core.column.as_column(q).values_host
+                except TypeError:
+                    raise TypeError(
+                        f"q must be a scalar or array-like, got {type(q)}"
+                    )
 
-        if isinstance(q, Number):
-            return result
+        result = self._column.quantile(
+            np_array_q, interpolation, exact, return_scalar=return_scalar
+        )
 
-        if quant_index:
-            index = np.asarray(q)
-            if len(self) == 0:
-                result = column_empty_like(
-                    index, dtype=self.dtype, masked=True, newsize=len(index),
-                )
-        else:
-            index = None
+        if return_scalar:
+            return result
 
-        return Series(result, index=index, name=self.name)
+        return Series(
+            result, index=np_array_q if quant_index else None, name=self.name
+        )
 
     @docutils.doc_describe()
     @_cudf_nvtx_annotate

@@ -1,7 +1,12 @@
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+
+import re
+
 import pandas as pd
+import pytest
 
 import cudf
-from cudf.testing._utils import assert_eq
+from cudf.testing._utils import assert_eq, assert_exceptions_equal
 
 
 def test_single_q():
@@ -46,3 +51,30 @@ def test_with_multiindex():
     gdf_q = gdf.quantiles(q, interpolation="nearest")
 
     assert_eq(pdf_q, gdf_q, check_index_type=False)
+
+
+@pytest.mark.parametrize("q", [2, [1, 2, 3]])
+def test_quantile_range_error(q):
+    ps = pd.Series([1, 2, 3])
+    gs = cudf.from_pandas(ps)
+    assert_exceptions_equal(
+        lfunc=ps.quantile,
+        rfunc=gs.quantile,
+        lfunc_args_and_kwargs=([q],),
+        rfunc_args_and_kwargs=([q],),
+        expected_error_message=re.escape(
+            "percentiles should all be in the interval [0, 1]"
+        ),
+    )
+
+
+def test_quantile_q_type():
+    gs = cudf.Series([1, 2, 3])
+    with pytest.raises(
+        TypeError,
+        match=re.escape(
+            "q must be a scalar or array-like, got <class "
+            "'cudf.core.dataframe.DataFrame'>"
+        ),
+    ):
+        gs.quantile(cudf.DataFrame())
@@ -2,6 +2,7 @@
 
 from concurrent.futures import ThreadPoolExecutor
 
+import cupy as cp
 import numpy as np
 import pandas as pd
 import pytest
@@ -201,13 +202,25 @@ def test_approx_quantiles_int():
 
 
 @pytest.mark.parametrize("data", [[], [1, 2, 3, 10, 326497]])
-@pytest.mark.parametrize("q", [[], 0.5, 1, 0.234, [0.345], [0.243, 0.5, 1]])
+@pytest.mark.parametrize(
+    "q",
+    [
+        [],
+        0.5,
+        1,
+        0.234,
+        [0.345],
+        [0.243, 0.5, 1],
+        np.array([0.5, 1]),
+        cp.array([0.5, 1]),
+    ],
+)
 def test_misc_quantiles(data, q):
 
     pdf_series = cudf.utils.utils._create_pandas_series(data=data)
     gdf_series = cudf.Series(data)
 
-    expected = pdf_series.quantile(q)
+    expected = pdf_series.quantile(q.get() if isinstance(q, cp.ndarray) else q)
     actual = gdf_series.quantile(q)
     assert_eq(expected, actual)