From 9d52c8ae7a21348d50d4ba425732cea20f39c4f3 Mon Sep 17 00:00:00 2001 From: Marlene <57748216+marlenezw@users.noreply.github.com> Date: Fri, 2 Apr 2021 08:09:32 +0200 Subject: [PATCH] Interval index and interval_range (#7182) This PR is a follow up to PR #6984. It adds the IntervalIndex class and the interval_range function to CuDF. IntervalIndex is needed for the `cut` and `histogram` methods and closes issue #5376. The main files to take note of in this PR are `index.py` and the two cudf `__init__.py` files. The rest of the file changes are related to PR #6984 for creating the interval dtype. Authors: - Marlene (https://github.com/marlenezw) - Michael Wang (https://github.com/isVoid) Approvers: - Keith Kraus (https://github.com/kkraus14) URL: https://github.com/rapidsai/cudf/pull/7182 --- python/cudf/cudf/__init__.py | 2 + python/cudf/cudf/core/__init__.py | 2 + python/cudf/cudf/core/column/column.py | 69 ++++++- python/cudf/cudf/core/column/interval.py | 37 +++- python/cudf/cudf/core/index.py | 244 +++++++++++++++++++++- python/cudf/cudf/core/scalar.py | 2 +- python/cudf/cudf/tests/test_index.py | 251 +++++++++++++++++++++++ 7 files changed, 591 insertions(+), 16 deletions(-) diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py index 94649069060..c8a4894f4be 100644 --- a/python/cudf/cudf/__init__.py +++ b/python/cudf/cudf/__init__.py @@ -18,6 +18,8 @@ from cudf.core import ( NA, CategoricalIndex, + interval_range, + IntervalIndex, DataFrame, DatetimeIndex, Float32Index, diff --git a/python/cudf/cudf/core/__init__.py b/python/cudf/cudf/core/__init__.py index 59173cc0247..10abdaf0061 100644 --- a/python/cudf/cudf/core/__init__.py +++ b/python/cudf/cudf/core/__init__.py @@ -5,6 +5,8 @@ from cudf.core.dataframe import DataFrame, from_pandas, merge from cudf.core.index import ( CategoricalIndex, + interval_range, + IntervalIndex, DatetimeIndex, Float32Index, Float64Index, diff --git a/python/cudf/cudf/core/column/column.py 
b/python/cudf/cudf/core/column/column.py index 4969a004ac0..6a1600d6461 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -42,6 +42,7 @@ from cudf.core.abc import Serializable from cudf.core.buffer import Buffer from cudf.core.dtypes import CategoricalDtype +from cudf.core.dtypes import IntervalDtype from cudf.utils import ioutils, utils from cudf.utils.dtypes import ( NUMERIC_TYPES, @@ -1046,11 +1047,7 @@ def astype(self, dtype: Dtype, **kwargs) -> ColumnBase: ) return self elif is_interval_dtype(self.dtype): - if not self.dtype == dtype: - raise NotImplementedError( - "Casting interval columns not currently supported" - ) - return self + return self.as_interval_column(dtype, **kwargs) elif is_decimal_dtype(dtype): return self.as_decimal_column(dtype, **kwargs) elif np.issubdtype(dtype, np.datetime64): @@ -1113,6 +1110,11 @@ def as_datetime_column( ) -> "cudf.core.column.DatetimeColumn": raise NotImplementedError + def as_interval_column( + self, dtype: Dtype, **kwargs + ) -> "cudf.core.column.IntervalColumn": + raise NotImplementedError + def as_timedelta_column( self, dtype: Dtype, **kwargs ) -> "cudf.core.column.TimeDeltaColumn": @@ -1633,6 +1635,15 @@ def build_column( null_count=null_count, children=children, ) + elif is_interval_dtype(dtype): + return cudf.core.column.IntervalColumn( + dtype=dtype, + mask=mask, + size=size, + offset=offset, + children=children, + null_count=null_count, + ) elif is_struct_dtype(dtype): if size is None: raise TypeError("Must specify size") @@ -1714,6 +1725,52 @@ def build_categorical_column( return cast("cudf.core.column.CategoricalColumn", result) +def build_interval_column( + left_col, + right_col, + mask=None, + size=None, + offset=0, + null_count=None, + closed="right", +): + """ + Build an IntervalColumn + + Parameters + ---------- + left_col : Column + Column of values representing the left of the interval + right_col : Column + Column of values representing the right of the 
interval + mask : Buffer + Null mask + size : int, optional + offset : int, optional + closed : {"left", "right", "both", "neither"}, default "right" + Whether the intervals are closed on the left-side, right-side, + both or neither. + """ + left = as_column(left_col) + right = as_column(right_col) + if closed not in {"left", "right", "both", "neither"}: + closed = "right" + if type(left_col) is not list: + dtype = IntervalDtype(left_col.dtype, closed) + else: + dtype = IntervalDtype("int64", closed) + size = len(left) + return build_column( + data=None, + dtype=dtype, + mask=mask, + size=size, + offset=offset, + null_count=null_count, + children=(left, right), + ) + + def as_column( arbitrary: Any, nan_as_null: bool = None, @@ -2102,7 +2159,7 @@ def as_column( data = as_column(sr, nan_as_null=nan_as_null) elif is_interval_dtype(dtype): sr = pd.Series(arbitrary, dtype="interval") - data = as_column(sr, nan_as_null=nan_as_null) + data = as_column(sr, nan_as_null=nan_as_null, dtype=dtype) else: data = as_column( _construct_array(arbitrary, dtype), diff --git a/python/cudf/cudf/core/column/interval.py b/python/cudf/cudf/core/column/interval.py index e9991bef071..d8bea6b1658 100644 --- a/python/cudf/cudf/core/column/interval.py +++ b/python/cudf/cudf/core/column/interval.py @@ -2,6 +2,8 @@ import pyarrow as pa import cudf from cudf.core.column import StructColumn +from cudf.core.dtypes import IntervalDtype +from cudf.utils.dtypes import is_interval_dtype class IntervalColumn(StructColumn): @@ -38,7 +40,7 @@ def closed(self): def from_arrow(self, data): new_col = super().from_arrow(data.storage) size = len(data) - dtype = cudf.core.dtypes.IntervalDtype.from_arrow(data.type) + dtype = IntervalDtype.from_arrow(data.type) mask = data.buffers()[0] if mask is not None: mask = cudf.utils.utils.pa_mask_buffer_to_mask(mask, len(data)) @@ -60,14 +62,17 @@ def from_arrow(self, data): def to_arrow(self): typ = self.dtype.to_arrow() - return pa.ExtensionArray.from_storage(typ, 
super().to_arrow()) + struct_arrow = super().to_arrow() + if len(struct_arrow) == 0: + # struct arrow is pa.struct array with null children types + # we need to make sure its children have non-null type + struct_arrow = pa.array([], typ.storage_type) + return pa.ExtensionArray.from_storage(typ, struct_arrow) def from_struct_column(self, closed="right"): return IntervalColumn( size=self.size, - dtype=cudf.core.dtypes.IntervalDtype( - self.dtype.fields["left"], closed - ), + dtype=IntervalDtype(self.dtype.fields["left"], closed), mask=self.base_mask, offset=self.offset, null_count=self.null_count, @@ -80,12 +85,28 @@ def copy(self, deep=True): struct_copy = super().copy(deep=deep) return IntervalColumn( size=struct_copy.size, - dtype=cudf.core.dtypes.IntervalDtype( - struct_copy.dtype.fields["left"], closed - ), + dtype=IntervalDtype(struct_copy.dtype.fields["left"], closed), mask=struct_copy.base_mask, offset=struct_copy.offset, null_count=struct_copy.null_count, children=struct_copy.base_children, closed=closed, ) + + def as_interval_column(self, dtype, **kwargs): + if is_interval_dtype(dtype): + # a user can directly input the string `interval` as the dtype + # when creating an interval series or interval dataframe + if dtype == "interval": + dtype = IntervalDtype(self.dtype.fields["left"], self.closed) + return IntervalColumn( + size=self.size, + dtype=dtype, + mask=self.mask, + offset=self.offset, + null_count=self.null_count, + children=self.children, + closed=dtype.closed, + ) + else: + raise ValueError("dtype must be IntervalDtype") diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 5104629eee0..f65afb6a1d4 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -13,25 +13,32 @@ from pandas._config import get_option import cudf +from cudf._lib.filling import sequence from cudf._typing import DtypeObj from cudf.core.abc import Serializable from cudf.core.column import ( CategoricalColumn, ColumnBase, 
DatetimeColumn, + IntervalColumn, NumericalColumn, StringColumn, TimeDeltaColumn, + arange, column, ) from cudf.core.column.string import StringMethods as StringMethods +from cudf.core.dtypes import IntervalDtype from cudf.core.frame import Frame from cudf.utils import ioutils, utils from cudf.utils.docutils import copy_docstring from cudf.utils.dtypes import ( + find_common_type, is_categorical_dtype, + is_interval_dtype, is_list_like, is_mixed_with_object_dtype, + is_numerical_dtype, is_scalar, numeric_normalize_types, ) @@ -2111,6 +2118,10 @@ def __repr__(self): return "\n".join(lines) def __getitem__(self, index): + if type(self) == IntervalIndex: + raise NotImplementedError( + "Getting a scalar from an IntervalIndex is not yet supported" + ) res = self._values[index] if not isinstance(index, int): res = as_index(res) @@ -2635,7 +2646,8 @@ def inferred_freq(self): class CategoricalIndex(GenericIndex): - """An categorical of orderable values that represent the indices of another + """ + A categorical of orderable values that represent the indices of another Column Parameters @@ -2752,6 +2764,236 @@ def categories(self): return self._values.cat().categories +def interval_range( + start=None, end=None, periods=None, freq=None, name=None, closed="right", +) -> "IntervalIndex": + """ + Returns a fixed frequency IntervalIndex. + + Parameters + ---------- + start : numeric, default None + Left bound for generating intervals. + end : numeric , default None + Right bound for generating intervals. + periods : int, default None + Number of periods to generate + freq : numeric, default None + The length of each interval. Must be consistent + with the type of start and end + name : str, default None + Name of the resulting IntervalIndex. + closed : {"left", "right", "both", "neither"}, default "right" + Whether the intervals are closed on the left-side, right-side, + both or neither. 
+ + Returns + ------- + IntervalIndex + + Examples + -------- + >>> import cudf + >>> import pandas as pd + >>> cudf.interval_range(start=0,end=5) + IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]], + ...closed='right',dtype='interval') + >>> cudf.interval_range(start=0,end=10, freq=2,closed='left') + IntervalIndex([[0, 2), [2, 4), [4, 6), [6, 8), [8, 10)], + ...closed='left',dtype='interval') + >>> cudf.interval_range(start=0,end=10, periods=3,closed='left') + IntervalIndex([[0.0, 3.3333333333333335), + [3.3333333333333335, 6.666666666666667), + [6.666666666666667, 10.0)], + closed='left', + dtype='interval') + """ + if freq and periods and start and end: + raise ValueError( + "Of the four parameters: start, end, periods, and " + "freq, exactly three must be specified" + ) + args = [ + cudf.Scalar(x) if x is not None else None + for x in (start, end, freq, periods) + ] + if any( + not is_numerical_dtype(x.dtype) if x is not None else False + for x in args + ): + raise ValueError("start, end, periods, freq must be numeric values.") + *rargs, periods = args + common_dtype = find_common_type([x.dtype for x in rargs if x]) + start, end, freq = rargs + periods = periods.astype("int64") if periods is not None else None + + if periods and not freq: + # if statement for mypy to pass + if end is not None and start is not None: + # divmod only supported on host side scalars + quotient, remainder = divmod((end - start).value, periods.value) + if remainder: + freq_step = cudf.Scalar((end - start) / periods) + else: + freq_step = cudf.Scalar(quotient) + if start.dtype != freq_step.dtype: + start = start.astype(freq_step.dtype) + bin_edges = sequence( + size=periods + 1, + init=start.device_value, + step=freq_step.device_value, + ) + left_col = bin_edges[:-1] + right_col = bin_edges[1:] + elif freq and periods: + if end: + start = end - (freq * periods) + if start: + end = freq * periods + start + if end is not None and start is not None: + left_col = arange( + 
start.value, end.value, freq.value, dtype=common_dtype + ) + end = end + 1 + start = start + freq + right_col = arange( + start.value, end.value, freq.value, dtype=common_dtype + ) + elif freq and not periods: + if end is not None and start is not None: + end = end - freq + 1 + left_col = arange( + start.value, end.value, freq.value, dtype=common_dtype + ) + end = end + freq + 1 + start = start + freq + right_col = arange( + start.value, end.value, freq.value, dtype=common_dtype + ) + elif start is not None and end is not None: + # if statements for mypy to pass + if freq: + left_col = arange( + start.value, end.value, freq.value, dtype=common_dtype + ) + else: + left_col = arange(start.value, end.value, dtype=common_dtype) + start = start + 1 + end = end + 1 + if freq: + right_col = arange( + start.value, end.value, freq.value, dtype=common_dtype + ) + else: + right_col = arange(start.value, end.value, dtype=common_dtype) + else: + raise ValueError( + "Of the four parameters: start, end, periods, and " + "freq, at least two must be specified" + ) + if len(right_col) == 0 or len(left_col) == 0: + dtype = IntervalDtype("int64", closed) + data = column.column_empty_like_same_mask(left_col, dtype) + return cudf.IntervalIndex(data, closed=closed) + + interval_col = column.build_interval_column( + left_col, right_col, closed=closed + ) + return IntervalIndex(interval_col) + + +class IntervalIndex(GenericIndex): + """ + Immutable index of intervals that are closed on the same side. + + Parameters + ---------- + data : array-like (1-dimensional) + Array-like containing Interval objects from which to build the + IntervalIndex. + closed : {"left", "right", "both", "neither"}, default "right" + Whether the intervals are closed on the left-side, right-side, + both or neither. + dtype : dtype or None, default None + If None, dtype will be inferred. + copy : bool, default False + Copy the input data. + name : object, optional + Name to be stored in the index. 
+ + Returns + ------- + IntervalIndex + """ + + def __new__( + cls, data, closed=None, dtype=None, copy=False, name=None, + ) -> "IntervalIndex": + if copy: + data = column.as_column(data, dtype=dtype).copy() + out = Frame.__new__(cls) + kwargs = _setdefault_name(data, name=name) + if isinstance(data, IntervalColumn): + data = data + elif isinstance(data, pd.Series) and (is_interval_dtype(data.dtype)): + data = column.as_column(data, data.dtype) + elif isinstance(data, (pd._libs.interval.Interval, pd.IntervalIndex)): + data = column.as_column(data, dtype=dtype,) + elif not data: + dtype = IntervalDtype("int64", closed) + data = column.column_empty_like_same_mask( + column.as_column(data), dtype + ) + else: + data = column.as_column(data) + data.dtype.closed = closed + + out._initialize(data, **kwargs) + return out + + def from_breaks(breaks, closed="right", name=None, copy=False, dtype=None): + """ + Construct an IntervalIndex from an array of splits. + + Parameters + --------- + breaks : array-like (1-dimensional) + Left and right bounds for each interval. + closed : {"left", "right", "both", "neither"}, default "right" + Whether the intervals are closed on the left-side, right-side, + both or neither. + copy : bool, default False + Copy the input data. + name : object, optional + Name to be stored in the index. + dtype : dtype or None, default None + If None, dtype will be inferred. 
+ + Returns + ------- + IntervalIndex + + Examples + -------- + >>> import cudf + >>> import pandas as pd + >>> cudf.IntervalIndex.from_breaks([0, 1, 2, 3]) + IntervalIndex([(0, 1], (1, 2], (2, 3]], + closed='right', + dtype='interval[int64]') + """ + if copy: + breaks = column.as_column(breaks, dtype=dtype).copy() + left_col = breaks[:-1:] + right_col = breaks[+1::] + + interval_col = column.build_interval_column( + left_col, right_col, closed=closed + ) + + return IntervalIndex(interval_col, name=name) + + class StringIndex(GenericIndex): """String defined indices into another Column diff --git a/python/cudf/cudf/core/scalar.py b/python/cudf/cudf/core/scalar.py index 5514e655211..d879b2ec4e2 100644 --- a/python/cudf/cudf/core/scalar.py +++ b/python/cudf/cudf/core/scalar.py @@ -358,7 +358,7 @@ def _dispatch_scalar_unaop(self, op): return getattr(self.value, op)() def astype(self, dtype): - return Scalar(self.device_value, dtype) + return Scalar(self.value, dtype) class _NAType(object): diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index 688efef555b..21a431dd540 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -17,6 +17,7 @@ DatetimeIndex, GenericIndex, Int64Index, + IntervalIndex, RangeIndex, as_index, ) @@ -1360,6 +1361,256 @@ def test_categorical_index_basic(data, categories, dtype, ordered, name): assert_eq(pindex, gindex) +INTERVAL_BOUNDARY_TYPES = [ + int, + np.int8, + np.int16, + np.int32, + np.int64, + np.float32, + np.float64, + cudf.Scalar, +] + + +@pytest.mark.parametrize("closed", ["left", "right", "both", "neither"]) +@pytest.mark.parametrize("start", [0, 1, 2, 3]) +@pytest.mark.parametrize("end", [4, 5, 6, 7]) +def test_interval_range_basic(start, end, closed): + pindex = pd.interval_range(start=start, end=end, closed=closed) + gindex = cudf.interval_range(start=start, end=end, closed=closed) + + assert_eq(pindex, gindex) + + +@pytest.mark.parametrize("start_t", 
INTERVAL_BOUNDARY_TYPES) +@pytest.mark.parametrize("end_t", INTERVAL_BOUNDARY_TYPES) +def test_interval_range_dtype_basic(start_t, end_t): + start, end = start_t(24), end_t(42) + start_val = start.value if isinstance(start, cudf.Scalar) else start + end_val = end.value if isinstance(end, cudf.Scalar) else end + pindex = pd.interval_range(start=start_val, end=end_val, closed="left") + gindex = cudf.interval_range(start=start, end=end, closed="left") + + assert_eq(pindex, gindex) + + +@pytest.mark.parametrize("closed", ["left", "right", "both", "neither"]) +@pytest.mark.parametrize("start", [0]) +@pytest.mark.parametrize("end", [0]) +def test_interval_range_empty(start, end, closed): + pindex = pd.interval_range(start=start, end=end, closed=closed) + gindex = cudf.interval_range(start=start, end=end, closed=closed) + + assert_eq(pindex, gindex) + + +@pytest.mark.parametrize("closed", ["left", "right", "both", "neither"]) +@pytest.mark.parametrize("freq", [1, 2, 3]) +@pytest.mark.parametrize("start", [0, 1, 2, 3, 5]) +@pytest.mark.parametrize("end", [6, 8, 10, 43, 70]) +def test_interval_range_freq_basic(start, end, freq, closed): + pindex = pd.interval_range(start=start, end=end, freq=freq, closed=closed) + gindex = cudf.interval_range( + start=start, end=end, freq=freq, closed=closed + ) + + assert_eq(pindex, gindex) + + +@pytest.mark.parametrize("start_t", INTERVAL_BOUNDARY_TYPES) +@pytest.mark.parametrize("end_t", INTERVAL_BOUNDARY_TYPES) +@pytest.mark.parametrize("freq_t", INTERVAL_BOUNDARY_TYPES) +def test_interval_range_freq_basic_dtype(start_t, end_t, freq_t): + start, end, freq = start_t(5), end_t(70), freq_t(3) + start_val = start.value if isinstance(start, cudf.Scalar) else start + end_val = end.value if isinstance(end, cudf.Scalar) else end + freq_val = freq.value if isinstance(freq, cudf.Scalar) else freq + pindex = pd.interval_range( + start=start_val, end=end_val, freq=freq_val, closed="left" + ) + gindex = cudf.interval_range( + start=start, end=end, 
freq=freq, closed="left" + ) + + assert_eq(pindex, gindex) + + +@pytest.mark.parametrize("closed", ["left", "right", "both", "neither"]) +@pytest.mark.parametrize("periods", [1, 1.0, 2, 2.0, 3.0, 3]) +@pytest.mark.parametrize("start", [0, 0.0, 1.0, 1, 2, 2.0, 3.0, 3]) +@pytest.mark.parametrize("end", [4, 4.0, 5.0, 5, 6, 6.0, 7.0, 7]) +def test_interval_range_periods_basic(start, end, periods, closed): + pindex = pd.interval_range( + start=start, end=end, periods=periods, closed=closed + ) + gindex = cudf.interval_range( + start=start, end=end, periods=periods, closed=closed + ) + + assert_eq(pindex, gindex) + + +@pytest.mark.parametrize("start_t", INTERVAL_BOUNDARY_TYPES) +@pytest.mark.parametrize("end_t", INTERVAL_BOUNDARY_TYPES) +@pytest.mark.parametrize("periods_t", INTERVAL_BOUNDARY_TYPES) +def test_interval_range_periods_basic_dtype(start_t, end_t, periods_t): + start, end, periods = start_t(0), end_t(4), periods_t(1.0) + start_val = start.value if isinstance(start, cudf.Scalar) else start + end_val = end.value if isinstance(end, cudf.Scalar) else end + periods_val = ( + periods.value if isinstance(periods, cudf.Scalar) else periods + ) + pindex = pd.interval_range( + start=start_val, end=end_val, periods=periods_val, closed="left" + ) + gindex = cudf.interval_range( + start=start, end=end, periods=periods, closed="left" + ) + + assert_eq(pindex, gindex) + + +@pytest.mark.parametrize("closed", ["left", "right", "both", "neither"]) +@pytest.mark.parametrize("periods", [1, 2, 3]) +@pytest.mark.parametrize("freq", [1, 2, 3, 4]) +@pytest.mark.parametrize("end", [4, 8, 9, 10]) +def test_interval_range_periods_freq_end(end, freq, periods, closed): + pindex = pd.interval_range( + end=end, freq=freq, periods=periods, closed=closed + ) + gindex = cudf.interval_range( + end=end, freq=freq, periods=periods, closed=closed + ) + + assert_eq(pindex, gindex) + + +@pytest.mark.parametrize("periods_t", INTERVAL_BOUNDARY_TYPES) +@pytest.mark.parametrize("freq_t", 
INTERVAL_BOUNDARY_TYPES) +@pytest.mark.parametrize("end_t", INTERVAL_BOUNDARY_TYPES) +def test_interval_range_periods_freq_end_dtype(periods_t, freq_t, end_t): + periods, freq, end = periods_t(2), freq_t(3), end_t(10) + freq_val = freq.value if isinstance(freq, cudf.Scalar) else freq + end_val = end.value if isinstance(end, cudf.Scalar) else end + periods_val = ( + periods.value if isinstance(periods, cudf.Scalar) else periods + ) + pindex = pd.interval_range( + end=end_val, freq=freq_val, periods=periods_val, closed="left" + ) + gindex = cudf.interval_range( + end=end, freq=freq, periods=periods, closed="left" + ) + + assert_eq(pindex, gindex) + + +@pytest.mark.parametrize("closed", ["left", "right", "both", "neither"]) +@pytest.mark.parametrize("periods", [1, 2, 3]) +@pytest.mark.parametrize("freq", [1, 2, 3, 4]) +@pytest.mark.parametrize("start", [1, 4, 9, 12]) +def test_interval_range_periods_freq_start(start, freq, periods, closed): + pindex = pd.interval_range( + start=start, freq=freq, periods=periods, closed=closed + ) + gindex = cudf.interval_range( + start=start, freq=freq, periods=periods, closed=closed + ) + + assert_eq(pindex, gindex) + + +@pytest.mark.parametrize("periods_t", INTERVAL_BOUNDARY_TYPES) +@pytest.mark.parametrize("freq_t", INTERVAL_BOUNDARY_TYPES) +@pytest.mark.parametrize("start_t", INTERVAL_BOUNDARY_TYPES) +def test_interval_range_periods_freq_start_dtype(periods_t, freq_t, start_t): + periods, freq, start = periods_t(2), freq_t(3), start_t(9) + freq_val = freq.value if isinstance(freq, cudf.Scalar) else freq + start_val = start.value if isinstance(start, cudf.Scalar) else start + periods_val = ( + periods.value if isinstance(periods, cudf.Scalar) else periods + ) + pindex = pd.interval_range( + start=start_val, freq=freq_val, periods=periods_val, closed="left" + ) + gindex = cudf.interval_range( + start=start, freq=freq, periods=periods, closed="left" + ) + + assert_eq(pindex, gindex) + + +@pytest.mark.parametrize("closed", ["right", 
"left", "both", "neither"]) +@pytest.mark.parametrize( + "data", + [ + ([pd.Interval(30, 50)]), + ([pd.Interval(0, 3), pd.Interval(1, 7)]), + ([pd.Interval(0.2, 60.3), pd.Interval(1, 7), pd.Interval(0, 0)]), + ([]), + ], +) +def test_interval_index_basic(data, closed): + pindex = pd.IntervalIndex(data, closed=closed) + gindex = IntervalIndex(data, closed=closed) + + assert_eq(pindex, gindex) + + +@pytest.mark.parametrize("closed", ["right", "left", "both", "neither"]) +def test_interval_index_empty(closed): + pindex = pd.IntervalIndex([], closed=closed) + gindex = IntervalIndex([], closed=closed) + + assert_eq(pindex, gindex) + + +@pytest.mark.parametrize("closed", ["right", "left", "both", "neither"]) +@pytest.mark.parametrize( + "data", + [ + ([pd.Interval(1, 6), pd.Interval(1, 10), pd.Interval(1, 3)]), + ( + [ + pd.Interval(3.5, 6.0), + pd.Interval(1.0, 7.0), + pd.Interval(0.0, 10.0), + ] + ), + ( + [ + pd.Interval(50, 100, closed="left"), + pd.Interval(1.0, 7.0, closed="left"), + pd.Interval(16, 322, closed="left"), + ] + ), + ( + [ + pd.Interval(50, 100, closed="right"), + pd.Interval(1.0, 7.0, closed="right"), + pd.Interval(16, 322, closed="right"), + ] + ), + ], +) +def test_interval_index_many_params(data, closed): + + pindex = pd.IntervalIndex(data, closed=closed) + gindex = IntervalIndex(data, closed=closed) + + assert_eq(pindex, gindex) + + +@pytest.mark.parametrize("closed", ["left", "right", "both", "neither"]) +def test_interval_index_from_breaks(closed): + breaks = [0, 3, 6, 10] + pindex = pd.IntervalIndex.from_breaks(breaks, closed=closed) + gindex = IntervalIndex.from_breaks(breaks, closed=closed) + + assert_eq(pindex, gindex) + + @pytest.mark.parametrize("n", [0, 2, 5, 10, None]) @pytest.mark.parametrize("frac", [0.1, 0.5, 1, 2, None]) @pytest.mark.parametrize("replace", [True, False])