From 287dced7c23c3861333f30cbcfeea306211725e8 Mon Sep 17 00:00:00 2001 From: Sheilah Date: Wed, 1 Dec 2021 15:27:20 -0800 Subject: [PATCH 01/32] create new pr --- python/cudf/cudf/core/dataframe.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index c0cb6f1917f..c35ce1153df 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -2633,6 +2633,13 @@ def insert(self, loc, name, value): self._data.insert(name, value, loc=loc) + def diff(self): + """ + Calculates the difference of a Dataframe element compared with + another element in the Dataframe (default is element in previous row). + """ + pass + def drop( self, labels=None, From 721cbaa5e8ef661f4125fe8b7d3c4175bda4d3c2 Mon Sep 17 00:00:00 2001 From: Sheilah Date: Wed, 1 Dec 2021 17:40:50 -0800 Subject: [PATCH 02/32] docstrings --- python/cudf/cudf/core/dataframe.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index c35ce1153df..ea3e858bff5 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -2636,7 +2636,21 @@ def insert(self, loc, name, value): def diff(self): """ Calculates the difference of a Dataframe element compared with - another element in the Dataframe (default is element in previous row). + another element in the Dataframe, treating each column independently + + Parameters + ---------- + periods : int, default 1 + Periods to shift for calculating difference, + accepts negative values. + axis : {0 or ‘index’, 1 or ‘columns’}, default 0 + Take difference over rows (0) or columns (1). + Only row-wise (0) shift is supported. + + Returns + ------- + DataFrame + First differences of the DataFrame. """ pass From 4aa42c92497d3890cd50eae5a984d97b42652e38 Mon Sep 17 00:00:00 2001 From: Sheilah Date: Wed, 1 Dec 2021 18:57:00 -0800 Subject: [PATCH 03/32] added df.diff() method. need to add tests --- python/cudf/cudf/core/dataframe.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index ea3e858bff5..6dcace87674 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -2633,7 +2633,7 @@ def insert(self, loc, name, value): self._data.insert(name, value, loc=loc) - def diff(self): + def diff(self, periods=1, axis=0): """ Calculates the difference of a Dataframe element compared with another element in the Dataframe, treating each column independently @@ -2652,7 +2652,8 @@ def diff(self): DataFrame First differences of the DataFrame. """ - pass + result = self - self.shift(periods=periods) + return result def drop( self, From e245a63826eec56c1f48277dabe0e06792391b51 Mon Sep 17 00:00:00 2001 From: Sheilah Date: Thu, 2 Dec 2021 20:55:14 -0800 Subject: [PATCH 04/32] added checks for null values and non-numeric dtypes and axis --- python/cudf/cudf/core/dataframe.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 6dcace87674..3642d4ff493 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -2651,7 +2651,30 @@ def diff(self, periods=1, axis=0): ------- DataFrame First differences of the DataFrame. + + Notes + ----- + Diff currently only supports float and integer dtype columns with + no null values. + + Examples + -------- + """ + + if not axis == 0: + raise NotImplementedError("Only axis=0 is supported.") + + if self.isna().any().any(): + raise AssertionError( + "Diff currently requires columns with no null values" + ) + + if not np.issubdtype(self.dtypes[0], np.number): + raise NotImplementedError( + "Diff currently only supports numeric dtypes" + ) + result = self - self.shift(periods=periods) return result From 0f2bc48088fdf35e8ecae6571a07bc6812d96c68 Mon Sep 17 00:00:00 2001 From: Sheilah Date: Thu, 2 Dec 2021 21:00:02 -0800 Subject: [PATCH 05/32] added tests, all passing. ready for initial review --- python/cudf/cudf/tests/test_dataframe.py | 47 ++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index d555b5c4033..f99232e7d87 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -9039,3 +9039,50 @@ def test_pearson_corr_multiindex_dataframe(): expected = gdf.to_pandas().groupby(level="a").corr("pearson") assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "data", + [ + np.random.normal(-100, 100, 1000), + np.random.randint(-50, 50, 1000), + np.random.random_sample((4, 4)), + np.random.uniform(10.5, 75.5, (10,)), + np.array([1.123, 2.343, 5.890, 0.0]), + ], +) +@pytest.mark.parametrize("periods", [-1, -2, -3, -4, 1, 2, 3, 4]) +def test_diff_dataframe_valid(data, periods): + gdf = cudf.DataFrame(data) + pdf = gdf.to_pandas() + + actual = gdf.diff(periods=periods, axis=0) + expected = pdf.diff(periods=periods, axis=0) + + assert_eq( + expected, actual, check_dtype=False, + ) + + +def test_diff_dataframe_invalid_axis(): + with pytest.raises(NotImplementedError, match="Only axis=0 is supported."): + gdf = cudf.DataFrame(np.random.random_sample((4, 4))) + gdf.diff(periods=1, axis=1) + + +def test_diff_dataframe_null_columns(): + with pytest.raises( + AssertionError, + match="Diff currently requires columns with no null values", + ): + gdf = cudf.DataFrame([1.123, 2.343, np.nan, None, 6.072, None]) + gdf.diff(periods=4, axis=0) + + +def test_diff_dataframe_non_numeric_dtypes(): + with pytest.raises( + NotImplementedError, + match="Diff currently only supports numeric dtypes", + ): + gdf = cudf.DataFrame(["a", "b", "c", "d", "e"]) + gdf.diff(periods=2, axis=0) From 2b81389376ad7c1be2aaed9ff063bd74cbcdac57 Mon Sep 17 00:00:00 2001 From: Sheilah Date: Thu, 2 Dec 2021 21:05:13 -0800 Subject: [PATCH 06/32] added example to doctrings --- python/cudf/cudf/core/dataframe.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 3642d4ff493..57a20e89dd8 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -2659,6 +2659,26 @@ def diff(self, periods=1, axis=0): Examples -------- + >>> import cudf + >>> gdf = cudf.DataFrame({'a': [1, 2, 3, 4, 5, 6], + 'b': [1, 1, 2, 3, 5, 8], + 'c': [1, 4, 9, 16, 25, 36]}) + >>> gdf + a b c + 0 1 1 1 + 1 2 1 4 + 2 3 2 9 + 3 4 3 16 + 4 5 5 25 + 5 6 8 36 + >>> gdf.diff(periods=2) + a b c + 0 + 1 + 2 2 1 8 + 3 2 2 12 + 4 2 3 16 + 5 2 5 20 """ From 7cdb55a4a98dd793cbb062865e8cc90c7cafd360 Mon Sep 17 00:00:00 2001 From: Sheilah Date: Thu, 2 Dec 2021 21:45:01 -0800 Subject: [PATCH 07/32] minor edits to tests --- python/cudf/cudf/tests/test_dataframe.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index f99232e7d87..252ce696662 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -9044,10 +9044,10 @@ def test_pearson_corr_multiindex_dataframe(): @pytest.mark.parametrize( "data", [ - np.random.normal(-100, 100, 1000), - np.random.randint(-50, 50, 1000), + np.random.normal(-100, 100, (50, 50)), + np.random.randint(-50, 50, (25, 30)), np.random.random_sample((4, 4)), - np.random.uniform(10.5, 75.5, (10,)), + np.random.uniform(10.5, 75.5, (10, 6)), np.array([1.123, 2.343, 5.890, 0.0]), ], ) From 1e22293487f0f428e0736aa7c899a72380f37a49 Mon Sep 17 00:00:00 2001 From: Sheilah Date: Thu, 2 Dec 2021 22:05:54 -0800 Subject: [PATCH 08/32] . --- python/cudf/cudf/tests/test_dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 252ce696662..29a1c4fba25 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -9051,7 +9051,7 @@ def test_pearson_corr_multiindex_dataframe(): np.array([1.123, 2.343, 5.890, 0.0]), ], ) -@pytest.mark.parametrize("periods", [-1, -2, -3, -4, 1, 2, 3, 4]) +@pytest.mark.parametrize("periods", range(-4, 5)) def test_diff_dataframe_valid(data, periods): gdf = cudf.DataFrame(data) pdf = gdf.to_pandas() From 3bd80173171e3c979ed22ce683f6e9f2a99b420f Mon Sep 17 00:00:00 2001 From: Sheilah Date: Wed, 8 Dec 2021 11:22:39 -0800 Subject: [PATCH 09/32] checks dtypes for all columns in a dataframe --- python/cudf/cudf/core/dataframe.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 6f8825265d3..aa0a62b429b 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -2690,12 +2690,24 @@ def diff(self, periods=1, axis=0): "Diff currently requires columns with no null values" ) - if not np.issubdtype(self.dtypes[0], np.number): + if not (np.issubdtype(i, np.number) for i in self.dtypes): raise NotImplementedError( "Diff currently only supports numeric dtypes" ) - result = self - self.shift(periods=periods) + try: + result = self - self.shift(periods=periods) + except TypeError as e: + if ( + "sub operator not supported between" + "" + "and " in str(e) + ): + raise NotImplementedError( + "Diff currently only supports numeric dtypes" + ) + raise + return result def drop( From 496e837a5bc1e8ae1c9bdac013102116d4c54847 Mon Sep 17 00:00:00 2001 From: Sheilah Date: Wed, 8 Dec 2021 12:12:13 -0800 Subject: [PATCH 10/32] split long string to multiple lines --- python/cudf/cudf/core/dataframe.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 90ccd8ea30a..19e4f7bf9aa 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -2699,9 +2699,10 @@ def diff(self, periods=1, axis=0): result = self - self.shift(periods=periods) except TypeError as e: if ( - "sub operator not supported between" - "" - "and " in str(e) + "sub operator not supported between\ + \ + and " + in str(e) ): raise NotImplementedError( "Diff currently only supports numeric dtypes" From 2767b753870d44fec4a4f90a0761e7c0d0b4e6bc Mon Sep 17 00:00:00 2001 From: Sheilah Date: Wed, 8 Dec 2021 14:55:09 -0800 Subject: [PATCH 11/32] addressed review: use has_nulls to check for nans --- python/cudf/cudf/core/dataframe.py | 9 ++------- python/cudf/cudf/tests/test_dataframe.py | 13 +++---------- 2 files changed, 5 insertions(+), 17 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 19e4f7bf9aa..ad05bc5d32a 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -2685,7 +2685,7 @@ def diff(self, periods=1, axis=0): if not axis == 0: raise NotImplementedError("Only axis=0 is supported.") - if self.isna().any().any(): + if (self[col].has_nulls for col in self._data) is True: raise AssertionError( "Diff currently requires columns with no null values" ) @@ -2698,12 +2698,7 @@ def diff(self, periods=1, axis=0): try: result = self - self.shift(periods=periods) except TypeError as e: - if ( - "sub operator not supported between\ - \ - and " - in str(e) - ): + if "sub operator not supported" in str(e): raise NotImplementedError( "Diff currently only supports numeric dtypes" ) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index a24c0e0cf55..32629185875 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -9069,10 +9069,12 @@ def test_dataframe_add_suffix(): np.random.random_sample((4, 4)), np.random.uniform(10.5, 75.5, (10, 6)), np.array([1.123, 2.343, 5.890, 0.0]), + [np.nan, None, np.nan, None], + {"a": [1.123, 2.343, np.nan, np.nan], "b": [None, 3, 9.08, None]}, ], ) @pytest.mark.parametrize("periods", range(-4, 5)) -def test_diff_dataframe_valid(data, periods): +def test_diff_dataframe(data, periods): gdf = cudf.DataFrame(data) pdf = gdf.to_pandas() @@ -9090,15 +9092,6 @@ def test_diff_dataframe_invalid_axis(): gdf.diff(periods=1, axis=1) -def test_diff_dataframe_null_columns(): - with pytest.raises( - AssertionError, - match="Diff currently requires columns with no null values", - ): - gdf = cudf.DataFrame([1.123, 2.343, np.nan, None, 6.072, None]) - gdf.diff(periods=4, axis=0) - - def test_diff_dataframe_non_numeric_dtypes(): with pytest.raises( NotImplementedError, From 72d14bd16ca5e4fd12d0b4c5cd0be7ef2f54f333 Mon Sep 17 00:00:00 2001 From: Sheilah Date: Wed, 8 Dec 2021 15:01:59 -0800 Subject: [PATCH 12/32] removed nan-constraints check --- python/cudf/cudf/core/dataframe.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index ad05bc5d32a..05c314a50db 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -2654,8 +2654,7 @@ def diff(self, periods=1, axis=0): Notes ----- - Diff currently only supports float and integer dtype columns with - no null values. + Diff currently only supports float and integer dtype columns Examples -------- @@ -2685,11 +2684,6 @@ def diff(self, periods=1, axis=0): if not axis == 0: raise NotImplementedError("Only axis=0 is supported.") - if (self[col].has_nulls for col in self._data) is True: - raise AssertionError( - "Diff currently requires columns with no null values" - ) - if not (np.issubdtype(i, np.number) for i in self.dtypes): raise NotImplementedError( "Diff currently only supports numeric dtypes" From 798c642c8d4b64baa226bdb2605c2534627a36cb Mon Sep 17 00:00:00 2001 From: Sheilah Date: Thu, 9 Dec 2021 14:51:21 -0800 Subject: [PATCH 13/32] added tests for mix of numeric and non-numeric dtypes --- python/cudf/cudf/tests/test_dataframe.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 32629185875..7c883f7c34f 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -9092,10 +9092,21 @@ def test_diff_dataframe_invalid_axis(): gdf.diff(periods=1, axis=1) -def test_diff_dataframe_non_numeric_dtypes(): +@pytest.mark.parametrize( + "data", + [ + { + "int_col": [1, 2, 3, 4, 5], + "float_col": [1.0, 2.0, 3.0, 4.0, 5.0], + "string_col": ["a", "b", "c", "d", "e"], + }, + ["a", "b", "c", "d", "e"], + ], +) +def test_diff_dataframe_numeric_and_non_numeric_dypes(data): with pytest.raises( NotImplementedError, match="Diff currently only supports numeric dtypes", ): - gdf = cudf.DataFrame(["a", "b", "c", "d", "e"]) + gdf = cudf.DataFrame(data) gdf.diff(periods=2, axis=0) From df2140e6a150259d26e6226f36bd42574d0a04d3 Mon Sep 17 00:00:00 2001 From: Sheilah Date: Fri, 10 Dec 2021 13:46:24 -0800 Subject: [PATCH 14/32] addressed reviews by brandon --- python/cudf/cudf/core/dataframe.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 05c314a50db..d6de31089f1 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -2684,19 +2684,12 @@ def diff(self, periods=1, axis=0): if not axis == 0: raise NotImplementedError("Only axis=0 is supported.") - if not (np.issubdtype(i, np.number) for i in self.dtypes): + if not all(is_numeric_dtype(i) for i in self.dtypes): raise NotImplementedError( "Diff currently only supports numeric dtypes" ) - try: - result = self - self.shift(periods=periods) - except TypeError as e: - if "sub operator not supported" in str(e): - raise NotImplementedError( - "Diff currently only supports numeric dtypes" - ) - raise + result = self - self.shift(periods=periods) return result From 91412257b4cb4991ff17b4232acf98b1b1365031 Mon Sep 17 00:00:00 2001 From: Sheilah Date: Mon, 13 Dec 2021 20:20:23 -0800 Subject: [PATCH 15/32] numeric types docs- fix --- python/cudf/cudf/core/dataframe.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index d6de31089f1..ce10f0aefdb 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -2654,7 +2654,7 @@ def diff(self, periods=1, axis=0): Notes ----- - Diff currently only supports float and integer dtype columns + Diff currently only supports numeric dtype columns Examples -------- @@ -2689,9 +2689,7 @@ def diff(self, periods=1, axis=0): "Diff currently only supports numeric dtypes" ) - result = self - self.shift(periods=periods) - - return result + return self - self.shift(periods=periods) def drop( self, From 79211f16a9dfc52d7fb9ed7516798e8c915ad9ac Mon Sep 17 00:00:00 2001 From: Sheilah Date: Mon, 13 Dec 2021 20:23:53 -0800 Subject: [PATCH 16/32] added test cases for decimal64 dtypes --- python/cudf/cudf/tests/test_dataframe.py | 25 ++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 7c883f7c34f..279d60bdabc 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -9064,17 +9064,16 @@ def test_dataframe_add_suffix(): @pytest.mark.parametrize( "data", [ - np.random.normal(-100, 100, (50, 50)), np.random.randint(-50, 50, (25, 30)), np.random.random_sample((4, 4)), - np.random.uniform(10.5, 75.5, (10, 6)), np.array([1.123, 2.343, 5.890, 0.0]), [np.nan, None, np.nan, None], + [True, False, True, False, False], {"a": [1.123, 2.343, np.nan, np.nan], "b": [None, 3, 9.08, None]}, ], ) @pytest.mark.parametrize("periods", range(-4, 5)) -def test_diff_dataframe(data, periods): +def test_diff_dataframe_numeric_dtypes(data, periods): gdf = cudf.DataFrame(data) pdf = gdf.to_pandas() @@ -9086,6 +9085,24 @@ def test_diff_dataframe(data, periods): ) +@pytest.mark.parametrize( + ("precision", "scale"), [(5, 2), (4, 3), (8, 5), (3, 1), (6, 4)], +) +def test_diff_decimal64_dtype(precision, scale): + gdf = cudf.DataFrame( + np.random.uniform(10.5, 75.5, (10, 6)), + dtype=cudf.Decimal64Dtype(precision=precision, scale=scale), + ) + pdf = gdf.to_pandas() + + actual = gdf.diff() + expected = pdf.diff() + + assert_eq( + expected, actual, check_dtype=False, + ) + + def test_diff_dataframe_invalid_axis(): with pytest.raises(NotImplementedError, match="Only axis=0 is supported."): gdf = cudf.DataFrame(np.random.random_sample((4, 4))) @@ -9103,7 +9120,7 @@ def test_diff_dataframe_invalid_axis(): ["a", "b", "c", "d", "e"], ], ) -def test_diff_dataframe_numeric_and_non_numeric_dypes(data): +def test_diff_dataframe_non_numeric_dypes(data): with pytest.raises( NotImplementedError, match="Diff currently only supports numeric dtypes", From 8df6881056235645e568ee3ef5b14670089f7ad3 Mon Sep 17 00:00:00 2001 From: Sheilah Date: Fri, 7 Jan 2022 12:51:46 -0800 Subject: [PATCH 17/32] wip: moved binary_operator to DecimalBaseColumn --- python/cudf/cudf/core/column/decimal.py | 83 +++++++++++++------------ 1 file changed, 42 insertions(+), 41 deletions(-) diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index 7037b8e6f36..254882383c4 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -45,6 +45,48 @@ def as_decimal_column( return self return libcudf.unary.cast(self, dtype) + def binary_operator(self, op, other, reflect=False): + if reflect: + self, other = other, self + + # Binary Arithmetics between decimal columns. `Scale` and `precision` + # are computed outside of libcudf + if op in ("add", "sub", "mul", "div"): + scale = _binop_scale(self.dtype, other.dtype, op) + output_type = Decimal64Dtype( + scale=scale, precision=Decimal64Dtype.MAX_PRECISION + ) + result = libcudf.binaryop.binaryop(self, other, op, output_type) + result.dtype.precision = _binop_precision( + self.dtype, other.dtype, op + ) + elif op in ("eq", "ne", "lt", "gt", "le", "ge"): + if not isinstance( + other, + ( + Decimal32Column, + Decimal64Column, + cudf.core.column.NumericalColumn, + cudf.Scalar, + ), + ): + raise TypeError( + f"Operator {op} not supported between" + f"{str(type(self))} and {str(type(other))}" + ) + if isinstance( + other, cudf.core.column.NumericalColumn + ) and not is_integer_dtype(other.dtype): + raise TypeError( + f"Only decimal and integer column is supported for {op}." + ) + if isinstance(other, cudf.core.column.NumericalColumn): + other = other.as_decimal_column( + Decimal64Dtype(Decimal64Dtype.MAX_PRECISION, 0) + ) + result = libcudf.binaryop.binaryop(self, other, op, bool) + return result + class Decimal32Column(DecimalBaseColumn): dtype: Decimal32Dtype @@ -156,47 +198,6 @@ def to_arrow(self): buffers=[mask_buf, data_buf], ) - def binary_operator(self, op, other, reflect=False): - if reflect: - self, other = other, self - - # Binary Arithmetics between decimal columns. `Scale` and `precision` - # are computed outside of libcudf - if op in ("add", "sub", "mul", "div"): - scale = _binop_scale(self.dtype, other.dtype, op) - output_type = Decimal64Dtype( - scale=scale, precision=Decimal64Dtype.MAX_PRECISION - ) # precision will be ignored, libcudf has no notion of precision - result = libcudf.binaryop.binaryop(self, other, op, output_type) - result.dtype.precision = _binop_precision( - self.dtype, other.dtype, op - ) - elif op in ("eq", "ne", "lt", "gt", "le", "ge"): - if not isinstance( - other, - ( - Decimal64Column, - cudf.core.column.NumericalColumn, - cudf.Scalar, - ), - ): - raise TypeError( - f"Operator {op} not supported between" - f"{str(type(self))} and {str(type(other))}" - ) - if isinstance( - other, cudf.core.column.NumericalColumn - ) and not is_integer_dtype(other.dtype): - raise TypeError( - f"Only decimal and integer column is supported for {op}." - ) - if isinstance(other, cudf.core.column.NumericalColumn): - other = other.as_decimal_column( - Decimal64Dtype(Decimal64Dtype.MAX_PRECISION, 0) - ) - result = libcudf.binaryop.binaryop(self, other, op, bool) - return result - def normalize_binop_value(self, other): if is_scalar(other) and isinstance(other, (int, np.int, Decimal)): return cudf.Scalar(Decimal(other)) From db742bbd875b5e78e803cc11d21bc801979688bc Mon Sep 17 00:00:00 2001 From: Sheilah Date: Fri, 7 Jan 2022 14:00:39 -0800 Subject: [PATCH 18/32] added checks for either decimal32 or decimal64 --- python/cudf/cudf/core/column/decimal.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index 254882383c4..41b6f121cfb 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -53,9 +53,14 @@ def binary_operator(self, op, other, reflect=False): # are computed outside of libcudf if op in ("add", "sub", "mul", "div"): scale = _binop_scale(self.dtype, other.dtype, op) - output_type = Decimal64Dtype( - scale=scale, precision=Decimal64Dtype.MAX_PRECISION - ) + if self.dtype == other.dtype == cudf.Decimal32Dtype: + output_type = Decimal32Dtype( + scale=scale, precision=Decimal32Dtype.MAX_PRECISION + ) + if self.dtype == other.dtype == cudf.Decimal64Dtype: + output_type = Decimal64Dtype( + scale=scale, precision=Decimal64Dtype.MAX_PRECISION + ) result = libcudf.binaryop.binaryop(self, other, op, output_type) result.dtype.precision = _binop_precision( self.dtype, other.dtype, op @@ -322,4 +327,7 @@ def _binop_precision(l_dtype, r_dtype, op): else: raise NotImplementedError() # TODO - return min(result, cudf.Decimal64Dtype.MAX_PRECISION) + if l_dtype == r_dtype == Decimal32Dtype: + return min(result, cudf.Decimal32Dtype.MAX_PRECISION) + if l_dtype == r_dtype == Decimal64Dtype: + return min(result, cudf.Decimal64Dtype.MAX_PRECISION) From a29d1a324b020db9d6a0d93d220f9a9a1305ab74 Mon Sep 17 00:00:00 2001 From: Sheilah Date: Tue, 18 Jan 2022 13:02:28 -0800 Subject: [PATCH 19/32] addressed michael's review comments --- python/cudf/cudf/core/dataframe.py | 12 +++++++----- python/cudf/cudf/tests/test_dataframe.py | 4 ++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 4c6c7a1d202..a4385237665 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -2607,8 +2607,10 @@ def insert(self, loc, name, value, nan_as_null=None): def diff(self, periods=1, axis=0): """ - Calculates the difference of a Dataframe element compared with - another element in the Dataframe, treating each column independently + First discrete difference of element. + + Calculates the difference of a Dataframe element compared with another + element in the Dataframe (default is element in previous row). Parameters ---------- @@ -2626,7 +2628,7 @@ def diff(self, periods=1, axis=0): Notes ----- - Diff currently only supports numeric dtype columns + Diff currently only supports numeric dtype columns. Examples -------- @@ -2635,7 +2637,7 @@ def diff(self, periods=1, axis=0): 'b': [1, 1, 2, 3, 5, 8], 'c': [1, 4, 9, 16, 25, 36]}) >>> gdf - a b c + a b c 0 1 1 1 1 2 1 4 2 3 2 9 @@ -2643,7 +2645,7 @@ def diff(self, periods=1, axis=0): 4 5 5 25 5 6 8 36 >>> gdf.diff(periods=2) - a b c + a b c 0 1 2 2 1 8 diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 6d3b49e862f..b90a64e928e 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -9062,8 +9062,8 @@ def test_dataframe_add_suffix(): @pytest.mark.parametrize( "data", [ - np.random.randint(-50, 50, (25, 30)), - np.random.random_sample((4, 4)), + # np.random.seed(-50, 50, (25, 30)), + # np.random.random_sample.seed((4, 4)), np.array([1.123, 2.343, 5.890, 0.0]), [np.nan, None, np.nan, None], [True, False, True, False, False], From 3dc90b0224783e80bfdb782449804c9ae20d13a5 Mon Sep 17 00:00:00 2001 From: Sheilah Date: Wed, 19 Jan 2022 12:21:41 -0800 Subject: [PATCH 20/32] use const seed for random generated number -- cases --- python/cudf/cudf/tests/test_dataframe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index b90a64e928e..b63f73c065c 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -9062,8 +9062,8 @@ def test_dataframe_add_suffix(): @pytest.mark.parametrize( "data", [ - # np.random.seed(-50, 50, (25, 30)), - # np.random.random_sample.seed((4, 4)), + np.random.RandomState(seed=10).randint(-50, 50, (25, 30)), + np.random.RandomState(seed=10).random_sample((4, 4)), np.array([1.123, 2.343, 5.890, 0.0]), [np.nan, None, np.nan, None], [True, False, True, False, False], From c8a424da19b4963c0ef3fec6c0e4defdc62056ac Mon Sep 17 00:00:00 2001 From: Sheilah Date: Mon, 24 Jan 2022 16:17:28 -0800 Subject: [PATCH 21/32] added check for periods>len(dataframe) --- python/cudf/cudf/core/dataframe.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 68bfbaf5699..c71b97428b6 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -2660,6 +2660,12 @@ def diff(self, periods=1, axis=0): "Diff currently only supports numeric dtypes" ) + if abs(periods) > len(self): + df = cudf.DataFrame( + {name: ([cudf.NA] * len(self)) for name in self.columns} + ) + return df + return self - self.shift(periods=periods) def drop( From b91a795f701247655124147b3846b9a5608e3c0f Mon Sep 17 00:00:00 2001 From: Sheilah Date: Mon, 24 Jan 2022 16:19:20 -0800 Subject: [PATCH 22/32] added test for decimal32dtype, all tests passing. ready for review --- python/cudf/cudf/tests/test_dataframe.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 4640cb122ea..437f96269ca 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -9074,12 +9074,11 @@ def test_dataframe_add_suffix(): np.random.RandomState(seed=10).randint(-50, 50, (25, 30)), np.random.RandomState(seed=10).random_sample((4, 4)), np.array([1.123, 2.343, 5.890, 0.0]), - [np.nan, None, np.nan, None], [True, False, True, False, False], {"a": [1.123, 2.343, np.nan, np.nan], "b": [None, 3, 9.08, None]}, ], ) -@pytest.mark.parametrize("periods", range(-4, 5)) +@pytest.mark.parametrize("periods", range(-10, 10)) def test_diff_dataframe_numeric_dtypes(data, periods): gdf = cudf.DataFrame(data) pdf = gdf.to_pandas() @@ -9095,10 +9094,13 @@ def test_diff_dataframe_numeric_dtypes(data, periods): @pytest.mark.parametrize( ("precision", "scale"), [(5, 2), (4, 3), (8, 5), (3, 1), (6, 4)], ) -def test_diff_decimal64_dtype(precision, scale): +@pytest.mark.parametrize( + "dtype", [cudf.Decimal32Dtype, cudf.Decimal64Dtype], +) +def test_diff_decimal_dtypes(precision, scale, dtype): gdf = cudf.DataFrame( np.random.uniform(10.5, 75.5, (10, 6)), - dtype=cudf.Decimal64Dtype(precision=precision, scale=scale), + dtype=dtype(precision=precision, scale=scale), ) pdf = gdf.to_pandas() @@ -9125,6 +9127,7 @@ def test_diff_dataframe_invalid_axis(): "string_col": ["a", "b", "c", "d", "e"], }, ["a", "b", "c", "d", "e"], + [np.nan, None, np.nan, None], ], ) def test_diff_dataframe_non_numeric_dypes(data): From 331945f4592445a0e3e8076890f5c21bb137f820 Mon Sep 17 00:00:00 2001 From: Sheilah Kirui <71867292+skirui-source@users.noreply.github.com> Date: Tue, 25 Jan 2022 09:21:37 -0800 Subject: [PATCH 23/32] use column_empty instead of cudf.NA to create df with all-nulls Co-authored-by: Michael Wang --- python/cudf/cudf/core/dataframe.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index c71b97428b6..62aa2cff0f7 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -2661,9 +2661,8 @@ def diff(self, periods=1, axis=0): ) if abs(periods) > len(self): - df = cudf.DataFrame( - {name: ([cudf.NA] * len(self)) for name in self.columns} - ) + df = cudf.DataFrame._from_data( + {name: column_empty(len(self), dtype=dtype, masked=True) for name, dtype in zip(self.columns, self.dtypes)}) return df return self - self.shift(periods=periods) From 559edc3a2940380da6a5c7858b56cd7eb1a5dd18 Mon Sep 17 00:00:00 2001 From: Sheilah Kirui <71867292+skirui-source@users.noreply.github.com> Date: Tue, 25 Jan 2022 12:00:36 -0800 Subject: [PATCH 24/32] apply bradley's suggestions to docstrings Co-authored-by: Bradley Dice --- python/cudf/cudf/core/dataframe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 62aa2cff0f7..308c9e1621e 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -2606,8 +2606,8 @@ def diff(self, periods=1, axis=0): """ First discrete difference of element. - Calculates the difference of a Dataframe element compared with another - element in the Dataframe (default is element in previous row). + Calculates the difference of a DataFrame element compared with another + element in the DataFrame (default is element in previous row). Parameters ---------- From f33b931c41526815a412a7ab53422c404d02ced6 Mon Sep 17 00:00:00 2001 From: Sheilah Kirui <71867292+skirui-source@users.noreply.github.com> Date: Tue, 25 Jan 2022 12:01:05 -0800 Subject: [PATCH 25/32] add dots to indicate continuation in docstring examples Co-authored-by: Bradley Dice --- python/cudf/cudf/core/dataframe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 308c9e1621e..147a58e4b80 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -2631,8 +2631,8 @@ def diff(self, periods=1, axis=0): -------- >>> import cudf >>> gdf = cudf.DataFrame({'a': [1, 2, 3, 4, 5, 6], - 'b': [1, 1, 2, 3, 5, 8], - 'c': [1, 4, 9, 16, 25, 36]}) + ... 'b': [1, 1, 2, 3, 5, 8], + ... 'c': [1, 4, 9, 16, 25, 36]}) >>> gdf a b c 0 1 1 1 From 2dcabe99cbc6c5cb97019d3463c6abef6b67e026 Mon Sep 17 00:00:00 2001 From: Sheilah Date: Tue, 25 Jan 2022 13:44:09 -0800 Subject: [PATCH 26/32] added checks for periods as integer, and axis --- python/cudf/cudf/core/dataframe.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 147a58e4b80..2c4979bfc12 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -33,6 +33,7 @@ is_datetime_dtype, is_dict_like, is_dtype_equal, + is_integer, is_list_dtype, is_list_like, is_numeric_dtype, @@ -2614,7 +2615,7 @@ def diff(self, periods=1, axis=0): periods : int, default 1 Periods to shift for calculating difference, accepts negative values. - axis : {0 or ‘index’, 1 or ‘columns’}, default 0 + axis : {0 or 'index', 1 or 'columns'}, default 0 Take difference over rows (0) or columns (1). Only row-wise (0) shift is supported. @@ -2651,18 +2652,27 @@ def diff(self, periods=1, axis=0): 5 2 5 20 """ + if not is_integer(periods): + if not (isinstance(periods, float) and isinstance(periods, int)): + raise ValueError("periods must be an integer") + periods = int(periods) - if not axis == 0: - raise NotImplementedError("Only axis=0 is supported.") + axis = self._get_axis_from_axis_arg(axis) + if axis != 0: + raise NotImplementedError("Only axis=0 is currently supported.") if not all(is_numeric_dtype(i) for i in self.dtypes): raise NotImplementedError( - "Diff currently only supports numeric dtypes" + "DataFrame.diff only supports numeric dtypes" ) if abs(periods) > len(self): df = cudf.DataFrame._from_data( - {name: column_empty(len(self), dtype=dtype, masked=True) for name, dtype in zip(self.columns, self.dtypes)}) + { + name: column_empty(len(self), dtype=dtype, masked=True) + for name, dtype in zip(self.columns, self.dtypes) + } + ) return df return self - self.shift(periods=periods) From e6b2400d933d0b91c4039c08b595968ed01c7a01 Mon Sep 17 00:00:00 2001 From: Sheilah Date: Tue, 25 Jan 2022 13:45:05 -0800 Subject: [PATCH 27/32] minor test-fixes. ready for review --- python/cudf/cudf/tests/test_dataframe.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index c1f163cbdc2..bc9f30ac587 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -9084,7 +9084,7 @@ def test_dataframe_add_suffix(): {"a": [1.123, 2.343, np.nan, np.nan], "b": [None, 3, 9.08, None]}, ], ) -@pytest.mark.parametrize("periods", range(-10, 10)) +@pytest.mark.parametrize("periods", (-5, -1, 0, 1, 5)) def test_diff_dataframe_numeric_dtypes(data, periods): gdf = cudf.DataFrame(data) pdf = gdf.to_pandas() @@ -9105,7 +9105,7 @@ def test_diff_dataframe_numeric_dtypes(data, periods): ) def test_diff_decimal_dtypes(precision, scale, dtype): gdf = cudf.DataFrame( - np.random.uniform(10.5, 75.5, (10, 6)), + np.random.default_rng(seed=42).uniform(10.5, 75.5, (10, 6)), dtype=dtype(precision=precision, scale=scale), ) pdf = gdf.to_pandas() @@ -9120,7 +9120,7 @@ def test_diff_decimal_dtypes(precision, scale, dtype): def test_diff_dataframe_invalid_axis(): with pytest.raises(NotImplementedError, match="Only axis=0 is supported."): - gdf = cudf.DataFrame(np.random.random_sample((4, 4))) + gdf = cudf.DataFrame(np.array([1.123, 2.343, 5.890, 0.0])) gdf.diff(periods=1, axis=1) @@ -9139,7 +9139,7 @@ def test_diff_dataframe_invalid_axis(): def test_diff_dataframe_non_numeric_dypes(data): with pytest.raises( NotImplementedError, - match="Diff currently only supports numeric dtypes", + match="DataFrame.diff only supports numeric dtypes.", ): gdf = cudf.DataFrame(data) gdf.diff(periods=2, axis=0) From 3d59ba2d624ee44fe8605d6cf8d1b5e8eceff2f5 Mon Sep 17 00:00:00 2001 From: Sheilah Date: Tue, 25 Jan 2022 13:53:14 -0800 Subject: [PATCH 28/32] fixed regex issues, all tests passing now --- python/cudf/cudf/core/dataframe.py | 2 +- python/cudf/cudf/tests/test_dataframe.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 2c4979bfc12..439af3de222 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -2659,7 +2659,7 @@ def diff(self, periods=1, axis=0): axis = self._get_axis_from_axis_arg(axis) if axis != 0: - raise NotImplementedError("Only axis=0 is currently supported.") + raise NotImplementedError("Only axis=0 is supported.") if not all(is_numeric_dtype(i) for i in self.dtypes): raise NotImplementedError( diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index bc9f30ac587..417c2f76b29 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -9139,7 +9139,7 @@ def test_diff_dataframe_invalid_axis(): def test_diff_dataframe_non_numeric_dypes(data): with pytest.raises( NotImplementedError, - match="DataFrame.diff only supports numeric dtypes.", + match="DataFrame.diff only supports numeric dtypes", ): gdf = cudf.DataFrame(data) gdf.diff(periods=2, axis=0) From e0722aea84b09333fb5151d3014dde805ecd3f08 Mon Sep 17 00:00:00 2001 From: Sheilah Date: Wed, 26 Jan 2022 09:05:12 -0800 Subject: [PATCH 29/32] using pandas is_integer() and float() instead --- python/cudf/cudf/core/dataframe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 439af3de222..e973468a322 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -20,6 +20,7 @@ from numba import cuda from nvtx import annotate from pandas._config import get_option +from pandas.core.dtypes.common import is_float, is_integer from pandas.io.formats import console from pandas.io.formats.printing import pprint_thing @@ -33,7 +34,6 @@ is_datetime_dtype, is_dict_like, is_dtype_equal, - is_integer, is_list_dtype, is_list_like, is_numeric_dtype, @@ -2653,7 +2653,7 @@ def diff(self, periods=1, axis=0): """ if not is_integer(periods): - if not (isinstance(periods, float) and isinstance(periods, int)): + if not (is_float(periods) and periods.is_integer()): raise ValueError("periods must be an integer") periods = int(periods) From cdf5187166fb46cc4bccffe1b8b7bf2fcce427a8 Mon Sep 17 00:00:00 2001 From: Sheilah Kirui <71867292+skirui-source@users.noreply.github.com> Date: Thu, 3 Feb 2022 21:56:20 -0800 Subject: [PATCH 30/32] omit unnecessary extra tests. Co-authored-by: Bradley Dice --- python/cudf/cudf/tests/test_dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index a3b7bd2373f..32511d99a98 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -9094,7 +9094,7 @@ def test_diff_dataframe_numeric_dtypes(data, periods): @pytest.mark.parametrize( - ("precision", "scale"), [(5, 2), (4, 3), (8, 5), (3, 1), (6, 4)], + ("precision", "scale"), [(5, 2), (8, 5)], ) @pytest.mark.parametrize( "dtype", [cudf.Decimal32Dtype, cudf.Decimal64Dtype], From 50b5085c6b3881abc18b941c66fb970155b60aa4 Mon Sep 17 00:00:00 2001 From: Sheilah Kirui <71867292+skirui-source@users.noreply.github.com> Date: Thu, 3 Feb 2022 21:58:23 -0800 Subject: [PATCH 31/32] only use context manager around function that raises Co-authored-by: Bradley Dice --- python/cudf/cudf/tests/test_dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 32511d99a98..f3c2d233617 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -9115,8 +9115,8 @@ def test_diff_decimal_dtypes(precision, scale, dtype): def test_diff_dataframe_invalid_axis(): + gdf = cudf.DataFrame(np.array([1.123, 2.343, 5.890, 0.0])) with pytest.raises(NotImplementedError, match="Only axis=0 is supported."): - gdf = cudf.DataFrame(np.array([1.123, 2.343, 5.890, 0.0])) gdf.diff(periods=1, axis=1) From 575c11857c7e00396989ff81dc740db2913338e1 Mon Sep 17 00:00:00 2001 From: Sheilah Kirui <71867292+skirui-source@users.noreply.github.com> Date: Thu, 3 Feb 2022 21:59:14 -0800 Subject: [PATCH 32/32] context manager around function only Co-authored-by: Bradley Dice --- python/cudf/cudf/tests/test_dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index f3c2d233617..f3996d1f4bd 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -9133,11 +9133,11 @@ def test_diff_dataframe_invalid_axis(): ], ) def test_diff_dataframe_non_numeric_dypes(data): + gdf = cudf.DataFrame(data) with pytest.raises( NotImplementedError, match="DataFrame.diff only supports numeric dtypes", ): - gdf = cudf.DataFrame(data) gdf.diff(periods=2, axis=0)