From e030e52e349e74cc99290a995d453ffaef2cb69b Mon Sep 17 00:00:00 2001 From: a-hirota Date: Thu, 14 Nov 2024 16:51:54 +0900 Subject: [PATCH 01/10] Include Decimal32 and Decimal64 in round operation --- python/cudf/cudf/core/indexed_frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index e031f2a4e8e..07b40fea4d1 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -3936,7 +3936,7 @@ def round(self, decimals=0, how="half_even"): cols = ( col.round(decimals[name], how=how) - if name in decimals and col.dtype.kind in "fiu" + if name in decimals and (col.dtype.kind in "fiu" or col.dtype in ["Decimal32Dtype", "Decimal64Dtype"]) else col.copy(deep=True) for name, col in self._column_labels_and_values ) From 38d40d94ee88e07cdfa325ae75d70ff2780b61dd Mon Sep 17 00:00:00 2001 From: a-hirota Date: Thu, 14 Nov 2024 17:14:27 +0900 Subject: [PATCH 02/10] WIP: add scratch notebook and comment out dtype check in round --- notebooks/test.ipynb | 218 +++++++++++++++++++++++++ python/cudf/cudf/core/indexed_frame.py | 6 +- 2 files changed, 223 insertions(+), 1 deletion(-) create mode 100644 notebooks/test.ipynb diff --git a/notebooks/test.ipynb b/notebooks/test.ipynb new file mode 100644 index 00000000000..a4d821886d4 --- /dev/null +++ b/notebooks/test.ipynb @@ -0,0 +1,218 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "fa61b219-581f-45d4-b512-06bfdc4c1139", + "metadata": {}, + "outputs": [], + "source": [ + "import cudf" + ] + }, + { + "cell_type": "markdown", + "id": "7ed30f6d-7afd-4a1b-8b89-d01d911d30c0", + "metadata": {}, + "source": [ + "## 元の実績" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "8fa4d458-bc4f-4c59-8a50-4292528b1029", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "float\n", + " a\n", + "0 1.000\n", + "1 2.054\n", + "2 3.010\n", + " a\n", + "0 1.0\n", + "1 2.1\n", 
+ "2 3.0\n", + "decimal\n", + " a\n", + "0 1.00\n", + "1 2.05\n", + "2 3.01\n", + " a\n", + "0 1.00\n", + "1 2.05\n", + "2 3.01\n" + ] + } + ], + "source": [ + "def check(t):\n", + " a = cudf.DataFrame( { 'a' : [1.00, 2.054, 3.01]} ).astype(t)\n", + " print( type(t),a)\n", + " a = a.round(decimals=1)\n", + " print(type(t), a)\n", + "\n", + "print('float')\n", + "check(float)\n", + "print('decimal')\n", + "check(cudf.Decimal32Dtype(precision=3, scale=2))" + ] + }, + { + "cell_type": "markdown", + "id": "f8d8de2b-9bf9-45ac-8953-181f6de844c4", + "metadata": {}, + "source": [ + "## 改修後\n", + "\n", + " cols = (\n", + " col.round(decimals[name], how=how)\n", + " if name in decimals and (\n", + " col.dtype.kind in \"fiu\" or \n", + " isinstance(col.dtype, (cudf.Decimal32Dtype, cudf.Decimal64Dtype))\n", + " )\n", + " else col.copy(deep=True)\n", + " for name, col in self._column_labels_and_values" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "6df9ca22-da3a-4456-b7da-186a746d751e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "float\n", + " a\n", + "0 1.000\n", + "1 2.054\n", + "2 3.010\n", + " a\n", + "0 1.0\n", + "1 2.1\n", + "2 3.0\n", + "decimal\n", + " a\n", + "0 1.00\n", + "1 2.05\n", + "2 3.01\n", + " a\n", + "0 1.00\n", + "1 2.05\n", + "2 3.01\n" + ] + } + ], + "source": [ + "def check(t):\n", + " a = cudf.DataFrame( { 'a' : [1.00, 2.054, 3.01]} ).astype(t)\n", + " print( type(t),a)\n", + " a = a.round(decimals=1)\n", + " print(type(t), a)\n", + "\n", + "print('float')\n", + "check(float)\n", + "print('decimal')\n", + "check(cudf.Decimal32Dtype(precision=3, scale=2))" + ] + }, + { + "cell_type": "markdown", + "id": "db35ea7d-3b18-44bb-bede-3a07743a3a0a", + "metadata": {}, + "source": [ + "## 改修(\"fiu\"外す)\n", + "\n", + "\n", + " col.round(decimals[name], how=how)\n", + " if name in decimals\n", + " #and (\n", + " # col.dtype.kind in \"fiu\" or\n", + " # isinstance(col.dtype, 
(cudf.Decimal32Dtype, cudf.Decimal64Dtype))\n", + " # )\n", + " else col.copy(deep=True)\n", + " for name, col in self._column_labels_and_values" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "9b2e4d00-1ba0-44a6-a0bc-1a7c8c6b5540", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "float\n", + " a\n", + "0 1.000\n", + "1 2.054\n", + "2 3.010\n", + " a\n", + "0 1.0\n", + "1 2.1\n", + "2 3.0\n", + "decimal\n", + " a\n", + "0 1.00\n", + "1 2.05\n", + "2 3.01\n", + " a\n", + "0 1.00\n", + "1 2.05\n", + "2 3.01\n" + ] + } + ], + "source": [ + "def check(t):\n", + " a = cudf.DataFrame( { 'a' : [1.00, 2.054, 3.01]} ).astype(t)\n", + " print( type(t),a)\n", + " a = a.round(decimals=1)\n", + " print(type(t), a)\n", + "\n", + "print('float')\n", + "check(float)\n", + "print('decimal')\n", + "check(cudf.Decimal32Dtype(precision=3, scale=2))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c39117df-629e-4876-bd35-40baf0f3ab26", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 07b40fea4d1..14edb150bd2 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -3936,7 +3936,11 @@ def round(self, decimals=0, how="half_even"): cols = ( col.round(decimals[name], how=how) - if name in decimals and (col.dtype.kind in "fiu" or col.dtype in ["Decimal32Dtype", "Decimal64Dtype"]) + if name in decimals + #and ( + # col.dtype.kind 
in "fiu" or + # isinstance(col.dtype, (cudf.Decimal32Dtype, cudf.Decimal64Dtype)) + # ) else col.copy(deep=True) for name, col in self._column_labels_and_values ) From 6d75b0fcf03bf49c15fc9358a75277fc5415ee26 Mon Sep 17 00:00:00 2001 From: a-hirota Date: Fri, 15 Nov 2024 08:39:43 +0900 Subject: [PATCH 03/10] Remove scratch notebook and check decimal dtypes with isinstance in round --- notebooks/test.ipynb | 218 ------------------------- python/cudf/cudf/core/indexed_frame.py | 7 +- 2 files changed, 2 insertions(+), 223 deletions(-) delete mode 100644 notebooks/test.ipynb diff --git a/notebooks/test.ipynb b/notebooks/test.ipynb deleted file mode 100644 index a4d821886d4..00000000000 --- a/notebooks/test.ipynb +++ /dev/null @@ -1,218 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "fa61b219-581f-45d4-b512-06bfdc4c1139", - "metadata": {}, - "outputs": [], - "source": [ - "import cudf" - ] - }, - { - "cell_type": "markdown", - "id": "7ed30f6d-7afd-4a1b-8b89-d01d911d30c0", - "metadata": {}, - "source": [ - "## 元の実績" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "8fa4d458-bc4f-4c59-8a50-4292528b1029", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "float\n", - " a\n", - "0 1.000\n", - "1 2.054\n", - "2 3.010\n", - " a\n", - "0 1.0\n", - "1 2.1\n", - "2 3.0\n", - "decimal\n", - " a\n", - "0 1.00\n", - "1 2.05\n", - "2 3.01\n", - " a\n", - "0 1.00\n", - "1 2.05\n", - "2 3.01\n" - ] - } - ], - "source": [ - "def check(t):\n", - " a = cudf.DataFrame( { 'a' : [1.00, 2.054, 3.01]} ).astype(t)\n", - " print( type(t),a)\n", - " a = a.round(decimals=1)\n", - " print(type(t), a)\n", - "\n", - "print('float')\n", - "check(float)\n", - "print('decimal')\n", - "check(cudf.Decimal32Dtype(precision=3, scale=2))" - ] - }, - { - "cell_type": "markdown", - "id": "f8d8de2b-9bf9-45ac-8953-181f6de844c4", - "metadata": {}, - "source": [ - "## 改修後\n", - "\n", - " cols = (\n", - " col.round(decimals[name], how=how)\n", - " if name in decimals and 
(\n", - " col.dtype.kind in \"fiu\" or \n", - " isinstance(col.dtype, (cudf.Decimal32Dtype, cudf.Decimal64Dtype))\n", - " )\n", - " else col.copy(deep=True)\n", - " for name, col in self._column_labels_and_values" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "6df9ca22-da3a-4456-b7da-186a746d751e", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "float\n", - " a\n", - "0 1.000\n", - "1 2.054\n", - "2 3.010\n", - " a\n", - "0 1.0\n", - "1 2.1\n", - "2 3.0\n", - "decimal\n", - " a\n", - "0 1.00\n", - "1 2.05\n", - "2 3.01\n", - " a\n", - "0 1.00\n", - "1 2.05\n", - "2 3.01\n" - ] - } - ], - "source": [ - "def check(t):\n", - " a = cudf.DataFrame( { 'a' : [1.00, 2.054, 3.01]} ).astype(t)\n", - " print( type(t),a)\n", - " a = a.round(decimals=1)\n", - " print(type(t), a)\n", - "\n", - "print('float')\n", - "check(float)\n", - "print('decimal')\n", - "check(cudf.Decimal32Dtype(precision=3, scale=2))" - ] - }, - { - "cell_type": "markdown", - "id": "db35ea7d-3b18-44bb-bede-3a07743a3a0a", - "metadata": {}, - "source": [ - "## 改修(\"fiu\"外す)\n", - "\n", - "\n", - " col.round(decimals[name], how=how)\n", - " if name in decimals\n", - " #and (\n", - " # col.dtype.kind in \"fiu\" or\n", - " # isinstance(col.dtype, (cudf.Decimal32Dtype, cudf.Decimal64Dtype))\n", - " # )\n", - " else col.copy(deep=True)\n", - " for name, col in self._column_labels_and_values" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "9b2e4d00-1ba0-44a6-a0bc-1a7c8c6b5540", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "float\n", - " a\n", - "0 1.000\n", - "1 2.054\n", - "2 3.010\n", - " a\n", - "0 1.0\n", - "1 2.1\n", - "2 3.0\n", - "decimal\n", - " a\n", - "0 1.00\n", - "1 2.05\n", - "2 3.01\n", - " a\n", - "0 1.00\n", - "1 2.05\n", - "2 3.01\n" - ] - } - ], - "source": [ - "def check(t):\n", - " a = cudf.DataFrame( { 'a' : [1.00, 2.054, 3.01]} ).astype(t)\n", - " 
print( type(t),a)\n", - " a = a.round(decimals=1)\n", - " print(type(t), a)\n", - "\n", - "print('float')\n", - "check(float)\n", - "print('decimal')\n", - "check(cudf.Decimal32Dtype(precision=3, scale=2))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c39117df-629e-4876-bd35-40baf0f3ab26", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.7" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 14edb150bd2..84691c6d425 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -3936,14 +3936,11 @@ def round(self, decimals=0, how="half_even"): cols = ( col.round(decimals[name], how=how) - if name in decimals - #and ( - # col.dtype.kind in "fiu" or - # isinstance(col.dtype, (cudf.Decimal32Dtype, cudf.Decimal64Dtype)) - # ) + if name in decimals and ( col.dtype.kind in "fiu" or isinstance(col.dtype, (cudf.Decimal32Dtype, cudf.Decimal64Dtype))) else col.copy(deep=True) for name, col in self._column_labels_and_values ) + return self._from_data_like_self( self._data._from_columns_like_self(cols) ) From 83b384f95291443c0f0a5fac4fd5fd918261c17a Mon Sep 17 00:00:00 2001 From: a-hirota Date: Fri, 15 Nov 2024 08:47:15 +0900 Subject: [PATCH 04/10] Remove stray blank line before return in round --- python/cudf/cudf/core/indexed_frame.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 84691c6d425..c09597d4f1b 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ 
b/python/cudf/cudf/core/indexed_frame.py @@ -3940,7 +3940,6 @@ def round(self, decimals=0, how="half_even"): else col.copy(deep=True) for name, col in self._column_labels_and_values ) - return self._from_data_like_self( self._data._from_columns_like_self(cols) ) From 73ede0096f41d514de7e8c34cef7a38063931c0e Mon Sep 17 00:00:00 2001 From: a-hirota Date: Fri, 15 Nov 2024 17:43:37 +0900 Subject: [PATCH 05/10] Add test cases for Decimal32 and Decimal64 rounding modes - Added test cases for "half_up" and "half_even" rounding modes. - Tested both Decimal32Dtype and Decimal64Dtype with various precisions and scales. - Ensured coverage for edge cases like .5 rounding to the nearest even number in "half_even". - Verified correctness of rounding logic across different decimal places. --- python/cudf/cudf/core/indexed_frame.py | 2 +- python/cudf/cudf/tests/test_series.py | 37 ++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index c09597d4f1b..b42e3e787c9 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -3936,7 +3936,7 @@ def round(self, decimals=0, how="half_even"): cols = ( col.round(decimals[name], how=how) - if name in decimals and ( col.dtype.kind in "fiu" or isinstance(col.dtype, (cudf.Decimal32Dtype, cudf.Decimal64Dtype))) + if name in decimals and (col.dtype.kind in "fiu" or isinstance(col.dtype, (cudf.Decimal32Dtype, cudf.Decimal64Dtype))) else col.copy(deep=True) for name, col in self._column_labels_and_values ) diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py index 7f0a4902ed1..48d933a50cc 100644 --- a/python/cudf/cudf/tests/test_series.py +++ b/python/cudf/cudf/tests/test_series.py @@ -14,6 +14,9 @@ import pytest import cudf +from cudf.core.dtypes import Decimal32Dtype +from cudf.core.dtypes import Decimal64Dtype + from cudf.api.extensions import 
no_default from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.errors import MixedTypeError @@ -772,6 +775,40 @@ def test_round_nan_as_null_false(series, decimal): assert_eq(result, expected, atol=1e-10) +@pytest.mark.parametrize( + "data, dtype, decimals, expected_half_up, expected_half_even", + [ + # Decimal32Dtype Test Cases + ([1.234, 2.345, 3.456], Decimal32Dtype(precision=5, scale=3), 2, [1.23, 2.35, 3.46], [1.23, 2.34, 3.46]), + ([1.234, 2.345, 3.456], Decimal32Dtype(precision=5, scale=3), 0, [1.0, 2.0, 3.0], [1.0, 2.0, 3.0]), + ([1.234, 2.345, 3.456], Decimal32Dtype(precision=5, scale=3), 3, [1.234, 2.345, 3.456], [1.234, 2.345, 3.456]), + # Decimal64Dtype Test Cases + ([1.234567, 2.345678, 3.456789], Decimal64Dtype(precision=10, scale=6), 4, [1.2346, 2.3457, 3.4568], [1.2346, 2.3457, 3.4568]), + ([1.234567, 2.345678, 3.456789], Decimal64Dtype(precision=10, scale=6), 2, [1.23, 2.35, 3.46], [1.23, 2.35, 3.46]), + ([1.234567, 2.345678, 3.456789], Decimal64Dtype(precision=10, scale=6), 6, [1.234567, 2.345678, 3.456789], [1.234567, 2.345678, 3.456789]), + ], +) +def test_series_round_decimal(data, dtype, decimals, expected_half_up, expected_half_even): + ser = cudf.Series(data).astype(dtype) + + # Half-up rounding + result_half_up = ser.round(decimals=decimals, how="half_up").astype(dtype) + # print("Rounded Series (half_up):", result_half_up) + + expected_ser_half_up = cudf.Series(expected_half_up).astype(dtype) + # print("Expected Series (half_up):", expected_ser_half_up) + + assert_eq(result_half_up, expected_ser_half_up) + + # Half-even rounding + result_half_even = ser.round(decimals=decimals, how="half_even").astype(dtype) + # print("Rounded Series (half_even):", result_half_even) + + expected_ser_half_even = cudf.Series(expected_half_even).astype(dtype) + # print("Expected Series (half_even):", expected_ser_half_even) + assert_eq(result_half_even, expected_ser_half_even) + + @pytest.mark.parametrize("ps", 
_series_na_data()) @pytest.mark.parametrize("nan_as_null", [True, False, None]) def test_series_isnull_isna(ps, nan_as_null): From 3482094a99bd5b8e6bde2e4c0adcdf34c4b206fb Mon Sep 17 00:00:00 2001 From: Hirota Akio <33370421+a-hirota@users.noreply.github.com> Date: Sat, 16 Nov 2024 12:58:34 +0900 Subject: [PATCH 06/10] Update python/cudf/cudf/tests/test_series.py Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- python/cudf/cudf/tests/test_series.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py index 48d933a50cc..6c30960fce0 100644 --- a/python/cudf/cudf/tests/test_series.py +++ b/python/cudf/cudf/tests/test_series.py @@ -14,9 +14,6 @@ import pytest import cudf -from cudf.core.dtypes import Decimal32Dtype -from cudf.core.dtypes import Decimal64Dtype - from cudf.api.extensions import no_default from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.errors import MixedTypeError From 1b897a2d41fbe6c9a8d99640e2a26fff123ede82 Mon Sep 17 00:00:00 2001 From: Hirota Akio <33370421+a-hirota@users.noreply.github.com> Date: Sat, 16 Nov 2024 12:59:20 +0900 Subject: [PATCH 07/10] Update python/cudf/cudf/tests/test_series.py Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- python/cudf/cudf/tests/test_series.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py index 6c30960fce0..8baf981cd6f 100644 --- a/python/cudf/cudf/tests/test_series.py +++ b/python/cudf/cudf/tests/test_series.py @@ -775,14 +775,12 @@ def test_round_nan_as_null_false(series, decimal): @pytest.mark.parametrize( "data, dtype, decimals, expected_half_up, expected_half_even", [ - # Decimal32Dtype Test Cases - ([1.234, 2.345, 3.456], Decimal32Dtype(precision=5, scale=3), 2, [1.23, 2.35, 3.46], [1.23, 2.34, 3.46]), - ([1.234, 
2.345, 3.456], Decimal32Dtype(precision=5, scale=3), 0, [1.0, 2.0, 3.0], [1.0, 2.0, 3.0]), - ([1.234, 2.345, 3.456], Decimal32Dtype(precision=5, scale=3), 3, [1.234, 2.345, 3.456], [1.234, 2.345, 3.456]), - # Decimal64Dtype Test Cases - ([1.234567, 2.345678, 3.456789], Decimal64Dtype(precision=10, scale=6), 4, [1.2346, 2.3457, 3.4568], [1.2346, 2.3457, 3.4568]), - ([1.234567, 2.345678, 3.456789], Decimal64Dtype(precision=10, scale=6), 2, [1.23, 2.35, 3.46], [1.23, 2.35, 3.46]), - ([1.234567, 2.345678, 3.456789], Decimal64Dtype(precision=10, scale=6), 6, [1.234567, 2.345678, 3.456789], [1.234567, 2.345678, 3.456789]), + ([1.234, 2.345, 3.456], cudf.Decimal32Dtype(precision=5, scale=3), 2, [1.23, 2.35, 3.46], [1.23, 2.34, 3.46]), + ([1.234, 2.345, 3.456], cudf.Decimal32Dtype(precision=5, scale=3), 0, [1.0, 2.0, 3.0], [1.0, 2.0, 3.0]), + ([1.234, 2.345, 3.456], cudf.Decimal32Dtype(precision=5, scale=3), 3, [1.234, 2.345, 3.456], [1.234, 2.345, 3.456]), + ([1.234567, 2.345678, 3.456789], cudf.Decimal64Dtype(precision=10, scale=6), 4, [1.2346, 2.3457, 3.4568], [1.2346, 2.3457, 3.4568]), + ([1.234567, 2.345678, 3.456789], cudf.Decimal64Dtype(precision=10, scale=6), 2, [1.23, 2.35, 3.46], [1.23, 2.35, 3.46]), + ([1.234567, 2.345678, 3.456789], cudf.Decimal64Dtype(precision=10, scale=6), 6, [1.234567, 2.345678, 3.456789], [1.234567, 2.345678, 3.456789]), ], ) def test_series_round_decimal(data, dtype, decimals, expected_half_up, expected_half_even): From 3a0019456aad1ac3cdf73f84c370197da5132275 Mon Sep 17 00:00:00 2001 From: a-hirota Date: Sat, 16 Nov 2024 13:27:17 +0900 Subject: [PATCH 08/10] Addressed review feedback from mroeschke: Removed comments and print statements from the test function as requested. Clarified the approach for handling "half_up" and "half_even" rounding methods. 
--- python/cudf/cudf/tests/test_series.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py index 8baf981cd6f..bc0dc90728e 100644 --- a/python/cudf/cudf/tests/test_series.py +++ b/python/cudf/cudf/tests/test_series.py @@ -786,21 +786,12 @@ def test_round_nan_as_null_false(series, decimal): def test_series_round_decimal(data, dtype, decimals, expected_half_up, expected_half_even): ser = cudf.Series(data).astype(dtype) - # Half-up rounding result_half_up = ser.round(decimals=decimals, how="half_up").astype(dtype) - # print("Rounded Series (half_up):", result_half_up) - expected_ser_half_up = cudf.Series(expected_half_up).astype(dtype) - # print("Expected Series (half_up):", expected_ser_half_up) - assert_eq(result_half_up, expected_ser_half_up) - # Half-even rounding result_half_even = ser.round(decimals=decimals, how="half_even").astype(dtype) - # print("Rounded Series (half_even):", result_half_even) - expected_ser_half_even = cudf.Series(expected_half_even).astype(dtype) - # print("Expected Series (half_even):", expected_ser_half_even) assert_eq(result_half_even, expected_ser_half_even) From 3f1badebfe4df0dd17435ac897333898572faa6b Mon Sep 17 00:00:00 2001 From: a-hirota Date: Wed, 20 Nov 2024 11:05:27 +0900 Subject: [PATCH 09/10] style: apply pre-commit fixes Ran pre-commit to address style issues as per review feedback. This ensures consistency with project coding standards. 
--- python/cudf/cudf/core/indexed_frame.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 87c29ee6965..f31674f21a8 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -3969,7 +3969,13 @@ def round(self, decimals=0, how="half_even"): cols = ( col.round(decimals[name], how=how) - if name in decimals and (col.dtype.kind in "fiu" or isinstance(col.dtype, (cudf.Decimal32Dtype, cudf.Decimal64Dtype))) + if name in decimals + and ( + col.dtype.kind in "fiu" + or isinstance( + col.dtype, (cudf.Decimal32Dtype, cudf.Decimal64Dtype) + ) + ) else col.copy(deep=True) for name, col in self._column_labels_and_values ) From 3ebad5e19a2d98d7a2e4028304e3890825c4dd29 Mon Sep 17 00:00:00 2001 From: a-hirota Date: Thu, 21 Nov 2024 20:25:42 +0900 Subject: [PATCH 10/10] style: apply pre-commit formatting to test_series.py (missed in previous commit) --- python/cudf/cudf/tests/test_series.py | 58 ++++++++++++++++++++++----- 1 file changed, 49 insertions(+), 9 deletions(-) diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py index bc0dc90728e..f9954b65475 100644 --- a/python/cudf/cudf/tests/test_series.py +++ b/python/cudf/cudf/tests/test_series.py @@ -775,24 +775,64 @@ def test_round_nan_as_null_false(series, decimal): @pytest.mark.parametrize( "data, dtype, decimals, expected_half_up, expected_half_even", [ - ([1.234, 2.345, 3.456], cudf.Decimal32Dtype(precision=5, scale=3), 2, [1.23, 2.35, 3.46], [1.23, 2.34, 3.46]), - ([1.234, 2.345, 3.456], cudf.Decimal32Dtype(precision=5, scale=3), 0, [1.0, 2.0, 3.0], [1.0, 2.0, 3.0]), - ([1.234, 2.345, 3.456], cudf.Decimal32Dtype(precision=5, scale=3), 3, [1.234, 2.345, 3.456], [1.234, 2.345, 3.456]), - ([1.234567, 2.345678, 3.456789], cudf.Decimal64Dtype(precision=10, scale=6), 4, [1.2346, 2.3457, 3.4568], [1.2346, 2.3457, 3.4568]), - ([1.234567, 2.345678, 3.456789], 
cudf.Decimal64Dtype(precision=10, scale=6), 2, [1.23, 2.35, 3.46], [1.23, 2.35, 3.46]), - ([1.234567, 2.345678, 3.456789], cudf.Decimal64Dtype(precision=10, scale=6), 6, [1.234567, 2.345678, 3.456789], [1.234567, 2.345678, 3.456789]), + ( + [1.234, 2.345, 3.456], + cudf.Decimal32Dtype(precision=5, scale=3), + 2, + [1.23, 2.35, 3.46], + [1.23, 2.34, 3.46], + ), + ( + [1.234, 2.345, 3.456], + cudf.Decimal32Dtype(precision=5, scale=3), + 0, + [1.0, 2.0, 3.0], + [1.0, 2.0, 3.0], + ), + ( + [1.234, 2.345, 3.456], + cudf.Decimal32Dtype(precision=5, scale=3), + 3, + [1.234, 2.345, 3.456], + [1.234, 2.345, 3.456], + ), + ( + [1.234567, 2.345678, 3.456789], + cudf.Decimal64Dtype(precision=10, scale=6), + 4, + [1.2346, 2.3457, 3.4568], + [1.2346, 2.3457, 3.4568], + ), + ( + [1.234567, 2.345678, 3.456789], + cudf.Decimal64Dtype(precision=10, scale=6), + 2, + [1.23, 2.35, 3.46], + [1.23, 2.35, 3.46], + ), + ( + [1.234567, 2.345678, 3.456789], + cudf.Decimal64Dtype(precision=10, scale=6), + 6, + [1.234567, 2.345678, 3.456789], + [1.234567, 2.345678, 3.456789], + ), ], ) -def test_series_round_decimal(data, dtype, decimals, expected_half_up, expected_half_even): +def test_series_round_decimal( + data, dtype, decimals, expected_half_up, expected_half_even +): ser = cudf.Series(data).astype(dtype) result_half_up = ser.round(decimals=decimals, how="half_up").astype(dtype) expected_ser_half_up = cudf.Series(expected_half_up).astype(dtype) assert_eq(result_half_up, expected_ser_half_up) - result_half_even = ser.round(decimals=decimals, how="half_even").astype(dtype) + result_half_even = ser.round(decimals=decimals, how="half_even").astype( + dtype + ) expected_ser_half_even = cudf.Series(expected_half_even).astype(dtype) - assert_eq(result_half_even, expected_ser_half_even) + assert_eq(result_half_even, expected_ser_half_even) @pytest.mark.parametrize("ps", _series_na_data())