From 351247de45927537270bec98f9e284a6c759768b Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 12 Dec 2024 09:29:22 +0800 Subject: [PATCH] clib.conversion._to_numpy: Add tests for pandas.Series with pandas numeric dtypes (#3584) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Wei Ji <23487320+weiji14@users.noreply.github.com> Co-authored-by: Yvonne Fröhlich <94163266+yvonnefroehlich@users.noreply.github.com> Co-authored-by: Michael Grund <23025878+michaelgrund@users.noreply.github.com> --- pygmt/tests/test_clib_to_numpy.py | 90 +++++++++++++++++++++- pygmt/tests/test_clib_vectors_to_arrays.py | 11 --- 2 files changed, 88 insertions(+), 13 deletions(-) diff --git a/pygmt/tests/test_clib_to_numpy.py b/pygmt/tests/test_clib_to_numpy.py index 64db3ea6547..29fc50826ab 100644 --- a/pygmt/tests/test_clib_to_numpy.py +++ b/pygmt/tests/test_clib_to_numpy.py @@ -34,6 +34,9 @@ def timestamp(unit: str, tz: str | None = None): _HAS_PYARROW = False +# Mark tests that require pyarrow +pa_marks = {"marks": skip_if_no(package="pyarrow")} + def _check_result(result, expected_dtype): """ @@ -173,22 +176,105 @@ def test_to_numpy_numpy_string(dtype): # - BooleanDtype # - ArrowDtype: a special dtype used to store data in the PyArrow format. # +# In pandas, PyArrow types can be specified using the following formats: +# +# - Prefixed with the name of the dtype and "[pyarrow]" (e.g., "int8[pyarrow]") +# - Specified using ``ArrowDtype`` (e.g., "pd.ArrowDtype(pa.int8())") +# # References: # 1. https://pandas.pydata.org/docs/reference/arrays.html # 2. https://pandas.pydata.org/docs/user_guide/basics.html#basics-dtypes # 3. 
https://pandas.pydata.org/docs/user_guide/pyarrow.html ######################################################################################## -@pytest.mark.parametrize(("dtype", "expected_dtype"), np_dtype_params) +@pytest.mark.parametrize( + ("dtype", "expected_dtype"), + [ + *np_dtype_params, + pytest.param(pd.Int8Dtype(), np.int8, id="Int8"), + pytest.param(pd.Int16Dtype(), np.int16, id="Int16"), + pytest.param(pd.Int32Dtype(), np.int32, id="Int32"), + pytest.param(pd.Int64Dtype(), np.int64, id="Int64"), + pytest.param(pd.UInt8Dtype(), np.uint8, id="UInt8"), + pytest.param(pd.UInt16Dtype(), np.uint16, id="UInt16"), + pytest.param(pd.UInt32Dtype(), np.uint32, id="UInt32"), + pytest.param(pd.UInt64Dtype(), np.uint64, id="UInt64"), + pytest.param(pd.Float32Dtype(), np.float32, id="Float32"), + pytest.param(pd.Float64Dtype(), np.float64, id="Float64"), + pytest.param("int8[pyarrow]", np.int8, id="int8[pyarrow]", **pa_marks), + pytest.param("int16[pyarrow]", np.int16, id="int16[pyarrow]", **pa_marks), + pytest.param("int32[pyarrow]", np.int32, id="int32[pyarrow]", **pa_marks), + pytest.param("int64[pyarrow]", np.int64, id="int64[pyarrow]", **pa_marks), + pytest.param("uint8[pyarrow]", np.uint8, id="uint8[pyarrow]", **pa_marks), + pytest.param("uint16[pyarrow]", np.uint16, id="uint16[pyarrow]", **pa_marks), + pytest.param("uint32[pyarrow]", np.uint32, id="uint32[pyarrow]", **pa_marks), + pytest.param("uint64[pyarrow]", np.uint64, id="uint64[pyarrow]", **pa_marks), + pytest.param("float16[pyarrow]", np.float16, id="float16[pyarrow]", **pa_marks), + pytest.param("float32[pyarrow]", np.float32, id="float32[pyarrow]", **pa_marks), + pytest.param("float64[pyarrow]", np.float64, id="float64[pyarrow]", **pa_marks), + ], +) def test_to_numpy_pandas_numeric(dtype, expected_dtype): """ Test the _to_numpy function with pandas.Series of numeric dtypes. 
""" - series = pd.Series([1, 2, 3, 4, 5, 6], dtype=dtype)[::2] # Not C-contiguous + data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] + if dtype == "float16[pyarrow]" and Version(pd.__version__) < Version("2.2"): + # float16 needs special handling for pandas < 2.2. + # Example from https://arrow.apache.org/docs/python/generated/pyarrow.float16.html + data = np.array(data, dtype=np.float16) + series = pd.Series(data, dtype=dtype)[::2] # Not C-contiguous result = _to_numpy(series) _check_result(result, expected_dtype) npt.assert_array_equal(result, series) +@pytest.mark.parametrize( + ("dtype", "expected_dtype"), + [ + pytest.param(np.float16, np.float16, id="float16"), + pytest.param(np.float32, np.float32, id="float32"), + pytest.param(np.float64, np.float64, id="float64"), + pytest.param(np.longdouble, np.longdouble, id="longdouble"), + pytest.param(pd.Int8Dtype(), np.float64, id="Int8"), + pytest.param(pd.Int16Dtype(), np.float64, id="Int16"), + pytest.param(pd.Int32Dtype(), np.float64, id="Int32"), + pytest.param(pd.Int64Dtype(), np.float64, id="Int64"), + pytest.param(pd.UInt8Dtype(), np.float64, id="UInt8"), + pytest.param(pd.UInt16Dtype(), np.float64, id="UInt16"), + pytest.param(pd.UInt32Dtype(), np.float64, id="UInt32"), + pytest.param(pd.UInt64Dtype(), np.float64, id="UInt64"), + pytest.param(pd.Float32Dtype(), np.float32, id="Float32"), + pytest.param(pd.Float64Dtype(), np.float64, id="Float64"), + pytest.param("int8[pyarrow]", np.float64, id="int8[pyarrow]", **pa_marks), + pytest.param("int16[pyarrow]", np.float64, id="int16[pyarrow]", **pa_marks), + pytest.param("int32[pyarrow]", np.float64, id="int32[pyarrow]", **pa_marks), + pytest.param("int64[pyarrow]", np.float64, id="int64[pyarrow]", **pa_marks), + pytest.param("uint8[pyarrow]", np.float64, id="uint8[pyarrow]", **pa_marks), + pytest.param("uint16[pyarrow]", np.float64, id="uint16[pyarrow]", **pa_marks), + pytest.param("uint32[pyarrow]", np.float64, id="uint32[pyarrow]", **pa_marks), + 
pytest.param("uint64[pyarrow]", np.float64, id="uint64[pyarrow]", **pa_marks), + pytest.param("float16[pyarrow]", np.float16, id="float16[pyarrow]", **pa_marks), + pytest.param("float32[pyarrow]", np.float32, id="float32[pyarrow]", **pa_marks), + pytest.param("float64[pyarrow]", np.float64, id="float64[pyarrow]", **pa_marks), + ], +) +def test_to_numpy_pandas_numeric_with_na(dtype, expected_dtype): + """ + Test the _to_numpy function with pandas.Series of NumPy/pandas/PyArrow numeric + dtypes and missing values (NA). + """ + data = [1.0, 2.0, None, 4.0, 5.0, 6.0] + if dtype == "float16[pyarrow]" and Version(pd.__version__) < Version("2.2"): + # float16 needs special handling for pandas < 2.2. + # Example from https://arrow.apache.org/docs/python/generated/pyarrow.float16.html + data = np.array(data, dtype=np.float16) + series = pd.Series(data, dtype=dtype)[::2] # Not C-contiguous + assert series.isna().any() + result = _to_numpy(series) + _check_result(result, expected_dtype) + npt.assert_array_equal(result, np.array([1.0, np.nan, 5.0], dtype=expected_dtype)) + + @pytest.mark.parametrize( "dtype", [ diff --git a/pygmt/tests/test_clib_vectors_to_arrays.py b/pygmt/tests/test_clib_vectors_to_arrays.py index e59690069ca..b85912d48d6 100644 --- a/pygmt/tests/test_clib_vectors_to_arrays.py +++ b/pygmt/tests/test_clib_vectors_to_arrays.py @@ -69,17 +69,6 @@ def test_vectors_to_arrays_not_c_contiguous(): _check_arrays(arrays) -def test_vectors_to_arrays_pandas_nan(): - """ - Test the vectors_to_arrays function with pandas Series containing NaNs. - """ - vectors = [pd.Series(data=[0, 4, pd.NA, 8, 6], dtype=pd.Int32Dtype())] - arrays = vectors_to_arrays(vectors) - npt.assert_equal(arrays[0], np.array([0, 4, np.nan, 8, 6], dtype=np.float64)) - assert arrays[0].dtype == np.float64 - _check_arrays(arrays) - - @pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed.") def test_vectors_to_arrays_pyarrow_datetime(): """