Skip to content

Commit

Permalink
clib.converison._to_numpy: Add tests for pandas.Series with pandas nu…
Browse files Browse the repository at this point in the history
…meric dtypes (#3584)

Co-authored-by: Wei Ji <[email protected]>
Co-authored-by: Yvonne Fröhlich <[email protected]>
Co-authored-by: Michael Grund <[email protected]>
  • Loading branch information
4 people authored Dec 12, 2024
1 parent 4d9f897 commit 351247d
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 13 deletions.
90 changes: 88 additions & 2 deletions pygmt/tests/test_clib_to_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ def timestamp(unit: str, tz: str | None = None):

_HAS_PYARROW = False

# Mark tests that require pyarrow
pa_marks = {"marks": skip_if_no(package="pyarrow")}


def _check_result(result, expected_dtype):
"""
Expand Down Expand Up @@ -173,22 +176,105 @@ def test_to_numpy_numpy_string(dtype):
# - BooleanDtype
# - ArrowDtype: a special dtype used to store data in the PyArrow format.
#
# In pandas, PyArrow types can be specified using the following formats:
#
# - Prefixed with the name of the dtype and "[pyarrow]" (e.g., "int8[pyarrow]")
# - Specified using ``ArrowDType`` (e.g., "pd.ArrowDtype(pa.int8())")
#
# References:
# 1. https://pandas.pydata.org/docs/reference/arrays.html
# 2. https://pandas.pydata.org/docs/user_guide/basics.html#basics-dtypes
# 3. https://pandas.pydata.org/docs/user_guide/pyarrow.html
########################################################################################
@pytest.mark.parametrize(("dtype", "expected_dtype"), np_dtype_params)
@pytest.mark.parametrize(
("dtype", "expected_dtype"),
[
*np_dtype_params,
pytest.param(pd.Int8Dtype(), np.int8, id="Int8"),
pytest.param(pd.Int16Dtype(), np.int16, id="Int16"),
pytest.param(pd.Int32Dtype(), np.int32, id="Int32"),
pytest.param(pd.Int64Dtype(), np.int64, id="Int64"),
pytest.param(pd.UInt8Dtype(), np.uint8, id="UInt8"),
pytest.param(pd.UInt16Dtype(), np.uint16, id="UInt16"),
pytest.param(pd.UInt32Dtype(), np.uint32, id="UInt32"),
pytest.param(pd.UInt64Dtype(), np.uint64, id="UInt64"),
pytest.param(pd.Float32Dtype(), np.float32, id="Float32"),
pytest.param(pd.Float64Dtype(), np.float64, id="Float64"),
pytest.param("int8[pyarrow]", np.int8, id="int8[pyarrow]", **pa_marks),
pytest.param("int16[pyarrow]", np.int16, id="int16[pyarrow]", **pa_marks),
pytest.param("int32[pyarrow]", np.int32, id="int32[pyarrow]", **pa_marks),
pytest.param("int64[pyarrow]", np.int64, id="int64[pyarrow]", **pa_marks),
pytest.param("uint8[pyarrow]", np.uint8, id="uint8[pyarrow]", **pa_marks),
pytest.param("uint16[pyarrow]", np.uint16, id="uint16[pyarrow]", **pa_marks),
pytest.param("uint32[pyarrow]", np.uint32, id="uint32[pyarrow]", **pa_marks),
pytest.param("uint64[pyarrow]", np.uint64, id="uint64[pyarrow]", **pa_marks),
pytest.param("float16[pyarrow]", np.float16, id="float16[pyarrow]", **pa_marks),
pytest.param("float32[pyarrow]", np.float32, id="float32[pyarrow]", **pa_marks),
pytest.param("float64[pyarrow]", np.float64, id="float64[pyarrow]", **pa_marks),
],
)
def test_to_numpy_pandas_numeric(dtype, expected_dtype):
"""
Test the _to_numpy function with pandas.Series of numeric dtypes.
"""
series = pd.Series([1, 2, 3, 4, 5, 6], dtype=dtype)[::2] # Not C-contiguous
data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
if dtype == "float16[pyarrow]" and Version(pd.__version__) < Version("2.2"):
# float16 needs special handling for pandas < 2.2.
# Example from https://arrow.apache.org/docs/python/generated/pyarrow.float16.html
data = np.array(data, dtype=np.float16)
series = pd.Series(data, dtype=dtype)[::2] # Not C-contiguous
result = _to_numpy(series)
_check_result(result, expected_dtype)
npt.assert_array_equal(result, series)


@pytest.mark.parametrize(
("dtype", "expected_dtype"),
[
pytest.param(np.float16, np.float16, id="float16"),
pytest.param(np.float32, np.float32, id="float32"),
pytest.param(np.float64, np.float64, id="float64"),
pytest.param(np.longdouble, np.longdouble, id="longdouble"),
pytest.param(pd.Int8Dtype(), np.float64, id="Int8"),
pytest.param(pd.Int16Dtype(), np.float64, id="Int16"),
pytest.param(pd.Int32Dtype(), np.float64, id="Int32"),
pytest.param(pd.Int64Dtype(), np.float64, id="Int64"),
pytest.param(pd.UInt8Dtype(), np.float64, id="UInt8"),
pytest.param(pd.UInt16Dtype(), np.float64, id="UInt16"),
pytest.param(pd.UInt32Dtype(), np.float64, id="UInt32"),
pytest.param(pd.UInt64Dtype(), np.float64, id="UInt64"),
pytest.param(pd.Float32Dtype(), np.float32, id="Float32"),
pytest.param(pd.Float64Dtype(), np.float64, id="Float64"),
pytest.param("int8[pyarrow]", np.float64, id="int8[pyarrow]", **pa_marks),
pytest.param("int16[pyarrow]", np.float64, id="int16[pyarrow]", **pa_marks),
pytest.param("int32[pyarrow]", np.float64, id="int32[pyarrow]", **pa_marks),
pytest.param("int64[pyarrow]", np.float64, id="int64[pyarrow]", **pa_marks),
pytest.param("uint8[pyarrow]", np.float64, id="uint8[pyarrow]", **pa_marks),
pytest.param("uint16[pyarrow]", np.float64, id="uint16[pyarrow]", **pa_marks),
pytest.param("uint32[pyarrow]", np.float64, id="uint32[pyarrow]", **pa_marks),
pytest.param("uint64[pyarrow]", np.float64, id="uint64[pyarrow]", **pa_marks),
pytest.param("float16[pyarrow]", np.float16, id="float16[pyarrow]", **pa_marks),
pytest.param("float32[pyarrow]", np.float32, id="float32[pyarrow]", **pa_marks),
pytest.param("float64[pyarrow]", np.float64, id="float64[pyarrow]", **pa_marks),
],
)
def test_to_numpy_pandas_numeric_with_na(dtype, expected_dtype):
"""
Test the _to_numpy function with pandas.Series of NumPy/pandas/PyArrow numeric
dtypes and missing values (NA).
"""
data = [1.0, 2.0, None, 4.0, 5.0, 6.0]
if dtype == "float16[pyarrow]" and Version(pd.__version__) < Version("2.2"):
# float16 needs special handling for pandas < 2.2.
# Example from https://arrow.apache.org/docs/python/generated/pyarrow.float16.html
data = np.array(data, dtype=np.float16)
series = pd.Series(data, dtype=dtype)[::2] # Not C-contiguous
assert series.isna().any()
result = _to_numpy(series)
_check_result(result, expected_dtype)
npt.assert_array_equal(result, np.array([1.0, np.nan, 5.0], dtype=expected_dtype))


@pytest.mark.parametrize(
"dtype",
[
Expand Down
11 changes: 0 additions & 11 deletions pygmt/tests/test_clib_vectors_to_arrays.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,17 +69,6 @@ def test_vectors_to_arrays_not_c_contiguous():
_check_arrays(arrays)


def test_vectors_to_arrays_pandas_nan():
"""
Test the vectors_to_arrays function with pandas Series containing NaNs.
"""
vectors = [pd.Series(data=[0, 4, pd.NA, 8, 6], dtype=pd.Int32Dtype())]
arrays = vectors_to_arrays(vectors)
npt.assert_equal(arrays[0], np.array([0, 4, np.nan, 8, 6], dtype=np.float64))
assert arrays[0].dtype == np.float64
_check_arrays(arrays)


@pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed.")
def test_vectors_to_arrays_pyarrow_datetime():
"""
Expand Down

0 comments on commit 351247d

Please sign in to comment.