Skip to content

Commit

Permalink
feat: add support for Pandas 1.5 (#1076)
Browse files Browse the repository at this point in the history
  • Loading branch information
akx authored and vascoalramos committed Oct 20, 2022
1 parent 8ef9556 commit 5c5a710
Show file tree
Hide file tree
Showing 10 changed files with 99 additions and 83 deletions.
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
joblib~=1.2.0 # 1.1.0
scipy>=1.4.1, <1.10
pandas>1.1, <1.5, !=1.4.0
pandas>1.1, <1.6, !=1.4.0
matplotlib>=3.2, <3.6
pydantic>=1.8.1, <1.11
PyYAML>=5.0.0, <6.1
Expand All @@ -23,4 +23,4 @@ tqdm>=4.48.2, <4.65
seaborn>=0.10.1, <0.13
multimethod>=1.4, <1.10
# metrics
statsmodels>=0.13.2, <0.14
statsmodels>=0.13.2, <0.14
7 changes: 6 additions & 1 deletion src/pandas_profiling/model/correlations.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,14 @@
import numpy as np
import pandas as pd
from multimethod import multimethod
from pandas.core.base import DataError

from pandas_profiling.config import Settings
from pandas_profiling.utils.compat import pandas_version_info

if pandas_version_info() >= (1, 5):
from pandas.errors import DataError
else:
from pandas.core.base import DataError


class Correlation:
Expand Down
16 changes: 10 additions & 6 deletions src/pandas_profiling/model/pandas/correlations_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,16 @@ def pandas_cramers_compute(
) -> Optional[pd.DataFrame]:
threshold = config.categorical_maximum_correlation_distinct

categoricals = {
key
for key, value in summary.items()
if value["type"] in {"Categorical", "Boolean"}
and value["n_distinct"] <= threshold
}
# `index` and `columns` must not be a set since Pandas 1.5,
# so convert it to a list. The order of the list is arbitrary.
categoricals = list(
{
key
for key, value in summary.items()
if value["type"] in {"Categorical", "Boolean"}
and value["n_distinct"] <= threshold
}
)

if len(categoricals) <= 1:
return None
Expand Down
10 changes: 8 additions & 2 deletions src/pandas_profiling/model/pandas/describe_numeric_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,13 @@

import numpy as np
import pandas as pd
from pandas.core.arrays.integer import _IntegerDtype

from pandas_profiling.utils.compat import pandas_version_info

if pandas_version_info() >= (1, 5):
from pandas.core.arrays.integer import IntegerDtype
else:
from pandas.core.arrays.integer import _IntegerDtype as IntegerDtype

from pandas_profiling.config import Settings
from pandas_profiling.model.summary_algorithms import (
Expand Down Expand Up @@ -95,7 +101,7 @@ def pandas_describe_numeric_1d(

stats = summary

if isinstance(series.dtype, _IntegerDtype):
if isinstance(series.dtype, IntegerDtype):
stats.update(numeric_stats_pandas(series))
present_values = series.astype(str(series.dtype).lower())
finite_values = present_values
Expand Down
6 changes: 2 additions & 4 deletions src/pandas_profiling/model/typeset_relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,7 @@ def to_category(series: pd.Series, state: dict) -> pd.Series:
if hasnans:
val = val.replace("nan", np.nan)

if int(pd.__version__.split(".")[0]) >= 1:
val = val.astype("string")
return val
return val.astype("string")


@series_handle_nulls
Expand Down Expand Up @@ -87,7 +85,7 @@ def category_to_numeric(series: pd.Series, state: dict) -> pd.Series:
return pd.to_numeric(series, errors="coerce")


hasnan_bool_name = "boolean" if int(pd.__version__.split(".")[0]) >= 1 else "Bool"
hasnan_bool_name = "boolean"


def to_bool(series: pd.Series) -> pd.Series:
Expand Down
14 changes: 14 additions & 0 deletions src/pandas_profiling/utils/compat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"""Utility functions for (version) compatibility"""
from functools import lru_cache
from typing import Tuple

import pandas as pd


@lru_cache(maxsize=1)
def pandas_version_info() -> Tuple[int, ...]:
    """
    Get the Pandas version as a tuple of integers,
    akin to `sys.version_info` for the Python version.

    Only the leading numeric release segment of ``pd.__version__`` is
    returned. Pre-release, development and local suffixes (for example
    ``1.5.0rc0`` or ``2.0.0.dev0+881.gabc123``) are ignored rather than
    being passed to ``int()``, which would raise ``ValueError``.

    The result is cached, so the version string is parsed only once.

    Returns:
        The numeric release components, e.g. ``(1, 5, 0)``.

    Raises:
        ValueError: if the version string does not start with a number.
    """
    # Drop the local version label ("+<label>") before splitting, so that
    # dots inside the label are not mistaken for release components.
    public_version = pd.__version__.split("+", 1)[0]

    release = []
    for component in public_version.split("."):
        # Leading run of ASCII digits in this component ("0rc0" -> "0").
        digits = component[: len(component) - len(component.lstrip("0123456789"))]
        if not digits:
            # A component such as "dev0" ends the release segment.
            break
        release.append(int(digits))
        if digits != component:
            # Trailing non-digits (e.g. "rc0") mark the end of the release.
            break

    if not release:
        raise ValueError(f"Cannot parse pandas version: {pd.__version__!r}")

    return tuple(release)
4 changes: 0 additions & 4 deletions tests/issues/test_issue523.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,10 @@
https://github.com/pandas-profiling/pandas-profiling/issues/523
"""
import pandas as pd
import pytest

from pandas_profiling import ProfileReport


@pytest.mark.skipif(
int(pd.__version__.split(".")[0]) < 1, reason="requires pandas 1 or higher"
)
def test_issue523():
# https://github.com/pandas-dev/pandas/issues/33803

Expand Down
7 changes: 2 additions & 5 deletions tests/issues/test_issue545.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,11 @@
import pytest

from pandas_profiling import ProfileReport


def pandas_version():
return tuple(int(s) for s in pd.__version__.split("."))
from pandas_profiling.utils.compat import pandas_version_info


@pytest.mark.skipif(
pandas_version() <= (1, 1, 0), reason="requires pandas 1.1.1 or higher"
pandas_version_info() <= (1, 1, 0), reason="requires pandas 1.1.1 or higher"
)
def test_issue545(get_data_file):
file_name = get_data_file(
Expand Down
102 changes: 45 additions & 57 deletions tests/unit/test_typeset_default.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import os

import pandas as pd
import pytest
from visions.test.series import get_series
from visions.test.utils import (
Expand All @@ -16,13 +15,6 @@
from pandas_profiling.model.typeset import ProfilingTypeSet
from tests.unit.test_utils import patch_arg

if int(pd.__version__.split(".")[0]) < 1:
from visions.dtypes.boolean import BoolDtype # noqa: F401

btype = "Bool"
else:
btype = "boolean"

base_path = os.path.abspath(os.path.dirname(__file__))

series = get_series()
Expand Down Expand Up @@ -100,6 +92,7 @@
"str_complex_nan",
"all_null_empty_str",
"py_datetime_str",
"string_dtype_series",
},
Boolean: {
"bool_series",
Expand All @@ -116,53 +109,49 @@
"timestamp_series_nat",
"date_series_nat",
},
}

if int(pd.__version__[0]) >= 1:
contains_map[Categorical].add("string_dtype_series")

contains_map[Unsupported] = {
"module",
"nan_series",
"nan_series_2",
"timedelta_series",
"timedelta_series_nat",
"timedelta_negative",
"path_series_linux",
"path_series_linux_missing",
"path_series_windows",
"url_series",
"url_nan_series",
"url_none_series",
"file_test_py",
"file_mixed_ext",
"file_test_py_missing",
"image_png",
"image_png_missing",
"image_png",
"image_png_missing",
"uuid_series",
"uuid_series_missing",
"mixed_list[str,int]",
"mixed_dict",
"callable",
"mixed_integer",
"mixed_list",
"date",
"time",
"empty",
"empty_bool",
"empty_float",
"empty_object",
"empty_int64",
"ip",
"ip_missing",
"ip_mixed_v4andv6",
"email_address_missing",
"email_address",
"all_null_none",
"all_null_nan",
"all_null_nat",
Unsupported: {
"module",
"nan_series",
"nan_series_2",
"timedelta_series",
"timedelta_series_nat",
"timedelta_negative",
"path_series_linux",
"path_series_linux_missing",
"path_series_windows",
"url_series",
"url_nan_series",
"url_none_series",
"file_test_py",
"file_mixed_ext",
"file_test_py_missing",
"image_png",
"image_png_missing",
"image_png",
"image_png_missing",
"uuid_series",
"uuid_series_missing",
"mixed_list[str,int]",
"mixed_dict",
"callable",
"mixed_integer",
"mixed_list",
"date",
"time",
"empty",
"empty_bool",
"empty_float",
"empty_object",
"empty_int64",
"ip",
"ip_missing",
"ip_mixed_v4andv6",
"email_address_missing",
"email_address",
"all_null_none",
"all_null_nan",
"all_null_nat",
},
}


Expand Down Expand Up @@ -293,9 +282,8 @@ def test_contains(name, series, contains_type, member):
"all_null_none": Unsupported,
"complex_series_py_float": Numeric,
"all_null_nan": Unsupported,
"string_dtype_series": Categorical,
}
if int(pd.__version__[0]) >= 1:
inference_map["string_dtype_series"] = Categorical


@pytest.mark.parametrize(
Expand Down
12 changes: 10 additions & 2 deletions tests/unit/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import pandas as pd
import pytest

from pandas_profiling.utils.compat import pandas_version_info
from pandas_profiling.utils.dataframe import (
expand_mixed,
read_pandas,
Expand Down Expand Up @@ -37,10 +38,17 @@ def test_read_pandas_csv():

def test_read_pandas_json():
p = Path("dataframe.json")
with pytest.raises(ValueError) as e:

expected_error, expected_message = (
(FileNotFoundError, "File dataframe.json does not exist")
if pandas_version_info() >= (1, 5)
else (ValueError, "Expected object or value")
)

with pytest.raises(expected_error) as e:
read_pandas(p)

assert str(e.value) == "Expected object or value"
assert str(e.value) == expected_message


def test_warning():
Expand Down

0 comments on commit 5c5a710

Please sign in to comment.