From 9777b85c653a82e3cc908b965fb5665381c3fe64 Mon Sep 17 00:00:00 2001 From: Alexandre Quemy Date: Wed, 24 May 2023 09:23:16 +0200 Subject: [PATCH] fix: revert infer all `Numeric` vars as `TimeSeries` when tsmode=True (#1343)" (#1346) This reverts commit ba957be1f7a258ab2fe180338e6515d37b1b8c0e. --- src/ydata_profiling/model/typeset.py | 19 ++++++++++++++----- tests/unit/test_time_series.py | 5 +++-- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/ydata_profiling/model/typeset.py b/src/ydata_profiling/model/typeset.py index 9e30022ff..ee1c42e78 100644 --- a/src/ydata_profiling/model/typeset.py +++ b/src/ydata_profiling/model/typeset.py @@ -293,11 +293,20 @@ def get_relations() -> Sequence[TypeRelation]: @series_not_empty @series_handle_nulls def contains_op(series: pd.Series, state: dict) -> bool: - return ( - pdt.is_numeric_dtype(series) - and not pdt.is_bool_dtype(series) - and series.nunique() > 1 - ) + def is_timedependent(series: pd.Series) -> bool: + autocorrelation_threshold = config.vars.timeseries.autocorrelation + lags = config.vars.timeseries.lags + with warnings.catch_warnings(): + warnings.simplefilter("ignore", RuntimeWarning) + for lag in lags: + autcorr = series.autocorr(lag=lag) + if autcorr >= autocorrelation_threshold: + return True + + return False + + is_numeric = pdt.is_numeric_dtype(series) and not pdt.is_bool_dtype(series) + return is_numeric and is_timedependent(series) types = {Unsupported, Boolean, Numeric, Text, Categorical, DateTime} if config.vars.path.active: diff --git a/tests/unit/test_time_series.py b/tests/unit/test_time_series.py index 820361556..be472f579 100644 --- a/tests/unit/test_time_series.py +++ b/tests/unit/test_time_series.py @@ -25,6 +25,7 @@ def html_profile() -> str: "constant": np.ones(size), "sin": [round(np.sin(x * np.pi / 180), 2) for x in time_steps], "cos": [round(np.cos(x * np.pi / 180), 2) for x in time_steps], + "uniform": [round(x, 2) for x in np.random.uniform(0, 10, size)], "gaussian": [round(x, 2) for x in np.random.normal(0, 1, size)], } ) @@ -36,7 +37,7 @@ def html_profile() -> str: def test_timeseries_identification(html_profile: str): assert "TimeSeries" in html_profile, "TimeSeries not detected" assert ( - "TimeSeries9" in html_profile + "TimeSeries8" in html_profile ), "TimeSeries incorrectly identified" @@ -45,7 +46,7 @@ def test_timeseries_autocorrelation_tab(html_profile: str): "role=tab data-toggle=tab>Autocorrelation<" in html_profile ), "TimeSeries not detected" assert ( - html_profile.count("role=tab data-toggle=tab>Autocorrelation<") == 9 + html_profile.count("role=tab data-toggle=tab>Autocorrelation<") == 8 ), "TimeSeries autocorrelation tabs incorrectly generated"