Skip to content

Commit

Permalink
feat!: remove utils.is_categorical
Browse files Browse the repository at this point in the history
BREAKING CHANGE: function `edvart.utils.is_categorical` is removed. `edvart.data_types.is_categorical`
can be used instead, with similar behavior.

Resolves #25.
  • Loading branch information
mbelak-dtml committed Aug 30, 2023
1 parent a5b524b commit fc601b1
Show file tree
Hide file tree
Showing 2 changed files with 0 additions and 23 deletions.
22 changes: 0 additions & 22 deletions edvart/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,28 +11,6 @@
from edvart.data_types import is_numeric


def is_categorical(series: pd.Series, nunique_max: int = 20) -> bool:
    """
    A heuristic of whether a series is categorical or numerical.

    A series is regarded as categorical when it does not hold datetime or
    timedelta values and at least one of the following is true:

    * `nunique_max` is negative (no limit on the number of unique values),
    * the series has at most `nunique_max` unique non-null values,
    * the series is not numeric, as judged by `is_numeric`.

    Parameters
    ----------
    series: pd.Series
        Input series
    nunique_max: int (default = 20)
        Maximum number of unique values for a numeric series to be regarded as categorical.
        No limit on number of unique values if set to a negative number.

    Returns
    -------
    bool
        True if series contains categorical values, otherwise False
    """
    # Temporal data is never categorical. Use the public `pd.api.types`
    # predicates rather than the private `pd.core.dtypes.common` helper, which
    # is not part of the supported pandas API and is absent in newer releases.
    # `is_datetime64_any_dtype` also recognises timezone-aware datetimes.
    if pd.api.types.is_datetime64_any_dtype(
        series
    ) or pd.api.types.is_timedelta64_dtype(series):
        return False

    # Few distinct values (or no limit at all) -> categorical regardless of dtype.
    if nunique_max < 0 or series.nunique() <= nunique_max:
        return True

    # Many distinct values: only non-numeric data is still treated as categorical.
    return not is_numeric(series)


def top_frequent_values(series: pd.Series, n_top: int = 10) -> Dict[Any, float]:
"""
Counts top n most frequent values in series along with other value counts and NULL value counts.
Expand Down
1 change: 0 additions & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ def test_full_na_series():
warnings.simplefilter(action="error", category=RuntimeWarning)
result = func(series)
assert math.isnan(float(result))
assert utils.is_categorical(series)
assert utils.num_unique_values(series) == 0


Expand Down

0 comments on commit fc601b1

Please sign in to comment.