Skip to content

Commit

Permalink
FIX: Infer categorical dtypes before boolean resolution (#2379)
Browse files (browse the repository at this point in the history)
* Add test for #2317

* Improve variable_type docstring

* Give precedence to categorical dtype inference
  • Loading branch information
MaozGelbart authored Dec 12, 2020
1 parent 4560b90 commit 3b4c21a
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 7 deletions.
15 changes: 8 additions & 7 deletions seaborn/_core.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -1177,28 +1177,32 @@ def _add_axis_labels(self, ax, default_x="", default_y=""):


def variable_type(vector, boolean_type="numeric"):
"""Determine whether a vector contains numeric, categorical, or dateime data.
"""
Determine whether a vector contains numeric, categorical, or datetime data.
This function differs from the pandas typing API in two ways:
- Python sequences or object-typed PyData objects are considered numeric if
all of their entries are numeric.
- String or mixed-type data are considered categorical even if not
explicitly represented as a :class:pandas.api.types.CategoricalDtype`.
explicitly represented as a :class:`pandas.api.types.CategoricalDtype`.
Parameters
----------
vector : :func:`pandas.Series`, :func:`numpy.ndarray`, or Python sequence
Input data to test.
binary_type : 'numeric' or 'categorical'
boolean_type : 'numeric' or 'categorical'
Type to use for vectors containing only 0s and 1s (and NAs).
Returns
-------
var_type : 'numeric', 'categorical', or 'datetime'
Name identifying the type of data in the vector.
"""
# If a categorical dtype is set, infer categorical
if pd.api.types.is_categorical_dtype(vector):
return "categorical"

# Special-case all-na data, which is always "numeric"
if pd.isna(vector).all():
return "numeric"
Expand All @@ -1222,9 +1226,6 @@ def variable_type(vector, boolean_type="numeric"):
if pd.api.types.is_numeric_dtype(vector):
return "numeric"

if pd.api.types.is_categorical_dtype(vector):
return "categorical"

if pd.api.types.is_datetime64_dtype(vector):
return "datetime"

Expand Down
3 changes: 3 additions & 0 deletions seaborn/tests/test_core.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -1107,6 +1107,9 @@ def test_variable_type(self):
s = pd.Series([True, False, False])
assert variable_type(s) == "numeric"
assert variable_type(s, boolean_type="categorical") == "categorical"
s_cat = s.astype("category")
assert variable_type(s_cat, boolean_type="categorical") == "categorical"
assert variable_type(s_cat, boolean_type="numeric") == "categorical"

s = pd.Series([pd.Timestamp(1), pd.Timestamp(2)])
assert variable_type(s) == "datetime"
Expand Down

0 comments on commit 3b4c21a

Please sign in to comment.