Skip to content

Commit

Permalink
Change the treatment of the generic column dtype `object` for pandas DataFrames.
Browse files Browse the repository at this point in the history
The `object` type will be treated as `string` in the future.

Add a new test case to `test_feature_validator.py`.
  • Loading branch information
Louquinze committed Mar 4, 2022
1 parent b00a250 commit 8d9e159
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 10 deletions.
2 changes: 1 addition & 1 deletion autosklearn/data/feature_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ def get_feat_type_from_columns(
f"Input Column {column} has generic type object. "
f"Autosklearn will treat this column as string. "
f"Please ensure that this setting is suitable for your task.",
UserWarning
UserWarning,
)
feat_type[column] = "string"
elif pd.core.dtypes.common.is_datetime_or_timedelta_dtype(
Expand Down
22 changes: 13 additions & 9 deletions test/test_data/test_feature_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
import pytest
import sklearn.datasets
import sklearn.model_selection
from autosklearn.data.feature_validator import FeatureValidator
from pandas.api.types import is_categorical_dtype, is_numeric_dtype, is_string_dtype
from scipy import sparse

from autosklearn.data.feature_validator import FeatureValidator


# Fixtures to be used in this class. By default all elements have 100 datapoints
@pytest.fixture
Expand Down Expand Up @@ -532,17 +533,20 @@ def dummy_func(self):
"dummy_array": [array] * 4,
"dummy_string": [dummy_stirng] * 4,
"type_mix_column": [dummy_stirng, dummy_object, array, lst],
"cat_column": ["a", "b", "a", "b"]
"cat_column": ["a", "b", "a", "b"],
}
)
df["cat_column"] = df["cat_column"].astype("category")

with pytest.warns(UserWarning, match=r'Input Column dummy_object has '
r'generic type object. '
r'Autosklearn will treat '
r'this column as string. '
r'Please ensure that this setting '
r'is suitable for your task.'):
with pytest.warns(
UserWarning,
match=r"Input Column dummy_object has "
r"generic type object. "
r"Autosklearn will treat "
r"this column as string. "
r"Please ensure that this setting "
r"is suitable for your task.",
):
validator = FeatureValidator()
feat_type = validator.get_feat_type_from_columns(df)

Expand All @@ -552,7 +556,7 @@ def dummy_func(self):
"dummy_array": "string",
"dummy_string": "string",
"type_mix_column": "string",
"cat_column": "categorical"
"cat_column": "categorical",
}

assert feat_type == column_types

0 comments on commit 8d9e159

Please sign in to comment.