Skip to content

Commit

Permalink
Shuffled around the order of the DropColumns transformer in the tests…
Browse files Browse the repository at this point in the history
… to accommodate the new check for Unknown in get_pp_components. Made Email get treated properly in testing as WW should infer it properly now. Made infer_feature_types replace all pd.NA with np.nan for series as well as dataframes.
  • Loading branch information
chukarsten committed Aug 10, 2021
1 parent acc42bd commit b4f1ae0
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 9 deletions.
14 changes: 5 additions & 9 deletions evalml/tests/pipeline_tests/test_pipeline_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@ def _get_test_data_from_configuration(
"[email protected]",
"[email protected]",
"[email protected]",
"$titanic_data%&@hotmail.com",
"foo*EMAIL@email.org",
"[email protected]",
"fooEMAIL@email.org",
"[email protected]",
"[email protected]",
],
Expand Down Expand Up @@ -197,11 +197,7 @@ def test_make_pipeline(
if "text" in column_names and input_type == "ww"
else []
)
email_featurizer = (
[EmailFeaturizer]
if "email" in column_names and input_type == "ww"
else []
)
email_featurizer = [EmailFeaturizer] if "email" in column_names else []
url_featurizer = (
[URLFeaturizer] if "url" in column_names and input_type == "ww" else []
)
Expand All @@ -213,7 +209,7 @@ def test_make_pipeline(
)
drop_col = (
[DropColumns]
if any(ltype in column_names for ltype in ["url", "email", "text"])
if any(ltype in column_names for ltype in ["url", "text"])
and input_type == "pd"
else []
)
Expand All @@ -223,8 +219,8 @@ def test_make_pipeline(
+ url_featurizer
+ drop_null
+ text_featurizer
+ imputer
+ drop_col
+ imputer
+ datetime
+ delayed_features
+ ohe
Expand Down
3 changes: 3 additions & 0 deletions evalml/utils/woodwork_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@ def infer_feature_types(data, feature_types=None):
return data

if isinstance(data, pd.Series):
if all([isinstance(x, type(pd.NA)) for x in data]):
data = data.replace(pd.NA, np.nan)
feature_types = "Double"
return ww.init_series(data, logical_type=feature_types)
else:
ww_data = data.copy()
Expand Down

0 comments on commit b4f1ae0

Please sign in to comment.