diff --git a/evalml/tests/component_tests/test_imputer.py b/evalml/tests/component_tests/test_imputer.py index 427b924448..0252489665 100644 --- a/evalml/tests/component_tests/test_imputer.py +++ b/evalml/tests/component_tests/test_imputer.py @@ -185,6 +185,7 @@ def test_categorical_and_numeric_input(imputer_test_data): "object col": pd.Series(["b", "b", "a", "c", "d"] * 4, dtype="category"), "float col": [0.1, 1.0, 0.0, -2.0, 5.0] * 4, "bool col": [True, False, False, True, True] * 4, + "bool col 2": [True, False, False, True, True] * 4, "natural language col": pd.Series( ["cats are really great", "don't", "believe", "me?", "well..."] * 4, dtype="string", diff --git a/evalml/tests/component_tests/test_simple_imputer.py b/evalml/tests/component_tests/test_simple_imputer.py index a1adc8ccda..2aa8d13da9 100644 --- a/evalml/tests/component_tests/test_simple_imputer.py +++ b/evalml/tests/component_tests/test_simple_imputer.py @@ -627,21 +627,10 @@ def test_simple_imputer_boolean_nullable_valid_train_empty_test(): assert isinstance(X_t.ww.logical_types["a"], BooleanNullable) -def test_simple_imputer_all_bools_at_fit_and_transform(): +def test_simple_imputer_all_bools_at_fit_and_transform(imputer_test_data): """Confirms that the simple imputer can handle data with only the bool dtype which sklearn would error on.""" - X = pd.DataFrame( - { - "bools1": pd.Series([True, False, True, True] * 20), - "bools2": pd.Series([True, False, True, False] * 20), - }, - ) - X.ww.init( - logical_types={ - "bools1": "Boolean", - "bools2": "Boolean", - }, - ) + X = imputer_test_data.ww.select("boolean") imp = SimpleImputer(impute_strategy="most_frequent") imp.fit(X) @@ -656,7 +645,9 @@ def test_simple_imputer_all_bools_at_fit_and_transform_with_all_null_and_nl_cols """Confirm that the simple imputer, which doesn't pass all null or natural language columns to sklearn works when the remaining columns are all teh bool dtype, which sklearn would error on. """ - X = imputer_test_data.ww[["all nan", "bool col", "natural language col"]] + X = imputer_test_data.ww[ + ["all nan", "bool col", "bool col 2", "natural language col"] + ] X_copy = X.ww.copy() imp = SimpleImputer(impute_strategy="most_frequent") @@ -666,37 +657,22 @@ def test_simple_imputer_all_bools_at_fit_and_transform_with_all_null_and_nl_cols pd.testing.assert_frame_equal(X_copy.ww.drop("all nan"), X_imputed) -def test_simple_imputer_all_bools_at_fit_with_nans_at_transform(): +def test_simple_imputer_all_bools_at_fit_with_nans_at_transform(imputer_test_data): """Confirm that the simple imputer can handle data whose dtype is different at transform when originally the data only had bool dtype columns.""" # X_train will be only bool dtypes so the _component_obj won't be fit - X_train = pd.DataFrame( - { - "bools1": pd.Series([True, False, True, True] * 20), - "bools2": pd.Series([True, False, True, False] * 20), - }, - ) - X_train.ww.init( - logical_types={ - "bools1": "Boolean", - "bools2": "Boolean", - }, - ) + X_train = imputer_test_data.ww.select("boolean") imp = SimpleImputer(impute_strategy="most_frequent") imp.fit(X_train) # X_test will be BooleanNullable which will be a problem when _component_obj isn't fit - X_test = pd.DataFrame( - { - "bools1": pd.Series([True, False, pd.NA, True] * 20), - "bools2": pd.Series([True, pd.NA, True, False] * 20), - }, - ) + X_test = X_train.copy() + X_test.iloc[-1] = np.nan X_test.ww.init( logical_types={ - "bools1": "BooleanNullable", - "bools2": "BooleanNullable", + "bool col": "BooleanNullable", + "bool col 2": "BooleanNullable", }, ) diff --git a/evalml/tests/component_tests/test_time_series_imputer.py b/evalml/tests/component_tests/test_time_series_imputer.py index 14510647f9..6a582c6025 100644 --- a/evalml/tests/component_tests/test_time_series_imputer.py +++ b/evalml/tests/component_tests/test_time_series_imputer.py @@ -171,6 +171,7 @@ def test_categorical_and_numeric_input(imputer_test_data): "object col": pd.Series(["b", "b", "a", "c", "d"] * 4, dtype="category"), "float col": [0.1, 1.0, 0.0, -2.0, 5.0] * 4, "bool col": [True, False, False, True, True] * 4, + "bool col 2": [True, False, False, True, True] * 4, "natural language col": pd.Series( ["cats are really great", "don't", "believe", "me?", "well..."] * 4, dtype="string", diff --git a/evalml/tests/conftest.py b/evalml/tests/conftest.py index 92387dd649..301e126d45 100644 --- a/evalml/tests/conftest.py +++ b/evalml/tests/conftest.py @@ -2227,6 +2227,7 @@ def X_no_nans(): "object col": ["b", "b", "a", "c", "d"] * 4, "float col": [0.1, 1.0, 0.0, -2.0, 5.0] * 4, "bool col": [True, False, False, True, True] * 4, + "bool col 2": [True, False, False, True, True] * 4, "natural language col": pd.Series( ["cats are really great", "don't", "believe", "me?", "well..."] * 4, dtype="string", @@ -2241,6 +2242,7 @@ def X_no_nans(): "object col": "categorical", "float col": "double", "bool col": "boolean", + "bool col 2": "boolean", "natural language col": "NaturalLanguage", }, )