From 5732626ab4de561b8884dafd9bcb9ee2cc33840c Mon Sep 17 00:00:00 2001 From: Nate Parsons Date: Wed, 13 Jan 2021 13:49:08 -0600 Subject: [PATCH 1/3] update test_deep_feature_synthesis to remove ft.dfs calls --- .../synthesis/test_deep_feature_synthesis.py | 99 ++++++++++--------- 1 file changed, 51 insertions(+), 48 deletions(-) diff --git a/featuretools/tests/synthesis/test_deep_feature_synthesis.py b/featuretools/tests/synthesis/test_deep_feature_synthesis.py index 983d43a2b4..9439c0aebb 100644 --- a/featuretools/tests/synthesis/test_deep_feature_synthesis.py +++ b/featuretools/tests/synthesis/test_deep_feature_synthesis.py @@ -780,9 +780,10 @@ def test_commutative(es): def test_transform_consistency(transform_es): # Generate features - feature_defs = ft.dfs(entityset=transform_es, target_entity='first', - trans_primitives=['and', 'add_numeric', 'or'], - features_only=True) + dfs_obj = DeepFeatureSynthesis(target_entity_id='first', + entityset=transform_es, + trans_primitives=['and', 'add_numeric', 'or']) + feature_defs = dfs_obj.build_features() # Check for correct ordering of features assert feature_with_name(feature_defs, 'a') @@ -802,12 +803,13 @@ def test_transform_no_stack_agg(es): # TODO: Update to work with Dask and Koalas supported primitives if not all(isinstance(entity.df, pd.DataFrame) for entity in es.entities): pytest.xfail("Dask EntitySets do not support the NMostCommon primitive") - feature_defs = ft.dfs(entityset=es, - target_entity="customers", - agg_primitives=[NMostCommon], - trans_primitives=[NotEqual], - max_depth=3, - features_only=True) + dfs_obj = DeepFeatureSynthesis(target_entity_id='customers', + entityset=es, + agg_primitives=[NMostCommon], + trans_primitives=[NotEqual], + max_depth=3) + feature_defs = dfs_obj.build_features() + assert not feature_with_name(feature_defs, 'id != N_MOST_COMMON(sessions.device_type)') @@ -941,13 +943,12 @@ def test_f(x): return [times.apply(lambda x: getattr(x, unit)) for unit in units] return test_f - feat = ft.dfs(entityset=es, - target_entity="customers", - agg_primitives=[NumUnique, NMostCommon(n=3)], - trans_primitives=[TestTime, Diff], - max_depth=4, - features_only=True - ) + dfs_obj = DeepFeatureSynthesis(target_entity_id='customers', + entityset=es, + agg_primitives=[NumUnique, NMostCommon(n=3)], + trans_primitives=[TestTime, Diff], + max_depth=4) + feat = dfs_obj.build_features() for i in range(3): f = 'NUM_UNIQUE(sessions.N_MOST_COMMON(log.countrycode)[%d])' % i @@ -961,13 +962,13 @@ def test_seed_multi_output_feature_stacking(es): threecommon = NMostCommon(3) tc = ft.Feature(es['log']['product_id'], parent_entity=es["sessions"], primitive=threecommon) - fm, feat = ft.dfs(entityset=es, - target_entity="customers", - seed_features=[tc], - agg_primitives=[NumUnique], - trans_primitives=[], - max_depth=4 - ) + dfs_obj = DeepFeatureSynthesis(target_entity_id='customers', + entityset=es, + seed_features=[tc], + agg_primitives=[NumUnique], + trans_primitives=[], + max_depth=4) + feat = dfs_obj.build_features() for i in range(3): f = 'NUM_UNIQUE(sessions.N_MOST_COMMON(log.product_id)[%d])' % i @@ -1452,16 +1453,16 @@ def test_primitive_ordering(): seed_is_null = ft.Feature(es['customers']['age'], primitive=IsNull) seed_features = [seed_num_chars, seed_is_null] - features1 = ft.dfs(entityset=es, - target_entity="customers", - trans_primitives=trans_prims, - groupby_trans_primitives=groupby_trans_prim, - agg_primitives=agg_prims, - where_primitives=where_prims, - seed_features=seed_features, - max_features=-1, - max_depth=2, - features_only=2) + dfs_obj = DeepFeatureSynthesis(target_entity_id='customers', + entityset=es, + trans_primitives=trans_prims, + groupby_trans_primitives=groupby_trans_prim, + agg_primitives=agg_prims, + where_primitives=where_prims, + seed_features=seed_features, + max_features=-1, + max_depth=2) + features1 = dfs_obj.build_features() trans_prims.reverse() groupby_trans_prim.reverse() @@ -1469,16 +1470,16 @@ def test_primitive_ordering(): where_prims.reverse() seed_features.reverse() - features2 = ft.dfs(entityset=es, - target_entity="customers", - trans_primitives=trans_prims, - groupby_trans_primitives=groupby_trans_prim, - agg_primitives=agg_prims, - where_primitives=where_prims, - seed_features=seed_features, - max_features=-1, - max_depth=2, - features_only=2) + dfs_obj = DeepFeatureSynthesis(target_entity_id='customers', + entityset=es, + trans_primitives=trans_prims, + groupby_trans_primitives=groupby_trans_prim, + agg_primitives=agg_prims, + where_primitives=where_prims, + seed_features=seed_features, + max_features=-1, + max_depth=2) + features2 = dfs_obj.build_features() assert len(features1) == len(features2) @@ -1496,11 +1497,13 @@ def test_no_transform_stacking(): relationships = [("first", 'id', 'second', 'first_id')] es = ft.EntitySet("data", entities, relationships) - feature_defs = ft.dfs(entityset=es, target_entity='second', - trans_primitives=['negate', 'add_numeric'], - agg_primitives=['sum'], - max_depth=4, - features_only=2) + dfs_obj = DeepFeatureSynthesis(target_entity_id='second', + entityset=es, + trans_primitives=['negate', 'add_numeric'], + agg_primitives=['sum'], + max_depth=4) + feature_defs = dfs_obj.build_features() + expected = [ 'first_id', 'B', From 60823e0f634b29805f34fa97c7fb8ef0fa3cfea7 Mon Sep 17 00:00:00 2001 From: Nate Parsons Date: Wed, 13 Jan 2021 13:51:53 -0600 Subject: [PATCH 2/3] update release notes --- docs/source/release_notes.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst index 12d0b4f521..f5b6fdda2e 100644 --- a/docs/source/release_notes.rst +++ b/docs/source/release_notes.rst @@ -11,6 +11,7 @@ Release Notes * Fix non-deterministic Dask test (:pr:`1294`) * Unpin python-graphviz package on Windows (:pr:`1296`) * Reorganize tests into proper files/directories (:pr:`1303`) + * Clean up synthesis tests to remove unnecessary calls to ``ft.dfs`` (:pr:`1306`) Thanks to the following people for contributing to this release: :user:`rwedge`, :user:`thehomebrewnerd` From eb1b578b83e276433dc972d72151b7367d7ab0b0 Mon Sep 17 00:00:00 2001 From: Nate Parsons Date: Wed, 13 Jan 2021 14:39:08 -0600 Subject: [PATCH 3/3] update release notes --- docs/source/release_notes.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst index f5b6fdda2e..3facabc317 100644 --- a/docs/source/release_notes.rst +++ b/docs/source/release_notes.rst @@ -8,10 +8,8 @@ Release Notes * Changes * Documentation Changes * Testing Changes - * Fix non-deterministic Dask test (:pr:`1294`) * Unpin python-graphviz package on Windows (:pr:`1296`) - * Reorganize tests into proper files/directories (:pr:`1303`) - * Clean up synthesis tests to remove unnecessary calls to ``ft.dfs`` (:pr:`1306`) + * Reorganize and clean up tests (:pr:`1294`, :pr:`1303`, :pr:`1306`) Thanks to the following people for contributing to this release: :user:`rwedge`, :user:`thehomebrewnerd`