Skip to content

Commit

Permalink
Update test_deep_feature_synthesis to remove ft.dfs calls (#1306)
Browse files (browse the repository at this point in the history)
* update test_deep_feature_synthesis to remove ft.dfs calls

* update release notes

* update release notes
  • Loading branch information
thehomebrewnerd authored Jan 13, 2021
1 parent 3c00001 commit 594641b
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 50 deletions.
3 changes: 1 addition & 2 deletions docs/source/release_notes.rst
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,9 +8,8 @@ Release Notes
* Changes
* Documentation Changes
* Testing Changes
* Fix non-deterministic Dask test (:pr:`1294`)
* Unpin python-graphviz package on Windows (:pr:`1296`)
* Reorganize tests into proper files/directories (:pr:`1303`)
* Reorganize and clean up tests (:pr:`1294`, :pr:`1303`, :pr:`1306`)

Thanks to the following people for contributing to this release:
:user:`rwedge`, :user:`thehomebrewnerd`
Expand Down
99 changes: 51 additions & 48 deletions featuretools/tests/synthesis/test_deep_feature_synthesis.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -780,9 +780,10 @@ def test_commutative(es):

def test_transform_consistency(transform_es):
# Generate features
feature_defs = ft.dfs(entityset=transform_es, target_entity='first',
trans_primitives=['and', 'add_numeric', 'or'],
features_only=True)
dfs_obj = DeepFeatureSynthesis(target_entity_id='first',
entityset=transform_es,
trans_primitives=['and', 'add_numeric', 'or'])
feature_defs = dfs_obj.build_features()

# Check for correct ordering of features
assert feature_with_name(feature_defs, 'a')
Expand All @@ -802,12 +803,13 @@ def test_transform_no_stack_agg(es):
# TODO: Update to work with Dask and Koalas supported primitives
if not all(isinstance(entity.df, pd.DataFrame) for entity in es.entities):
pytest.xfail("Dask EntitySets do not support the NMostCommon primitive")
feature_defs = ft.dfs(entityset=es,
target_entity="customers",
agg_primitives=[NMostCommon],
trans_primitives=[NotEqual],
max_depth=3,
features_only=True)
dfs_obj = DeepFeatureSynthesis(target_entity_id='customers',
entityset=es,
agg_primitives=[NMostCommon],
trans_primitives=[NotEqual],
max_depth=3)
feature_defs = dfs_obj.build_features()

assert not feature_with_name(feature_defs, 'id != N_MOST_COMMON(sessions.device_type)')


Expand Down Expand Up @@ -941,13 +943,12 @@ def test_f(x):
return [times.apply(lambda x: getattr(x, unit)) for unit in units]
return test_f

feat = ft.dfs(entityset=es,
target_entity="customers",
agg_primitives=[NumUnique, NMostCommon(n=3)],
trans_primitives=[TestTime, Diff],
max_depth=4,
features_only=True
)
dfs_obj = DeepFeatureSynthesis(target_entity_id='customers',
entityset=es,
agg_primitives=[NumUnique, NMostCommon(n=3)],
trans_primitives=[TestTime, Diff],
max_depth=4)
feat = dfs_obj.build_features()

for i in range(3):
f = 'NUM_UNIQUE(sessions.N_MOST_COMMON(log.countrycode)[%d])' % i
Expand All @@ -961,13 +962,13 @@ def test_seed_multi_output_feature_stacking(es):
threecommon = NMostCommon(3)
tc = ft.Feature(es['log']['product_id'], parent_entity=es["sessions"], primitive=threecommon)

fm, feat = ft.dfs(entityset=es,
target_entity="customers",
seed_features=[tc],
agg_primitives=[NumUnique],
trans_primitives=[],
max_depth=4
)
dfs_obj = DeepFeatureSynthesis(target_entity_id='customers',
entityset=es,
seed_features=[tc],
agg_primitives=[NumUnique],
trans_primitives=[],
max_depth=4)
feat = dfs_obj.build_features()

for i in range(3):
f = 'NUM_UNIQUE(sessions.N_MOST_COMMON(log.product_id)[%d])' % i
Expand Down Expand Up @@ -1452,33 +1453,33 @@ def test_primitive_ordering():
seed_is_null = ft.Feature(es['customers']['age'], primitive=IsNull)
seed_features = [seed_num_chars, seed_is_null]

features1 = ft.dfs(entityset=es,
target_entity="customers",
trans_primitives=trans_prims,
groupby_trans_primitives=groupby_trans_prim,
agg_primitives=agg_prims,
where_primitives=where_prims,
seed_features=seed_features,
max_features=-1,
max_depth=2,
features_only=2)
dfs_obj = DeepFeatureSynthesis(target_entity_id='customers',
entityset=es,
trans_primitives=trans_prims,
groupby_trans_primitives=groupby_trans_prim,
agg_primitives=agg_prims,
where_primitives=where_prims,
seed_features=seed_features,
max_features=-1,
max_depth=2)
features1 = dfs_obj.build_features()

trans_prims.reverse()
groupby_trans_prim.reverse()
agg_prims.reverse()
where_prims.reverse()
seed_features.reverse()

features2 = ft.dfs(entityset=es,
target_entity="customers",
trans_primitives=trans_prims,
groupby_trans_primitives=groupby_trans_prim,
agg_primitives=agg_prims,
where_primitives=where_prims,
seed_features=seed_features,
max_features=-1,
max_depth=2,
features_only=2)
dfs_obj = DeepFeatureSynthesis(target_entity_id='customers',
entityset=es,
trans_primitives=trans_prims,
groupby_trans_primitives=groupby_trans_prim,
agg_primitives=agg_prims,
where_primitives=where_prims,
seed_features=seed_features,
max_features=-1,
max_depth=2)
features2 = dfs_obj.build_features()

assert len(features1) == len(features2)

Expand All @@ -1496,11 +1497,13 @@ def test_no_transform_stacking():
relationships = [("first", 'id', 'second', 'first_id')]
es = ft.EntitySet("data", entities, relationships)

feature_defs = ft.dfs(entityset=es, target_entity='second',
trans_primitives=['negate', 'add_numeric'],
agg_primitives=['sum'],
max_depth=4,
features_only=2)
dfs_obj = DeepFeatureSynthesis(target_entity_id='second',
entityset=es,
trans_primitives=['negate', 'add_numeric'],
agg_primitives=['sum'],
max_depth=4)
feature_defs = dfs_obj.build_features()

expected = [
'first_id',
'B',
Expand Down

0 comments on commit 594641b

Please sign in to comment.