Reorganize Tests #1303

Merged 8 commits on Jan 12, 2021
1 change: 1 addition & 0 deletions docs/source/release_notes.rst
@@ -10,6 +10,7 @@ Release Notes
    * Testing Changes
        * Fix non-deterministic Dask test (:pr:`1294`)
        * Unpin python-graphviz package on Windows (:pr:`1296`)
+       * Reorganize tests into proper files/directories (:pr:`1303`)

    Thanks to the following people for contributing to this release:
    :user:`rwedge`, :user:`thehomebrewnerd`

@@ -195,7 +195,7 @@ def test_cfm_dask_compose(dask_es, lt):
    assert (feature_matrix[property_feature.get_name()] == feature_matrix['label_func']).values.all()


-# tests approximate, skip for dask
+# tests approximate, skip for dask/koalas
def test_cfm_approximate_correct_ordering():
    trips = {
        'trip_id': [i for i in range(1000)],
@@ -219,9 +219,7 @@ def test_cfm_approximate_correct_ordering():
                     isinstance(feature.base_features[0],
                                AggregationFeature)]
    property_feature = IdentityFeature(es['trips']['trip_id'])
-    # direct_agg_feat = DirectFeature(Sum(es['trips']['trip_duration'],
-    #                                     es['flights']),
-    #                                 es['trips'])

    cutoff_time = pd.DataFrame.from_dict({'instance_id': df['trip_id'],
                                          'time': df['flight_time']})
    time_feature = IdentityFeature(es['trips']['flight_time'])
@@ -243,7 +241,7 @@ def test_cfm_approximate_correct_ordering():
        assert ((pd.isnull(x) and pd.isnull(y)) or (x == y))


-# uses approximate, skip for dask entitysets
+# uses approximate, skip for dask/koalas entitysets
def test_cfm_no_cutoff_time_index(pd_es):
    agg_feat = ft.Feature(pd_es['log']['id'], parent_entity=pd_es['sessions'], primitive=Count)
    agg_feat4 = ft.Feature(agg_feat, parent_entity=pd_es['customers'], primitive=Sum)
@@ -316,7 +314,7 @@ def test_saveprogress(es, tmpdir):
                                      save_progress=save_progress)
    _, _, files = next(os.walk(save_progress))
    files = [os.path.join(save_progress, file) for file in files]
-    # there is 17 datetime files created above
+    # there are 17 datetime files created above
    assert len(files) == 17
    list_df = []
    for file_ in files:
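
For readers unfamiliar with the parameter exercised by test_saveprogress above: save_progress points calculate_feature_matrix at a directory where intermediate results are written while the calculation runs, which is what the test's file count checks. A minimal sketch of that behavior, not taken from this PR; the EntitySet, feature list, and directory below are illustrative stand-ins:

import os
import tempfile

import featuretools as ft
from featuretools.demo import load_mock_customer

# Build a small demo EntitySet and feature definitions, then ask
# calculate_feature_matrix to persist intermediate results to disk.
es = load_mock_customer(return_entityset=True, random_seed=0)
features = ft.dfs(entityset=es, target_entity='customers', features_only=True)

save_dir = tempfile.mkdtemp()  # illustrative location; any writable directory works
fm = ft.calculate_feature_matrix(features, entityset=es, save_progress=save_dir)
print(len(os.listdir(save_dir)))  # intermediate files written during the run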
@@ -523,7 +521,7 @@ def test_training_window_overlap(pd_es):
    }).astype({'time': 'datetime64[ns]'})

    # Case1. include_cutoff_time = True
-    actual = ft.calculate_feature_matrix(
+    actual = calculate_feature_matrix(
        features=[count_log],
        entityset=pd_es,
        cutoff_time=cutoff_time,
@@ -534,7 +532,7 @@ def test_training_window_overlap(pd_es):
    np.testing.assert_array_equal(actual.values, [1, 9])

    # Case2. include_cutoff_time = False
-    actual = ft.calculate_feature_matrix(
+    actual = calculate_feature_matrix(
        features=[count_log],
        entityset=pd_es,
        cutoff_time=cutoff_time,
@@ -560,7 +558,7 @@ def test_include_cutoff_time_without_training_window(es):
    }).astype({'time': 'datetime64[ns]'})

    # Case1. include_cutoff_time = True
-    actual = ft.calculate_feature_matrix(
+    actual = calculate_feature_matrix(
        features=[count_log],
        entityset=es,
        cutoff_time=cutoff_time,
@@ -570,7 +568,7 @@ def test_include_cutoff_time_without_training_window(es):
    np.testing.assert_array_equal(actual.values, [1, 6])

    # Case2. include_cutoff_time = False
-    actual = ft.calculate_feature_matrix(
+    actual = calculate_feature_matrix(
        features=[count_log],
        entityset=es,
        cutoff_time=cutoff_time,
@@ -580,7 +578,7 @@ def test_include_cutoff_time_without_training_window(es):
    np.testing.assert_array_equal(actual.values, [0, 5])

    # Case3. include_cutoff_time = True with single cutoff time value
-    actual = ft.calculate_feature_matrix(
+    actual = calculate_feature_matrix(
        features=[count_log],
        entityset=es,
        cutoff_time=pd.to_datetime("2011-04-09 10:31:00"),
@@ -591,7 +589,7 @@ def test_include_cutoff_time_without_training_window(es):
    np.testing.assert_array_equal(actual.values, [6])

    # Case4. include_cutoff_time = False with single cutoff time value
-    actual = ft.calculate_feature_matrix(
+    actual = calculate_feature_matrix(
        features=[count_log],
        entityset=es,
        cutoff_time=pd.to_datetime("2011-04-09 10:31:00"),
@@ -1544,7 +1542,7 @@ def test_no_data_for_cutoff_time(mock_customer):
    trans_per_customer = ft.Feature(es["transactions"]["transaction_id"], parent_entity=es["customers"], primitive=Count)
    features = [trans_per_customer, ft.Feature(trans_per_session, parent_entity=es["customers"], primitive=Max)]

-    fm = ft.calculate_feature_matrix(features, entityset=es, cutoff_time=cutoff_times)
+    fm = calculate_feature_matrix(features, entityset=es, cutoff_time=cutoff_times)

    # due to default values for each primitive
    # count will be 0, but max will be nan
@@ -1619,9 +1617,9 @@ def test_some_instances_not_in_data(pd_es):
def test_missing_instances_with_categorical_index(pd_es):
    instance_ids = [0, 1, 3, 2]
    features = ft.dfs(entityset=pd_es, target_entity='customers', features_only=True)
-    fm = ft.calculate_feature_matrix(entityset=pd_es,
-                                     features=features,
-                                     instance_ids=instance_ids)
+    fm = calculate_feature_matrix(entityset=pd_es,
+                                  features=features,
+                                  instance_ids=instance_ids)
    assert all(fm.index.values == instance_ids)
    assert isinstance(fm.index, pd.CategoricalIndex)

@@ -1691,7 +1689,7 @@ def __call__(self, update, progress_percent, time_elapsed):
    trans_per_session = ft.Feature(es["transactions"]["transaction_id"], parent_entity=es["sessions"], primitive=Count)
    trans_per_customer = ft.Feature(es["transactions"]["transaction_id"], parent_entity=es["customers"], primitive=Count)
    features = [trans_per_session, ft.Feature(trans_per_customer, entity=es["sessions"])]
-    ft.calculate_feature_matrix(features, entityset=es, progress_callback=mock_progress_callback)
+    calculate_feature_matrix(features, entityset=es, progress_callback=mock_progress_callback)

    # second to last entry is the last update from feature calculation
    assert np.isclose(mock_progress_callback.progress_history[-2], FEATURE_CALCULATION_PERCENTAGE * 100)
@@ -1705,7 +1703,7 @@ def __call__(self, update, progress_percent, time_elapsed):
                                           pd.to_datetime("2014-01-01 02:00:00"),
                                           pd.to_datetime("2014-01-01 03:00:00")]})

-    ft.calculate_feature_matrix(features, entityset=es, cutoff_time=cutoff_time, progress_callback=mock_progress_callback)
+    calculate_feature_matrix(features, entityset=es, cutoff_time=cutoff_time, progress_callback=mock_progress_callback)
    assert np.isclose(mock_progress_callback.progress_history[-2], FEATURE_CALCULATION_PERCENTAGE * 100)
    assert np.isclose(mock_progress_callback.total_update, 100.0)
    assert np.isclose(mock_progress_callback.total_progress_percent, 100.0)
@@ -1731,10 +1729,10 @@ def __call__(self, update, progress_percent, time_elapsed):

    with cluster() as (scheduler, [a, b]):
        dkwargs = {'cluster': scheduler['address']}
-        ft.calculate_feature_matrix(features,
-                                    entityset=pd_mock_customer,
-                                    progress_callback=mock_progress_callback,
-                                    dask_kwargs=dkwargs)
+        calculate_feature_matrix(features,
+                                 entityset=pd_mock_customer,
+                                 progress_callback=mock_progress_callback,
+                                 dask_kwargs=dkwargs)

    assert np.isclose(mock_progress_callback.total_update, 100.0)
    assert np.isclose(mock_progress_callback.total_progress_percent, 100.0)
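
A note on the many ft.calculate_feature_matrix to calculate_feature_matrix renames in this file: the import block sits outside the hunks shown, so the exact import is not visible here. A sketch of the import the bare calls would rely on, assuming the public featuretools.computational_backends path rather than the PR's actual line:

# Assumed import for the bare calculate_feature_matrix(...) calls above;
# the diff does not show this file's import block.
import featuretools as ft
from featuretools.computational_backends import calculate_feature_matrix

# Both spellings should resolve to the same function, since featuretools
# re-exports calculate_feature_matrix at the top level.
assert ft.calculate_feature_matrix is calculate_feature_matrix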
22 changes: 15 additions & 7 deletions featuretools/tests/demo_tests/test_demo_data.py
@@ -2,8 +2,7 @@

import pytest

-from featuretools.demo import load_flight, load_retail
-from featuretools.synthesis import dfs
+from featuretools.demo import load_flight, load_mock_customer, load_retail


@pytest.fixture(autouse=True)
@@ -22,11 +21,20 @@ def test_load_retail_diff():
    assert es_second['order_products'].df.shape[0] == nrows_second


-def test_mock_customer(mock_customer):
-    es = mock_customer
-    fm, fl = dfs(entityset=es, target_entity="customers", max_depth=3)
-    for feature in fl:
-        assert feature.get_name() in fm.columns
+def test_mock_customer():
+    n_customers = 4
+    n_products = 3
+    n_sessions = 30
+    n_transactions = 400
+    es = load_mock_customer(n_customers=n_customers, n_products=n_products, n_sessions=n_sessions,
+                            n_transactions=n_transactions, random_seed=0, return_entityset=True)
+    entity_names = [entity.id for entity in es.entities]
+    expected_names = ['transactions', 'products', 'sessions', 'customers']
+    assert set(expected_names) == set(entity_names)
+    assert len(es['customers'].df) == 4
+    assert len(es['products'].df) == 3
+    assert len(es['sessions'].df) == 30
+    assert len(es['transactions'].df) == 400


def test_load_flight():
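
For context on the rewritten test_mock_customer: load_mock_customer returns a dictionary of DataFrames by default, and return_entityset=True (as used in the new test) assembles the four-entity EntitySet instead. A minimal usage sketch with the same sizes as the test; the printed key list reflects the demo's documented schema:

from featuretools.demo import load_mock_customer

# The default return value is a dict of DataFrames keyed by entity name;
# return_entityset=True would instead return a ready-made EntitySet.
data = load_mock_customer(n_customers=4, n_products=3, n_sessions=30,
                          n_transactions=400, random_seed=0)
print(sorted(data.keys()))  # ['customers', 'products', 'sessions', 'transactions']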