From 9206ef8da56224454bc3daf7d655cfd4fc58a7d4 Mon Sep 17 00:00:00 2001
From: Jett Oristaglio <jett.oristaglio@jett.oristaglio>
Date: Thu, 1 Mar 2018 13:28:26 -0500
Subject: [PATCH] =?UTF-8?q?Absolute=20Tolerance=20increased=20for=20testin?=
 =?UTF-8?q?g=E2=80=93=20batch=20processing=20failures=20fix?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

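Fix failing tests and remove deprecated parameters: drop the unused
`image_list` argument from `create_features` (and its call sites), and
compare featurizer output using an absolute tolerance (`atol=ATOL`)
instead of a relative one.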
---
 pic2vec/data_featurizing.py                   |  1 -
 pic2vec/image_featurizer.py                   |  1 -
 tests/build_featurizer_test.py                |  2 +-
 tests/data_featurizing_test.py                | 25 +++++--------------
 .../featurize_data_check_csv_features_only    |  5 ----
 .../csv_testing/featurize_data_check_csv_full | 10 ++++----
 tests/feature_preprocessing_test.py           | 11 +-------
 tests/image_featurizer_test.py                |  8 +-----
 8 files changed, 14 insertions(+), 49 deletions(-)
 delete mode 100644 tests/data_featurizing_testing/csv_testing/featurize_data_check_csv_features_only

diff --git a/pic2vec/data_featurizing.py b/pic2vec/data_featurizing.py
index d1a4b01..eb688f5 100644
--- a/pic2vec/data_featurizing.py
+++ b/pic2vec/data_featurizing.py
@@ -134,7 +134,6 @@ def _create_features_df_helper(data_array, full_feature_array, image_column_head
 
 
 def create_features(data_array, new_feature_array, image_column_header,
-                    image_list,
                     save_features=False):
     """
     Write the feature array to a new csv, and append the features to the appropriate
diff --git a/pic2vec/image_featurizer.py b/pic2vec/image_featurizer.py
index ccfecee..eec5d57 100644
--- a/pic2vec/image_featurizer.py
+++ b/pic2vec/image_featurizer.py
@@ -535,7 +535,6 @@ def _featurize_helper(self, features, image_column_headers,
                 create_features(batch_data[column],
                                 partial_features,
                                 image_column_headers[column],
-                                self.image_dict[image_column_headers[column]],
                                 save_features=save_features)
 
             features_list.append(df_features)
diff --git a/tests/build_featurizer_test.py b/tests/build_featurizer_test.py
index ea8cf61..885eeec 100644
--- a/tests/build_featurizer_test.py
+++ b/tests/build_featurizer_test.py
@@ -21,7 +21,7 @@
 random.seed(5102020)
 
 # Tolerance for prediction error
-ATOL = 0.0001
+ATOL = 0.00001
 
 # Building the checking model
 input_layer = Input(shape=(100, ))
diff --git a/tests/data_featurizing_test.py b/tests/data_featurizing_test.py
index f96b47e..dc556b8 100644
--- a/tests/data_featurizing_test.py
+++ b/tests/data_featurizing_test.py
@@ -84,16 +84,8 @@ def test_create_features_bad_feature_array():
     # An error array with the wrong size
     error_feature_array = np.zeros((4, 3, 2))
     with pytest.raises(ValueError):
-        create_features(CHECK_DATA, error_feature_array, pd.read_csv(CHECK_CSV_IMAGES_PATH),
-                        'image', CHECK_IMAGE_LIST, continued_column=False,
-                        save_features=True)
-
-
-def test_features_to_csv_bad_column_header():
-    """Raise an error when the column header is not found in the csv"""
-    with pytest.raises(ValueError):
-        create_features(CHECK_DATA, CHECK_ARRAY, pd.read_csv(CHECK_CSV_IMAGES_PATH), 'derp',
-                        CHECK_IMAGE_LIST, continued_column=False,
+        create_features(CHECK_DATA, error_feature_array,
+                        'image',
                         save_features=True)
 
 
@@ -102,15 +94,13 @@ def test_features_to_csv_bad_data_array():
     # An error array with the wrong size
     error_array = np.zeros((4, 3, 2))
     with pytest.raises(ValueError):
-        create_features(error_array, CHECK_ARRAY, pd.read_csv(CHECK_CSV_IMAGES_PATH), 'image',
-                        CHECK_IMAGE_LIST, continued_column=False,
+        create_features(error_array, CHECK_ARRAY, 'image',
                         save_features=True)
 
 
 def test_create_features_df_helper():
     """Test that the correct full array is created to be passed to the create_features function"""
-    df = pd.read_csv(CHECK_CSV_IMAGES_PATH)
-    full_df_test = _create_features_df_helper(CHECK_DATA, CHECK_ARRAY, 'image', df)[0]
+    full_df_test = _create_features_df_helper(CHECK_DATA, CHECK_ARRAY, 'image')
     assert full_df_test.equals(pd.read_csv(CHECK_CSV_FULL_PATH))
 
 
@@ -118,11 +108,8 @@ def test_features_to_csv():
     """Test that the model creates the correct csvs from a toy array, csv, and image list"""
     # Create the test
     full_test_dataframe = create_features(CHECK_DATA, CHECK_ARRAY,
-                                          pd.read_csv(CHECK_CSV_IMAGES_PATH),
-                                          'image', CHECK_IMAGE_LIST, continued_column=False,
+                                          'image',
                                           save_features=True)
 
-    print(full_test_dataframe[1])
     # Assert that the dataframes returned are correct
-    assert full_test_dataframe[1].equals(pd.read_csv(CHECK_CSV_FEATURES_ONLY_PATH))
-    assert full_test_dataframe[0].equals(pd.read_csv(CHECK_CSV_FULL_PATH))
+    assert full_test_dataframe.equals(pd.read_csv(CHECK_CSV_FULL_PATH))
diff --git a/tests/data_featurizing_testing/csv_testing/featurize_data_check_csv_features_only b/tests/data_featurizing_testing/csv_testing/featurize_data_check_csv_features_only
deleted file mode 100644
index e5114aa..0000000
--- a/tests/data_featurizing_testing/csv_testing/featurize_data_check_csv_features_only
+++ /dev/null
@@ -1,5 +0,0 @@
-image_feat_0,image_feat_1,image_feat_2
-1.0,2.0,3.0
-4.0,5.0,6.0
-0.0,0.0,0.0
-7.0,8.0,9.0
diff --git a/tests/data_featurizing_testing/csv_testing/featurize_data_check_csv_full b/tests/data_featurizing_testing/csv_testing/featurize_data_check_csv_full
index a05c411..e54a054 100644
--- a/tests/data_featurizing_testing/csv_testing/featurize_data_check_csv_full
+++ b/tests/data_featurizing_testing/csv_testing/featurize_data_check_csv_full
@@ -1,5 +1,5 @@
-image,image_missing,image_feat_0,image_feat_1,image_feat_2
-borges.jpg,False,1.0,2.0,3.0
-arendt.bmp,False,4.0,5.0,6.0
-heidegger.jpg,True,0.0,0.0,0.0
-sappho.png,False,7.0,8.0,9.0
+image_missing,image_feat_0,image_feat_1,image_feat_2
+False,1.0,2.0,3.0
+False,4.0,5.0,6.0
+True,0.0,0.0,0.0
+False,7.0,8.0,9.0
diff --git a/tests/feature_preprocessing_test.py b/tests/feature_preprocessing_test.py
index dfedf1c..5d3499f 100644
--- a/tests/feature_preprocessing_test.py
+++ b/tests/feature_preprocessing_test.py
@@ -169,17 +169,8 @@ def test_image_paths_finder(image_path, csv_path, image_column_header, new_csv,
     csv only, and combined csv + directory
     """
     # check the new csv doesn't already exist
-    if os.path.isfile(new_csv) and new_csv != '':
-        os.remove(new_csv)
-
     # generated image lists
-    case, df = _image_paths_finder(image_path, csv_path, image_column_header, new_csv,
-                                   save_csv=True)
-
-    if new_csv != '':
-        assert os.path.isfile(new_csv)
-        # remove the generated csv
-        os.remove(new_csv)
+    case, df = _image_paths_finder(image_path, csv_path, image_column_header, new_csv)
 
     # Check the image lists match
     assert case == check_images
diff --git a/tests/image_featurizer_test.py b/tests/image_featurizer_test.py
index 1ed185a..49ad00c 100644
--- a/tests/image_featurizer_test.py
+++ b/tests/image_featurizer_test.py
@@ -124,15 +124,9 @@ def compare_featurizer_class(featurizer,
     assert featurizer.image_dict == image_dict
     assert featurizer.depth == depth
     if featurized:
-
         assert np.array_equal(pd.read_csv(check_csv).columns, featurizer.features.columns)
-
-        print(featurizer.features.astype(float))
-        print(pd.read_csv(check_csv))
-        pd.testing.assert_frame_equal(featurizer.features.astype(float),
-                                      pd.read_csv(check_csv).astype(float), check_less_precise=True)
         assert np.allclose(featurizer.features.astype(float), pd.read_csv(check_csv).astype(float),
-                           rtol=3e-04)
+                           atol=ATOL)
 
 
 def compare_empty_input(featurizer):