Absolute Tolerance increased for testing– batch processing failures fix

Fixing testing failures, removing deprecated parameters.
datarobot · Mar 1, 2018 · 9206ef8 · 9206ef8
1 parent e553e75
commit 9206ef8
Show file tree

Hide file tree

Showing 8 changed files with 14 additions and 49 deletions.
diff --git a/pic2vec/data_featurizing.py b/pic2vec/data_featurizing.py
@@ -134,7 +134,6 @@ def _create_features_df_helper(data_array, full_feature_array, image_column_head
 
 
 def create_features(data_array, new_feature_array, image_column_header,
-                    image_list,
                     save_features=False):
     """
     Write the feature array to a new csv, and append the features to the appropriate

diff --git a/pic2vec/image_featurizer.py b/pic2vec/image_featurizer.py
@@ -535,7 +535,6 @@ def _featurize_helper(self, features, image_column_headers,
                 create_features(batch_data[column],
                                 partial_features,
                                 image_column_headers[column],
-                                self.image_dict[image_column_headers[column]],
                                 save_features=save_features)
 
             features_list.append(df_features)

diff --git a/tests/build_featurizer_test.py b/tests/build_featurizer_test.py
@@ -21,7 +21,7 @@
 random.seed(5102020)
 
 # Tolerance for prediction error
-ATOL = 0.0001
+ATOL = 0.00001
 
 # Building the checking model
 input_layer = Input(shape=(100, ))

diff --git a/tests/data_featurizing_test.py b/tests/data_featurizing_test.py
@@ -84,16 +84,8 @@ def test_create_features_bad_feature_array():
     # An error array with the wrong size
     error_feature_array = np.zeros((4, 3, 2))
     with pytest.raises(ValueError):
-        create_features(CHECK_DATA, error_feature_array, pd.read_csv(CHECK_CSV_IMAGES_PATH),
-                        'image', CHECK_IMAGE_LIST, continued_column=False,
-                        save_features=True)
-
-
-def test_features_to_csv_bad_column_header():
-    """Raise an error when the column header is not found in the csv"""
-    with pytest.raises(ValueError):
-        create_features(CHECK_DATA, CHECK_ARRAY, pd.read_csv(CHECK_CSV_IMAGES_PATH), 'derp',
-                        CHECK_IMAGE_LIST, continued_column=False,
+        create_features(CHECK_DATA, error_feature_array,
+                        'image',
                         save_features=True)
 
 
@@ -102,27 +94,22 @@ def test_features_to_csv_bad_data_array():
     # An error array with the wrong size
     error_array = np.zeros((4, 3, 2))
     with pytest.raises(ValueError):
-        create_features(error_array, CHECK_ARRAY, pd.read_csv(CHECK_CSV_IMAGES_PATH), 'image',
-                        CHECK_IMAGE_LIST, continued_column=False,
+        create_features(error_array, CHECK_ARRAY, 'image',
                         save_features=True)
 
 
 def test_create_features_df_helper():
     """Test that the correct full array is created to be passed to the create_features function"""
-    df = pd.read_csv(CHECK_CSV_IMAGES_PATH)
-    full_df_test = _create_features_df_helper(CHECK_DATA, CHECK_ARRAY, 'image', df)[0]
+    full_df_test = _create_features_df_helper(CHECK_DATA, CHECK_ARRAY, 'image')
     assert full_df_test.equals(pd.read_csv(CHECK_CSV_FULL_PATH))
 
 
 def test_features_to_csv():
     """Test that the model creates the correct csvs from a toy array, csv, and image list"""
     # Create the test
     full_test_dataframe = create_features(CHECK_DATA, CHECK_ARRAY,
-                                          pd.read_csv(CHECK_CSV_IMAGES_PATH),
-                                          'image', CHECK_IMAGE_LIST, continued_column=False,
+                                          'image',
                                           save_features=True)
 
-    print(full_test_dataframe[1])
     # Assert that the dataframes returned are correct
-    assert full_test_dataframe[1].equals(pd.read_csv(CHECK_CSV_FEATURES_ONLY_PATH))
-    assert full_test_dataframe[0].equals(pd.read_csv(CHECK_CSV_FULL_PATH))
+    assert full_test_dataframe.equals(pd.read_csv(CHECK_CSV_FULL_PATH))
diff --git a/tests/data_featurizing_testing/csv_testing/featurize_data_check_csv_features_only b/tests/data_featurizing_testing/csv_testing/featurize_data_check_csv_features_only
diff --git a/tests/data_featurizing_testing/csv_testing/featurize_data_check_csv_full b/tests/data_featurizing_testing/csv_testing/featurize_data_check_csv_full
@@ -1,5 +1,5 @@
-image,image_missing,image_feat_0,image_feat_1,image_feat_2
-borges.jpg,False,1.0,2.0,3.0
-arendt.bmp,False,4.0,5.0,6.0
-heidegger.jpg,True,0.0,0.0,0.0
-sappho.png,False,7.0,8.0,9.0
+image_missing,image_feat_0,image_feat_1,image_feat_2
+False,1.0,2.0,3.0
+False,4.0,5.0,6.0
+True,0.0,0.0,0.0
+False,7.0,8.0,9.0
diff --git a/tests/feature_preprocessing_test.py b/tests/feature_preprocessing_test.py
@@ -169,17 +169,8 @@ def test_image_paths_finder(image_path, csv_path, image_column_header, new_csv,
     csv only, and combined csv + directory
     """
     # check the new csv doesn't already exist
-    if os.path.isfile(new_csv) and new_csv != '':
-        os.remove(new_csv)
-
     # generated image lists
-    case, df = _image_paths_finder(image_path, csv_path, image_column_header, new_csv,
-                                   save_csv=True)
-
-    if new_csv != '':
-        assert os.path.isfile(new_csv)
-        # remove the generated csv
-        os.remove(new_csv)
+    case, df = _image_paths_finder(image_path, csv_path, image_column_header, new_csv)
 
     # Check the image lists match
     assert case == check_images

diff --git a/tests/image_featurizer_test.py b/tests/image_featurizer_test.py
@@ -124,15 +124,9 @@ def compare_featurizer_class(featurizer,
     assert featurizer.image_dict == image_dict
     assert featurizer.depth == depth
     if featurized:
-
         assert np.array_equal(pd.read_csv(check_csv).columns, featurizer.features.columns)
-
-        print(featurizer.features.astype(float))
-        print(pd.read_csv(check_csv))
-        pd.testing.assert_frame_equal(featurizer.features.astype(float),
-                                      pd.read_csv(check_csv).astype(float), check_less_precise=True)
         assert np.allclose(featurizer.features.astype(float), pd.read_csv(check_csv).astype(float),
-                           rtol=3e-04)
+                           atol=ATOL)
 
 
 def compare_empty_input(featurizer):