Skip to content

Commit

Permalink
fix: Raise Error on Missing Files during Load (#596)
Browse files Browse the repository at this point in the history
* change to symlink

* fixes failing test case

* adds exception handling and messaging

* bug fix in test case

* fixes linting

* mid commit

* complete commit

* fix linting

* fixes test cases

* fix

* lint changes

* change error to load file

* fix test

* fixes tests

* skips upload tests

* adds path

* add test cases

* lint

* makes error more specific

* lint
  • Loading branch information
suryatejreddy authored Mar 12, 2023
1 parent 2a852a8 commit 1c208c1
Show file tree
Hide file tree
Showing 7 changed files with 37 additions and 8 deletions.
6 changes: 6 additions & 0 deletions eva/executor/load_multimedia_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from eva.plan_nodes.load_data_plan import LoadDataPlan
from eva.storage.abstract_storage_engine import AbstractStorageEngine
from eva.storage.storage_engine import StorageEngine
from eva.utils.errors import DatasetFileNotFoundError
from eva.utils.logging_manager import logger
from eva.utils.s3_utils import download_from_s3

Expand Down Expand Up @@ -66,6 +67,11 @@ def exec(self):
logger.error(err_msg)
raise ValueError(file_path)

if not valid_files:
raise DatasetFileNotFoundError(
f"Load {self.media_type.name} failed due to no valid files found on path {str(self.node.file_path)}"
)

# Create catalog entry
table_info = self.node.table_info
database_name = table_info.database_name
Expand Down
4 changes: 2 additions & 2 deletions test/integration_tests/test_drop_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@ class DropExecutorTest(unittest.TestCase):
def setUp(self):
# reset the catalog manager before running each test
CatalogManager().reset()
create_sample_video()
self.video_file_path = create_sample_video()

def tearDown(self):
    """Clean up after a test by calling ``file_remove`` on the sample video name."""
    sample_video_name = "dummy.avi"
    file_remove(sample_video_name)

# integration test
def test_should_drop_table(self):
catalog_manager = CatalogManager()
query = """LOAD VIDEO 'dummy.avi' INTO MyVideo;"""
query = f"""LOAD VIDEO '{self.video_file_path}' INTO MyVideo;"""
execute_query_fetch_all(query)

# catalog should contain video table and the metadata table
Expand Down
4 changes: 2 additions & 2 deletions test/integration_tests/test_insert_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class InsertExecutorTest(unittest.TestCase):
def setUp(self):
# reset the catalog manager before running each test
CatalogManager().reset()
create_sample_video()
self.video_file_path = create_sample_video()

query = """CREATE TABLE IF NOT EXISTS CSVTable
(
Expand All @@ -44,7 +44,7 @@ def tearDown(self):
# integration test
@unittest.skip("Not supported in current version")
def test_should_load_video_in_table(self):
query = """LOAD VIDEO 'dummy.avi' INTO MyVideo;"""
query = f"""LOAD VIDEO '{self.video_file_path}' INTO MyVideo;"""
execute_query_fetch_all(query)

insert_query = """ INSERT INTO MyVideo (id, data) VALUES (
Expand Down
20 changes: 20 additions & 0 deletions test/integration_tests/test_load_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,16 @@ def test_should_fail_to_load_videos_with_same_path(self):

self.assertEqual(expected_output, after_load_fail)

def test_should_fail_to_load_missing_video(self):
    """LOAD VIDEO on the ``missing.mp4`` sample path must raise ExecutorError.

    The raised error's text is expected to contain the
    "no valid files found" message for the VIDEO media type.
    """
    missing_video = f"{EVA_ROOT_DIR}/data/sample_videos/missing.mp4"
    load_query = f"""LOAD VIDEO "{missing_video}" INTO MyVideos;"""

    with self.assertRaises(ExecutorError) as raised:
        execute_query_fetch_all(load_query)

    # Checked after the context manager exits, so the captured exception is available.
    expected_fragment = "Load VIDEO failed due to no valid files found on path"
    self.assertIn(expected_fragment, str(raised.exception))

def test_should_fail_to_load_corrupt_video(self):
# should fail on an empty file
tempfile_name = os.urandom(24).hex()
Expand Down Expand Up @@ -258,6 +268,16 @@ def test_should_load_images_in_table(self):
)
self.assertEqual(result, expected)

def test_should_fail_to_load_missing_image(self):
    """LOAD IMAGE on the ``missing.jpg`` sample path must raise ExecutorError.

    The raised error's text is expected to contain the
    "no valid files found" message for the IMAGE media type.
    """
    missing_image = f"{EVA_ROOT_DIR}/data/sample_images/missing.jpg"
    load_query = f"""LOAD IMAGE "{missing_image}" INTO MyImages;"""

    with self.assertRaises(ExecutorError) as raised:
        execute_query_fetch_all(load_query)

    # Checked after the context manager exits, so the captured exception is available.
    expected_fragment = "Load IMAGE failed due to no valid files found on path"
    self.assertIn(expected_fragment, str(raised.exception))

def test_should_fail_to_load_images_with_same_path(self):
image_files = glob.glob(
os.path.expanduser(self.image_files_path), recursive=True
Expand Down
8 changes: 4 additions & 4 deletions test/integration_tests/test_rename_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ class RenameExecutorTest(unittest.TestCase):
def setUp(self):
# reset the catalog manager before running each test
CatalogManager().reset()
create_sample_video()
create_sample_csv()
self.video_file_path = create_sample_video()
self.csv_file_path = create_sample_csv()

def tearDown(self):
file_remove("dummy.avi")
Expand All @@ -33,7 +33,7 @@ def tearDown(self):
# integration test
def test_should_rename_table(self):
catalog_manager = CatalogManager()
query = """LOAD VIDEO 'dummy.avi' INTO MyVideo;"""
query = f"""LOAD VIDEO '{self.video_file_path}' INTO MyVideo;"""
execute_query_fetch_all(query)

self.assertTrue(catalog_manager.get_table_catalog_entry("MyVideo") is not None)
Expand Down Expand Up @@ -61,7 +61,7 @@ def test_should_fail_on_rename_structured_table(self):
execute_query_fetch_all(create_table_query)

# load the CSV
load_query = """LOAD CSV 'dummy.csv' INTO MyVideoCSV (id, frame_id, video_id, dataset_name);"""
load_query = f"""LOAD CSV '{self.csv_file_path}' INTO MyVideoCSV (id, frame_id, video_id, dataset_name);"""
execute_query_fetch_all(load_query)

with self.assertRaises(Exception) as cm:
Expand Down
2 changes: 2 additions & 0 deletions test/integration_tests/test_upload_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def tearDown(self):
file_remove("dummy.avi")
file_remove("dummy.csv")

@unittest.skip("System doesn't use UPLOAD anymore")
# integration test
def test_should_upload_file_to_location(self):
query = (
Expand All @@ -54,6 +55,7 @@ def test_should_upload_file_to_location(self):
actual_blob = str(base64.b64encode(bytes_read))
self.assertEqual(actual_blob, expected_blob)

@unittest.skip("System doesn't use UPLOAD anymore")
# integration test for csv
def test_should_upload_csv_to_table(self):
# loading a csv requires a table to be created first
Expand Down
1 change: 1 addition & 0 deletions test/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ def create_sample_csv(num_frames=NUM_FRAMES):
df_sample_meta.to_csv(
os.path.join(upload_dir_from_config, "dummy.csv"), index=False
)
return os.path.join(upload_dir_from_config, "dummy.csv")


def create_sample_csv_as_blob(num_frames=NUM_FRAMES):
Expand Down

0 comments on commit 1c208c1

Please sign in to comment.