Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Raise Error on Missing Files during Load #596

Merged
merged 24 commits into from
Mar 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions eva/executor/load_multimedia_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from eva.plan_nodes.load_data_plan import LoadDataPlan
from eva.storage.abstract_storage_engine import AbstractStorageEngine
from eva.storage.storage_engine import StorageEngine
from eva.utils.errors import DatasetFileNotFoundError
from eva.utils.logging_manager import logger
from eva.utils.s3_utils import download_from_s3

Expand Down Expand Up @@ -66,6 +67,11 @@ def exec(self):
logger.error(err_msg)
raise ValueError(file_path)

if not valid_files:
raise DatasetFileNotFoundError(
f"Load {self.media_type.name} failed due to no valid files found on path {str(self.node.file_path)}"
)

# Create catalog entry
table_info = self.node.table_info
database_name = table_info.database_name
Expand Down
4 changes: 2 additions & 2 deletions test/integration_tests/test_drop_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@ class DropExecutorTest(unittest.TestCase):
def setUp(self):
# reset the catalog manager before running each test
CatalogManager().reset()
create_sample_video()
self.video_file_path = create_sample_video()

def tearDown(self):
file_remove("dummy.avi")

# integration test
def test_should_drop_table(self):
catalog_manager = CatalogManager()
query = """LOAD VIDEO 'dummy.avi' INTO MyVideo;"""
query = f"""LOAD VIDEO '{self.video_file_path}' INTO MyVideo;"""
execute_query_fetch_all(query)

# catalog should contain video table and the metadata table
Expand Down
4 changes: 2 additions & 2 deletions test/integration_tests/test_insert_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class InsertExecutorTest(unittest.TestCase):
def setUp(self):
# reset the catalog manager before running each test
CatalogManager().reset()
create_sample_video()
self.video_file_path = create_sample_video()

query = """CREATE TABLE IF NOT EXISTS CSVTable
(
Expand All @@ -44,7 +44,7 @@ def tearDown(self):
# integration test
@unittest.skip("Not supported in current version")
def test_should_load_video_in_table(self):
query = """LOAD VIDEO 'dummy.avi' INTO MyVideo;"""
query = f"""LOAD VIDEO '{self.video_file_path}' INTO MyVideo;"""
execute_query_fetch_all(query)

insert_query = """ INSERT INTO MyVideo (id, data) VALUES (
Expand Down
20 changes: 20 additions & 0 deletions test/integration_tests/test_load_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,16 @@ def test_should_fail_to_load_videos_with_same_path(self):

self.assertEqual(expected_output, after_load_fail)

def test_should_fail_to_load_missing_video(self):
    """LOAD VIDEO on a path that does not exist must raise ExecutorError.

    The raised error is expected to carry the "no valid files found"
    message for the VIDEO media type.
    """
    missing_video = f"{EVA_ROOT_DIR}/data/sample_videos/missing.mp4"
    load_stmt = f"""LOAD VIDEO "{missing_video}" INTO MyVideos;"""

    with self.assertRaises(ExecutorError) as raised:
        execute_query_fetch_all(load_stmt)

    expected_fragment = "Load VIDEO failed due to no valid files found on path"
    self.assertIn(expected_fragment, str(raised.exception))

def test_should_fail_to_load_corrupt_video(self):
# should fail on an empty file
tempfile_name = os.urandom(24).hex()
Expand Down Expand Up @@ -258,6 +268,16 @@ def test_should_load_images_in_table(self):
)
self.assertEqual(result, expected)

def test_should_fail_to_load_missing_image(self):
    """LOAD IMAGE on a path that does not exist must raise ExecutorError.

    The raised error is expected to carry the "no valid files found"
    message for the IMAGE media type.
    """
    missing_image = f"{EVA_ROOT_DIR}/data/sample_images/missing.jpg"
    load_stmt = f"""LOAD IMAGE "{missing_image}" INTO MyImages;"""

    with self.assertRaises(ExecutorError) as raised:
        execute_query_fetch_all(load_stmt)

    expected_fragment = "Load IMAGE failed due to no valid files found on path"
    self.assertIn(expected_fragment, str(raised.exception))

def test_should_fail_to_load_images_with_same_path(self):
image_files = glob.glob(
os.path.expanduser(self.image_files_path), recursive=True
Expand Down
8 changes: 4 additions & 4 deletions test/integration_tests/test_rename_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ class RenameExecutorTest(unittest.TestCase):
def setUp(self):
# reset the catalog manager before running each test
CatalogManager().reset()
create_sample_video()
create_sample_csv()
self.video_file_path = create_sample_video()
self.csv_file_path = create_sample_csv()

def tearDown(self):
file_remove("dummy.avi")
Expand All @@ -33,7 +33,7 @@ def tearDown(self):
# integration test
def test_should_rename_table(self):
catalog_manager = CatalogManager()
query = """LOAD VIDEO 'dummy.avi' INTO MyVideo;"""
query = f"""LOAD VIDEO '{self.video_file_path}' INTO MyVideo;"""
execute_query_fetch_all(query)

self.assertTrue(catalog_manager.get_table_catalog_entry("MyVideo") is not None)
Expand Down Expand Up @@ -61,7 +61,7 @@ def test_should_fail_on_rename_structured_table(self):
execute_query_fetch_all(create_table_query)

# load the CSV
load_query = """LOAD CSV 'dummy.csv' INTO MyVideoCSV (id, frame_id, video_id, dataset_name);"""
load_query = f"""LOAD CSV '{self.csv_file_path}' INTO MyVideoCSV (id, frame_id, video_id, dataset_name);"""
execute_query_fetch_all(load_query)

with self.assertRaises(Exception) as cm:
Expand Down
2 changes: 2 additions & 0 deletions test/integration_tests/test_upload_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def tearDown(self):
file_remove("dummy.avi")
file_remove("dummy.csv")

@unittest.skip("System doesn't use UPLOAD anymore")
# integration test
def test_should_upload_file_to_location(self):
query = (
Expand All @@ -54,6 +55,7 @@ def test_should_upload_file_to_location(self):
actual_blob = str(base64.b64encode(bytes_read))
self.assertEqual(actual_blob, expected_blob)

@unittest.skip("System doesn't use UPLOAD anymore")
# integration test for csv
def test_should_upload_csv_to_table(self):
# loading a csv requires a table to be created first
Expand Down
1 change: 1 addition & 0 deletions test/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ def create_sample_csv(num_frames=NUM_FRAMES):
df_sample_meta.to_csv(
os.path.join(upload_dir_from_config, "dummy.csv"), index=False
)
return os.path.join(upload_dir_from_config, "dummy.csv")


def create_sample_csv_as_blob(num_frames=NUM_FRAMES):
Expand Down