diff --git a/src/jp2_remediator/box_reader_factory.py b/src/jp2_remediator/box_reader_factory.py new file mode 100644 index 0000000..0415d6c --- /dev/null +++ b/src/jp2_remediator/box_reader_factory.py @@ -0,0 +1,13 @@ +from jp2_remediator.box_reader import BoxReader + + +class BoxReaderFactory: + + def get_reader(self, file_path): + """ + Create a BoxReader instance for a given file path. + :param file_path: The path to the file to be read. + :return: A BoxReader instance. + """ + return BoxReader(file_path) + diff --git a/src/jp2_remediator/main.py b/src/jp2_remediator/main.py index 11a4e71..2ccf693 100644 --- a/src/jp2_remediator/main.py +++ b/src/jp2_remediator/main.py @@ -1,10 +1,11 @@ import argparse +from jp2_remediator.box_reader_factory import BoxReaderFactory from jp2_remediator.processor import Processor def main(): """Main entry point for the JP2 file processor.""" - processor = Processor() + processor = Processor(BoxReaderFactory()) parser = argparse.ArgumentParser(description="JP2 file processor") diff --git a/src/jp2_remediator/processor.py b/src/jp2_remediator/processor.py index d5b97c2..7727ad8 100644 --- a/src/jp2_remediator/processor.py +++ b/src/jp2_remediator/processor.py @@ -2,16 +2,18 @@ import os import boto3 -from jp2_remediator.box_reader import BoxReader - class Processor: """Class to process JP2 files.""" + def __init__(self, factory): + """Initialize the Processor with a BoxReader factory.""" + self.box_reader_factory = factory + def process_file(self, file_path): """Process a single JP2 file.""" print(f"Processing file: {file_path}") - reader = BoxReader(file_path) + reader = self.box_reader_factory.get_reader(file_path) reader.read_jp2_file() def process_directory(self, directory_path): @@ -21,7 +23,7 @@ def process_directory(self, directory_path): if file.lower().endswith(".jp2"): file_path = os.path.join(root, file) print(f"Processing file: {file_path}") - reader = BoxReader(file_path) + reader = self.box_reader_factory.get_reader(file_path) reader.read_jp2_file() def process_s3_bucket(self, bucket_name, prefix=""): @@ -38,7 +40,7 @@ def process_s3_bucket(self, bucket_name, prefix=""): }""") download_path = f"/tmp/{os.path.basename(file_path)}" s3.download_file(bucket_name, file_path, download_path) - reader = BoxReader(download_path) + reader = self.box_reader_factory.get_reader(download_path) reader.read_jp2_file() # Optionally, upload modified file back to S3 timestamp = datetime.datetime.now().strftime( diff --git a/src/jp2_remediator/tests/unit/test_box_reader.py b/src/jp2_remediator/tests/unit/test_box_reader.py index ba1bf86..e100e21 100644 --- a/src/jp2_remediator/tests/unit/test_box_reader.py +++ b/src/jp2_remediator/tests/unit/test_box_reader.py @@ -1,7 +1,7 @@ import unittest import os from unittest.mock import patch, mock_open, MagicMock -from jp2_remediator.box_reader import BoxReader, process_directory, process_s3_bucket +from jp2_remediator.box_reader import BoxReader from jpylyzer import boxvalidator from project_paths import paths import datetime @@ -137,29 +137,6 @@ def test_process_all_trc_tags(self): ) self.assertEqual(modified_contents, self.reader.file_contents) - # Test for process_directory function - @patch("jp2_remediator.box_reader.BoxReader") - @patch("os.walk", return_value=[("root", [], ["file1.jp2", "file2.jp2"])]) - @patch("builtins.print") - def test_process_directory_with_multiple_files( - self, mock_print, mock_os_walk, mock_box_reader - ): - # Process a dir with multiple jp2 files - # Mock the logger for each BoxReader instance created - mock_box_reader.return_value.logger = MagicMock() - - # Call process_directory with a dummy path - process_directory("dummy_path") - - # Check that each JP2 file in the directory was processed - mock_print.assert_any_call("Processing file: root/file1.jp2") - mock_print.assert_any_call("Processing file: root/file2.jp2") - - # Ensure each BoxReader instance had its read_jp2_file method called - self.assertEqual( - mock_box_reader.return_value.read_jp2_file.call_count, 2 - ) - # Test for check_boxes method logging when 'jp2h' not found def test_jp2h_not_found_logging(self): # Set up file_contents to simulate a missing 'jp2h' box @@ -186,9 +163,10 @@ def test_write_modified_file_no_changes(self, mock_file): mock_file.assert_not_called() # Check that the specific debug message was logged - self.reader.logger.debug.assert_called_once_with( - "No modifications needed. No new file created." - ) + self.reader.logger.info.assert_called_once() + pattern = r"No modifications needed\. No new file created: .*sample\.jp2" + call_args = self.reader.logger.info.call_args + self.assertRegex(call_args[0][0], pattern) # Test for process_colr_box method when meth_value == 1 def test_process_colr_box_meth_value_1(self): @@ -316,7 +294,7 @@ def test_read_jp2_file(self): mock_validator._isValid.assert_called_once() # Assert that logger.info was called with correct parameters - self.reader.logger.info.assert_called_with("Is file valid?", True) + self.reader.logger.info.assert_called_with("Is file valid? True") # Assert that check_boxes was called once mock_check_boxes.assert_called_once() @@ -347,82 +325,6 @@ def test_read_jp2_file_no_file_contents(self): mock_process_all_trc_tags.assert_not_called() mock_write_modified_file.assert_not_called() - # Test for process_s3_bucket function - @patch("jp2_remediator.box_reader.boto3.client") - @patch("jp2_remediator.box_reader.BoxReader") - @patch("builtins.print") - def test_process_s3_bucket(self, mock_print, mock_box_reader, mock_boto3_client): - # Set up the mock S3 client - mock_s3_client = MagicMock() - mock_boto3_client.return_value = mock_s3_client - - # Define the bucket name and prefix - bucket_name = "test-bucket" - prefix = "test-prefix" - - # Prepare a fake response for list_objects_v2 - mock_s3_client.list_objects_v2.return_value = { - "Contents": [ - {"Key": "file1.jp2"}, - {"Key": "file2.jp2"}, - {"Key": "file3.txt"}, # Non-JP2 file to test filtering - ] - } - - # Mock download_file and upload_file methods - mock_s3_client.download_file.return_value = None - mock_s3_client.upload_file.return_value = None - - # Mock BoxReader instance and its read_jp2_file method - mock_reader_instance = MagicMock() - mock_box_reader.return_value = mock_reader_instance - - # Call the method under test - process_s3_bucket(bucket_name, prefix) - - # Verify that list_objects_v2 was called with the correct parameters - mock_s3_client.list_objects_v2.assert_called_once_with(Bucket=bucket_name, Prefix=prefix) - - # Verify that download_file was called for each .jp2 file - expected_download_calls = [ - unittest.mock.call(bucket_name, "file1.jp2", "/tmp/file1.jp2"), - unittest.mock.call(bucket_name, "file2.jp2", "/tmp/file2.jp2"), - ] - self.assertEqual(mock_s3_client.download_file.call_args_list, expected_download_calls) - - # Verify that BoxReader was instantiated with the correct download paths - expected_boxreader_calls = [ - unittest.mock.call("/tmp/file1.jp2"), - unittest.mock.call("/tmp/file2.jp2"), - ] - self.assertEqual(mock_box_reader.call_args_list, expected_boxreader_calls) - - # Verify that read_jp2_file was called for each .jp2 file - self.assertEqual(mock_reader_instance.read_jp2_file.call_count, 2) - - # Verify that upload_file was called for each .jp2 file - upload_calls = mock_s3_client.upload_file.call_args_list - self.assertEqual(len(upload_calls), 2) - for call in upload_calls: - args, _ = call - local_file_path = args[0] - upload_bucket = args[1] - upload_key = args[2] - # Check that the local file path includes '_modified_' and ends with '.jp2' - self.assertIn("_modified_", local_file_path) - self.assertTrue(local_file_path.endswith(".jp2")) - # Check that the upload is to the correct bucket and key - self.assertEqual(upload_bucket, bucket_name) - self.assertIn("_modified_", upload_key) - self.assertTrue(upload_key.endswith(".jp2")) - - # Verify that print was called correctly - expected_print_calls = [ - unittest.mock.call(f"Processing file: file1.jp2 from bucket {bucket_name}"), - unittest.mock.call(f"Processing file: file2.jp2 from bucket {bucket_name}"), - ] - mock_print.assert_has_calls(expected_print_calls, any_order=True) - # Test for process_trc_tag: when trc_tag_size != curv_trc_field_length def test_process_trc_tag_size_mismatch(self): # Prepare test data where trc_tag_size does not match curv_trc_field_length diff --git a/src/jp2_remediator/tests/unit/test_processor.py b/src/jp2_remediator/tests/unit/test_processor.py new file mode 100644 index 0000000..eccdaef --- /dev/null +++ b/src/jp2_remediator/tests/unit/test_processor.py @@ -0,0 +1,126 @@ +import unittest +import pytest +from unittest.mock import call, patch, MagicMock +from jp2_remediator.processor import Processor + +class TestProcessor: + + @pytest.fixture + def mock_box_reader_factory(self): + return MagicMock() + + @pytest.fixture + def processor(self, mock_box_reader_factory): + return Processor(mock_box_reader_factory) + + # Test for process_file function + @patch("builtins.print") + def test_process_file(self, mock_print, processor, mock_box_reader_factory): + # Define the file path + file_path = "test_file.jp2" + + # Call process_file with the test file path + processor.process_file(file_path) + + # Check that the file was processed + mock_print.assert_called_once_with(f"Processing file: {file_path}") + + # Ensure the BoxReader instance had its read_jp2_file method called + mock_box_reader_factory.get_reader.assert_called_once_with(file_path) + mock_box_reader_factory.get_reader.return_value.read_jp2_file.assert_called_once() + + + # Test for process_directory function + @patch("os.walk", return_value=[("root", [], ["file1.jp2", "file2.jp2"])]) + @patch("builtins.print") + def test_process_directory_with_multiple_files( + self, mock_print, mock_os_walk, processor, mock_box_reader_factory + ): + # Call process_directory with a dummy path + processor.process_directory("dummy_path") + + # Check that each JP2 file in the directory was processed + mock_print.assert_any_call("Processing file: root/file1.jp2") + mock_print.assert_any_call("Processing file: root/file2.jp2") + + # Ensure each BoxReader instance had its read_jp2_file method called + assert mock_box_reader_factory.get_reader.call_count == 2 + + # Ensure each BoxReader instance was created with the correct file path + assert mock_box_reader_factory.get_reader.call_args_list == [ + call("root/file1.jp2"), + call("root/file2.jp2"), + ] + assert mock_box_reader_factory.get_reader.return_value.read_jp2_file.call_count == 2 + + + # Test for process_s3_bucket function + @patch("jp2_remediator.processor.boto3.client") + @patch("builtins.print") + def test_process_s3_bucket(self, mock_print, mock_boto3_client, processor, mock_box_reader_factory): + # Set up the mock S3 client + mock_s3_client = MagicMock() + mock_boto3_client.return_value = mock_s3_client + + # Define the bucket name and prefix + bucket_name = "test-bucket" + prefix = "test-prefix" + + # Prepare a fake response for list_objects_v2 + mock_s3_client.list_objects_v2.return_value = { + "Contents": [ + {"Key": "file1.jp2"}, + {"Key": "file2.jp2"}, + {"Key": "file3.txt"}, # Non-JP2 file to test filtering + ] + } + + # Mock download_file and upload_file methods + mock_s3_client.download_file.return_value = None + mock_s3_client.upload_file.return_value = None + + # Call the method under test + processor.process_s3_bucket(bucket_name, prefix) + + # Verify that list_objects_v2 was called with the correct parameters + mock_s3_client.list_objects_v2.assert_called_once_with(Bucket=bucket_name, Prefix=prefix) + + # Verify that download_file was called for each .jp2 file + expected_download_calls = [ + unittest.mock.call(bucket_name, "file1.jp2", "/tmp/file1.jp2"), + unittest.mock.call(bucket_name, "file2.jp2", "/tmp/file2.jp2"), + ] + assert mock_s3_client.download_file.call_args_list == expected_download_calls + + # Verify that BoxReader was instantiated with the correct download paths + expected_boxreader_calls = [ + unittest.mock.call("/tmp/file1.jp2"), + unittest.mock.call("/tmp/file2.jp2"), + ] + assert mock_box_reader_factory.get_reader.call_args_list == expected_boxreader_calls + + # Verify that read_jp2_file was called for each .jp2 file + assert mock_box_reader_factory.get_reader.return_value.read_jp2_file.call_count == 2 + + # Verify that upload_file was called for each .jp2 file + upload_calls = mock_s3_client.upload_file.call_args_list + assert len(upload_calls) == 2 + for call in upload_calls: + args, _ = call + local_file_path = args[0] + upload_bucket = args[1] + upload_key = args[2] + # Check that the local file path includes '_modified_' and ends with '.jp2' + assert "_modified_" in local_file_path, "'_modified_' should be in local_file_path" + assert local_file_path.endswith(".jp2") + # Check that the upload is to the correct bucket and key + assert upload_bucket == bucket_name + assert "_modified_" in upload_key + assert upload_key.endswith(".jp2") + + # Verify that print was called correctly + expected_print_calls = [ + unittest.mock.call(f"Processing file: file1.jp2 from bucket {bucket_name}"), + unittest.mock.call(f"Processing file: file2.jp2 from bucket {bucket_name}"), + ] + mock_print.assert_has_calls(expected_print_calls, any_order=True) \ No newline at end of file