Refactor 'main.py' (#5)

* Refactor 'main.py' * Update tests due to main.py and processor.py refactoring
harvard-lts · Nov 15, 2024 · fef6903 · fef6903
1 parent 47f5496
commit fef6903
Show file tree

Hide file tree

Showing 8 changed files with 300 additions and 174 deletions.
diff --git a/.gitignore b/.gitignore
@@ -8,6 +8,7 @@ output/*
 .coverage
 coverage.*
 
+myenv/
 
 dist/*
 */*.egg-info/*

diff --git a/README.md b/README.md
@@ -15,30 +15,56 @@ pip install jp2_remediator==0.0.2
 
 ## Usage
 
-## Process one file
-`python3 main.py --file tests/test-images/7514499.jp2`
+```bash
+python3 src/jp2_remediator/main.py  -h
+
+usage: main.py [-h] {file,directory,bucket} ...
+
+JP2 file processor
 
-`python3 main.py --file tests/test-images/481014278.jp2`
+options:
+  -h, --help            show this help message and exit
 
-## Process directory
-`python3 main.py --directory tests/test-images/`
+Input source:
+  {file,directory,bucket}
+    file                Process a single JP2 file
+    directory           Process all JP2 files in a directory
+    bucket              Process all JP2 files in an S3 bucket
+```
 
-## Process Amazon S3 bucket
-`python3 main.py --bucket your-bucket-name --prefix optional-prefix`
+### Process one file
+```bash
+python3 src/jp2_remediator/main.py file tests/test-images/7514499.jp2
 
-## Process all .jp2 files in the bucket:
-`python3 main.py --bucket remediation-folder`
+python3 src/jp2_remediator/main.py file tests/test-images/481014278.jp2
+```
+
+### Process directory
+```bash
+python3 src/jp2_remediator/main.py directory tests/test-images/
+```
+
+### Process all .jp2 files in an S3 bucket:
+```bash
+python3 src/jp2_remediator/main.py bucket remediation-folder
+```
 
-## Process only files with a specific prefix (folder):
-`python3 main.py --bucket remediation-folder --prefix testbatch_20240923`
+### Process only files with a specific prefix (folder):
+```bash
+python3 src/jp2_remediator/main.py bucket remediation-folder --prefix testbatch_20240923
+```
 
-`python3 main.py --help`
+## Run tests
 
-## Run Tests
-`python3 test_aws_connection.py`
+### Run integration tests
+```bash
+pytest src/jp2_remediator/tests/integration/
+```
 
-### Run from src folder
-`python3 -m unittest jp2_remediator.tests.unit.test_box_reader`
+### Run unit tests
+```bash
+pytest src/jp2_remediator/tests/unit/
+```
 
 ## Docker environment
 
@@ -51,3 +77,13 @@ Start Docker container
 ```bash
 ./bin/docker-run.sh
 ```
+
+## Development environment
+```bash
+python3 -m venv myenv
+source myenv/bin/activate
+export PYTHONPATH="${PYTHONPATH}:src"
+pip install -r requirements.txt
+
+python src/jp2_remediator/main.py -h
+```
diff --git a/src/jp2_remediator/box_reader.py b/src/jp2_remediator/box_reader.py
@@ -1,5 +1,3 @@
-import os
-import boto3
 import datetime
 from jp2_remediator import configure_logger
 from jpylyzer import boxvalidator
@@ -169,7 +167,7 @@ def write_modified_file(self, new_file_contents):
                 new_file.write(new_file_contents)
             self.logger.info(f"New JP2 file created with modifications: {new_file_path}")
         else:
-            self.logger.debug("No modifications needed. No new file created.")
+            self.logger.info(f"No modifications needed. No new file created: {self.file_path}")
 
     def read_jp2_file(self):
         # Main function to read, validate, and modify JP2 files.
@@ -178,43 +176,9 @@ def read_jp2_file(self):
 
         self.initialize_validator()
         is_valid = self.validator._isValid()
-        self.logger.info("Is file valid?", is_valid)
+        self.logger.info(f"Is file valid? {is_valid}")
 
         header_offset_position = self.check_boxes()
         new_file_contents = self.process_all_trc_tags(header_offset_position)
 
         self.write_modified_file(new_file_contents)
-
-
-def process_directory(directory_path):
-    # Process all JP2 files in a given directory.
-    for root, _, files in os.walk(directory_path):
-        for file in files:
-            if file.lower().endswith(".jp2"):
-                file_path = os.path.join(root, file)
-                print(f"Processing file: {file_path}")
-                reader = BoxReader(file_path)
-                reader.read_jp2_file()
-
-
-def process_s3_bucket(bucket_name, prefix=""):
-    # Process all JP2 files in a given S3 bucket.
-    s3 = boto3.client("s3")
-    response = s3.list_objects_v2(Bucket=bucket_name, Prefix=prefix)
-
-    if "Contents" in response:
-        for obj in response["Contents"]:
-            if obj["Key"].lower().endswith(".jp2"):
-                file_path = obj["Key"]
-                print(f"Processing file: {file_path} from bucket {bucket_name}")
-                download_path = f"/tmp/{os.path.basename(file_path)}"
-                s3.download_file(bucket_name, file_path, download_path)
-                reader = BoxReader(download_path)
-                reader.read_jp2_file()
-                # Optionally, upload modified file back to S3
-                timestamp = datetime.datetime.now().strftime("%Y%m%d")  # use "%Y%m%d_%H%M%S" for more precision
-                s3.upload_file(
-                    download_path.replace(".jp2", f"_modified_{timestamp}.jp2"),
-                    bucket_name,
-                    file_path.replace(".jp2", f"_modified_{timestamp}.jp2"),
-                )
diff --git a/src/jp2_remediator/box_reader_factory.py b/src/jp2_remediator/box_reader_factory.py
@@ -0,0 +1,12 @@
+from jp2_remediator.box_reader import BoxReader
+
+
+class BoxReaderFactory:
+
+    def get_reader(self, file_path):
+        """
+        Create a BoxReader instance for a given file path.
+        :param file_path: The path to the file to be read.
+        :return: A BoxReader instance.
+        """
+        return BoxReader(file_path)
diff --git a/src/jp2_remediator/main.py b/src/jp2_remediator/main.py
@@ -1,31 +1,62 @@
 import argparse
-from jp2_remediator.box_reader import BoxReader, process_directory, process_s3_bucket
+from jp2_remediator.box_reader_factory import BoxReaderFactory
+from jp2_remediator.processor import Processor
 
 
 def main():
+    """Main entry point for the JP2 file processor."""
+    processor = Processor(BoxReaderFactory())
+
     parser = argparse.ArgumentParser(description="JP2 file processor")
-    parser.add_argument("--file", help="Path to a single JP2 file to process.")
-    parser.add_argument(
-        "--directory", help="Path to a directory of JP2 files to process."
+
+    # Create mutually exclusive subparsers for specifying input source
+    subparsers = parser.add_subparsers(
+        title="Input source", dest="input_source"
+    )
+
+    # Subparser for processing a single JP2 file
+    file_parser = subparsers.add_parser(
+        "file", help="Process a single JP2 file"
+    )
+    file_parser.add_argument(
+        "file", help="Path to a single JP2 file to process"
+    )
+    file_parser.set_defaults(
+        func=lambda args: processor.process_file(args.file)
+    )
+
+    # Subparser for processing all JP2 files in a directory
+    directory_parser = subparsers.add_parser(
+        "directory", help="Process all JP2 files in a directory"
+    )
+    directory_parser.add_argument(
+        "directory", help="Path to a directory of JP2 files to process"
+    )
+    directory_parser.set_defaults(
+        func=lambda args: processor.process_directory(args.directory)
+    )
+
+    # Subparser for processing all JP2 files in an S3 bucket
+    bucket_parser = subparsers.add_parser(
+        "bucket", help="Process all JP2 files in an S3 bucket"
+    )
+    bucket_parser.add_argument(
+        "bucket", help="Name of the AWS S3 bucket to process JP2 files from"
     )
-    parser.add_argument(
-        "--bucket", help="Name of the AWS S3 bucket to process JP2 files from."
+    bucket_parser.add_argument(
+        "--prefix", help="Prefix of files in the AWS S3 bucket (optional)",
+        default=""
     )
-    parser.add_argument(
-        "--prefix", help="Prefix of files in the AWS S3 bucket (optional)."
+    bucket_parser.set_defaults(
+        func=lambda args: processor.process_s3_bucket(args.bucket, args.prefix)
     )
 
     args = parser.parse_args()
 
-    if args.file:
-        reader = BoxReader(args.file)
-        reader.read_jp2_file()
-    elif args.directory:
-        process_directory(args.directory)
-    elif args.bucket:
-        process_s3_bucket(args.bucket, args.prefix)
+    if hasattr(args, "func"):
+        args.func(args)
     else:
-        print("Please specify either --file, --directory, or --bucket.")
+        parser.print_help()
 
 
 if __name__ == "__main__":

diff --git a/src/jp2_remediator/processor.py b/src/jp2_remediator/processor.py
@@ -0,0 +1,55 @@
+import datetime
+import os
+import boto3
+
+
+class Processor:
+    """Class to process JP2 files."""
+
+    def __init__(self, factory):
+        """Initialize the Processor with a BoxReader factory."""
+        self.box_reader_factory = factory
+
+    def process_file(self, file_path):
+        """Process a single JP2 file."""
+        print(f"Processing file: {file_path}")
+        reader = self.box_reader_factory.get_reader(file_path)
+        reader.read_jp2_file()
+
+    def process_directory(self, directory_path):
+        """Process all JP2 files in a given directory."""
+        for root, _, files in os.walk(directory_path):
+            for file in files:
+                if file.lower().endswith(".jp2"):
+                    file_path = os.path.join(root, file)
+                    print(f"Processing file: {file_path}")
+                    reader = self.box_reader_factory.get_reader(file_path)
+                    reader.read_jp2_file()
+
+    def process_s3_bucket(self, bucket_name, prefix=""):
+        """Process all JP2 files in a given S3 bucket."""
+        s3 = boto3.client("s3")
+        response = s3.list_objects_v2(Bucket=bucket_name, Prefix=prefix)
+
+        if "Contents" in response:
+            for obj in response["Contents"]:
+                if obj["Key"].lower().endswith(".jp2"):
+                    file_path = obj["Key"]
+                    print(f"""Processing file: {file_path} from bucket {
+                        bucket_name
+                        }""")
+                    download_path = f"/tmp/{os.path.basename(file_path)}"
+                    s3.download_file(bucket_name, file_path, download_path)
+                    reader = self.box_reader_factory.get_reader(download_path)
+                    reader.read_jp2_file()
+                    # Optionally, upload modified file back to S3
+                    timestamp = datetime.datetime.now().strftime(
+                        "%Y%m%d"
+                    )  # use "%Y%m%d_%H%M%S" for more precision
+                    s3.upload_file(
+                        download_path.replace(
+                            ".jp2", f"_modified_{timestamp}.jp2"
+                            ),
+                        bucket_name,
+                        file_path.replace(".jp2", f"_modified_{timestamp}.jp2")
+                    )
-Original file line number
+Diff line change
@@ Expand Up / @@ -8,6 +8,7 @@ output/* @@
     .coverage
     coverage.*
+    myenv/
     dist/*
     */*.egg-info/*
@@ Expand Down @@