Skip to content

Commit

Permalink
Refactor 'main.py' (#5)
Browse files Browse the repository at this point in the history
* Refactor 'main.py'

* Update tests due to main.py and processor.py refactoring
  • Loading branch information
awoods authored and kimpham54 committed Nov 15, 2024
1 parent 47f5496 commit fef6903
Show file tree
Hide file tree
Showing 8 changed files with 300 additions and 174 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ output/*
.coverage
coverage.*

myenv/

dist/*
*/*.egg-info/*
Expand Down
68 changes: 52 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,30 +15,56 @@ pip install jp2_remediator==0.0.2

## Usage

## Process one file
`python3 main.py --file tests/test-images/7514499.jp2`
```bash
python3 src/jp2_remediator/main.py -h

usage: main.py [-h] {file,directory,bucket} ...

JP2 file processor

`python3 main.py --file tests/test-images/481014278.jp2`
options:
-h, --help show this help message and exit

## Process directory
`python3 main.py --directory tests/test-images/`
Input source:
{file,directory,bucket}
file Process a single JP2 file
directory Process all JP2 files in a directory
bucket Process all JP2 files in an S3 bucket
```

## Process Amazon S3 bucket
`python3 main.py --bucket your-bucket-name --prefix optional-prefix`
### Process one file
```bash
python3 src/jp2_remediator/main.py file tests/test-images/7514499.jp2

## Process all .jp2 files in the bucket:
`python3 main.py --bucket remediation-folder`
python3 src/jp2_remediator/main.py file tests/test-images/481014278.jp2
```

### Process directory
```bash
python3 src/jp2_remediator/main.py directory tests/test-images/
```

### Process all .jp2 files in an S3 bucket:
```bash
python3 src/jp2_remediator/main.py bucket remediation-folder
```

## Process only files with a specific prefix (folder):
`python3 main.py --bucket remediation-folder --prefix testbatch_20240923`
### Process only files with a specific prefix (folder):
```bash
python3 src/jp2_remediator/main.py bucket remediation-folder --prefix testbatch_20240923
```

`python3 main.py --help`
## Run tests

## Run Tests
`python3 test_aws_connection.py`
### Run integration tests
```bash
pytest src/jp2_remediator/tests/integration/
```

### Run from src folder
`python3 -m unittest jp2_remediator.tests.unit.test_box_reader`
### Run unit tests
```bash
pytest src/jp2_remediator/tests/unit/
```

## Docker environment

Expand All @@ -51,3 +77,13 @@ Start Docker container
```bash
./bin/docker-run.sh
```

## Development environment
```bash
python3 -m venv myenv
source myenv/bin/activate
export PYTHONPATH="${PYTHONPATH}:src"
pip install -r requirements.txt
python src/jp2_remediator/main.py -h
```
40 changes: 2 additions & 38 deletions src/jp2_remediator/box_reader.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import os
import boto3
import datetime
from jp2_remediator import configure_logger
from jpylyzer import boxvalidator
Expand Down Expand Up @@ -169,7 +167,7 @@ def write_modified_file(self, new_file_contents):
new_file.write(new_file_contents)
self.logger.info(f"New JP2 file created with modifications: {new_file_path}")
else:
self.logger.debug("No modifications needed. No new file created.")
self.logger.info(f"No modifications needed. No new file created: {self.file_path}")

def read_jp2_file(self):
# Main function to read, validate, and modify JP2 files.
Expand All @@ -178,43 +176,9 @@ def read_jp2_file(self):

self.initialize_validator()
is_valid = self.validator._isValid()
self.logger.info("Is file valid?", is_valid)
self.logger.info(f"Is file valid? {is_valid}")

header_offset_position = self.check_boxes()
new_file_contents = self.process_all_trc_tags(header_offset_position)

self.write_modified_file(new_file_contents)


def process_directory(directory_path):
# Process all JP2 files in a given directory.
for root, _, files in os.walk(directory_path):
for file in files:
if file.lower().endswith(".jp2"):
file_path = os.path.join(root, file)
print(f"Processing file: {file_path}")
reader = BoxReader(file_path)
reader.read_jp2_file()


def process_s3_bucket(bucket_name, prefix=""):
# Process all JP2 files in a given S3 bucket.
s3 = boto3.client("s3")
response = s3.list_objects_v2(Bucket=bucket_name, Prefix=prefix)

if "Contents" in response:
for obj in response["Contents"]:
if obj["Key"].lower().endswith(".jp2"):
file_path = obj["Key"]
print(f"Processing file: {file_path} from bucket {bucket_name}")
download_path = f"/tmp/{os.path.basename(file_path)}"
s3.download_file(bucket_name, file_path, download_path)
reader = BoxReader(download_path)
reader.read_jp2_file()
# Optionally, upload modified file back to S3
timestamp = datetime.datetime.now().strftime("%Y%m%d") # use "%Y%m%d_%H%M%S" for more precision
s3.upload_file(
download_path.replace(".jp2", f"_modified_{timestamp}.jp2"),
bucket_name,
file_path.replace(".jp2", f"_modified_{timestamp}.jp2"),
)
12 changes: 12 additions & 0 deletions src/jp2_remediator/box_reader_factory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from jp2_remediator.box_reader import BoxReader


class BoxReaderFactory:
    """Factory that hands out BoxReader objects, one per file path."""

    def get_reader(self, file_path):
        """
        Build the reader for a single file.

        :param file_path: Path passed unchanged to the BoxReader constructor.
        :return: The newly constructed BoxReader.
        """
        reader = BoxReader(file_path)
        return reader
63 changes: 47 additions & 16 deletions src/jp2_remediator/main.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,62 @@
import argparse
from jp2_remediator.box_reader import BoxReader, process_directory, process_s3_bucket
from jp2_remediator.box_reader_factory import BoxReaderFactory
from jp2_remediator.processor import Processor


def main():
"""Main entry point for the JP2 file processor."""
processor = Processor(BoxReaderFactory())

parser = argparse.ArgumentParser(description="JP2 file processor")
parser.add_argument("--file", help="Path to a single JP2 file to process.")
parser.add_argument(
"--directory", help="Path to a directory of JP2 files to process."

# Create mutually exclusive subparsers for specifying input source
subparsers = parser.add_subparsers(
title="Input source", dest="input_source"
)

# Subparser for processing a single JP2 file
file_parser = subparsers.add_parser(
"file", help="Process a single JP2 file"
)
file_parser.add_argument(
"file", help="Path to a single JP2 file to process"
)
file_parser.set_defaults(
func=lambda args: processor.process_file(args.file)
)

# Subparser for processing all JP2 files in a directory
directory_parser = subparsers.add_parser(
"directory", help="Process all JP2 files in a directory"
)
directory_parser.add_argument(
"directory", help="Path to a directory of JP2 files to process"
)
directory_parser.set_defaults(
func=lambda args: processor.process_directory(args.directory)
)

# Subparser for processing all JP2 files in an S3 bucket
bucket_parser = subparsers.add_parser(
"bucket", help="Process all JP2 files in an S3 bucket"
)
bucket_parser.add_argument(
"bucket", help="Name of the AWS S3 bucket to process JP2 files from"
)
parser.add_argument(
"--bucket", help="Name of the AWS S3 bucket to process JP2 files from."
bucket_parser.add_argument(
"--prefix", help="Prefix of files in the AWS S3 bucket (optional)",
default=""
)
parser.add_argument(
"--prefix", help="Prefix of files in the AWS S3 bucket (optional)."
bucket_parser.set_defaults(
func=lambda args: processor.process_s3_bucket(args.bucket, args.prefix)
)

args = parser.parse_args()

if args.file:
reader = BoxReader(args.file)
reader.read_jp2_file()
elif args.directory:
process_directory(args.directory)
elif args.bucket:
process_s3_bucket(args.bucket, args.prefix)
if hasattr(args, "func"):
args.func(args)
else:
print("Please specify either --file, --directory, or --bucket.")
parser.print_help()


if __name__ == "__main__":
Expand Down
55 changes: 55 additions & 0 deletions src/jp2_remediator/processor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import datetime
import os
import boto3


class Processor:
    """Process JP2 files from a single path, a directory tree, or an S3 bucket.

    Readers are obtained from the injected factory, so the caller decides how
    a file path is turned into a reader object.
    """

    def __init__(self, factory):
        """Initialize the Processor with a BoxReader factory.

        :param factory: Object exposing ``get_reader(file_path)``; the object
            it returns must provide ``read_jp2_file()``.
        """
        self.box_reader_factory = factory

    def process_file(self, file_path):
        """Process a single JP2 file.

        :param file_path: Path handed to the reader factory.
        """
        print(f"Processing file: {file_path}")
        reader = self.box_reader_factory.get_reader(file_path)
        reader.read_jp2_file()

    def process_directory(self, directory_path):
        """Process all JP2 files in a given directory, including subfolders.

        :param directory_path: Root of the tree walked with ``os.walk``.
            File names are matched case-insensitively against ``.jp2``.
        """
        for root, _, files in os.walk(directory_path):
            for file in files:
                if file.lower().endswith(".jp2"):
                    # Same announce-and-read steps as the single-file path.
                    self.process_file(os.path.join(root, file))

    def process_s3_bucket(self, bucket_name, prefix=""):
        """Process all JP2 files in a given S3 bucket.

        Each matching object is downloaded to ``/tmp``, run through a reader,
        and the resulting ``*_modified_<YYYYMMDD>.jp2`` file, when one was
        produced, is uploaded next to the original key.

        :param bucket_name: Name of the S3 bucket to scan.
        :param prefix: Only keys starting with this prefix are considered.
        """
        s3 = boto3.client("s3")

        for key in self._iter_jp2_keys(s3, bucket_name, prefix):
            print(f"Processing file: {key} from bucket {bucket_name}")
            download_path = f"/tmp/{os.path.basename(key)}"
            s3.download_file(bucket_name, key, download_path)
            reader = self.box_reader_factory.get_reader(download_path)
            reader.read_jp2_file()

            # Optionally, upload modified file back to S3
            timestamp = datetime.datetime.now().strftime(
                "%Y%m%d"
            )  # use "%Y%m%d_%H%M%S" for more precision
            suffix = f"_modified_{timestamp}.jp2"
            # NOTE(review): assumes the reader names its output with this
            # same replace() rule -- confirm against BoxReader.
            modified_path = download_path.replace(".jp2", suffix)

            # replace() is case-sensitive while the key filter is not: for a
            # key such as "X.JP2" the path comes back unchanged, and uploading
            # it would only overwrite the original object with itself.
            if modified_path == download_path:
                continue
            # The reader writes no new file when nothing needed fixing;
            # uploading a missing path would abort the whole bucket run.
            if not os.path.isfile(modified_path):
                continue

            s3.upload_file(
                modified_path,
                bucket_name,
                key.replace(".jp2", suffix)
            )

    @staticmethod
    def _iter_jp2_keys(s3, bucket_name, prefix):
        """Yield every ``.jp2`` key under ``prefix``, following all pages.

        A single ``list_objects_v2`` call returns only one page of keys, so
        the continuation token is followed until the listing is complete.
        """
        request = {"Bucket": bucket_name, "Prefix": prefix}
        while True:
            response = s3.list_objects_v2(**request)
            if "Contents" not in response:
                return
            for obj in response["Contents"]:
                if obj["Key"].lower().endswith(".jp2"):
                    yield obj["Key"]
            if not response.get("IsTruncated"):
                return
            request["ContinuationToken"] = response["NextContinuationToken"]
Loading

0 comments on commit fef6903

Please sign in to comment.