-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
S3 arguments #8
base: main
Are you sure you want to change the base?
S3 arguments #8
Changes from 9 commits
ea1d09f
9f4cb0d
d59a03b
a8baf0f
52b29b8
b49265c
3f3e2b3
fd06b0c
7f3dadf
43631b1
852a372
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -37,18 +37,50 @@ def main(): | |
) | ||
|
||
# Subparser for processing all JP2 files in an S3 bucket | ||
bucket_parser = subparsers.add_parser( | ||
"bucket", help="Process all JP2 files in an S3 bucket" | ||
s3_bucket_parser = subparsers.add_parser( | ||
"s3", help="Process JP2 files in an S3 bucket" | ||
) | ||
bucket_parser.add_argument( | ||
"bucket", help="Name of the AWS S3 bucket to process JP2 files from" | ||
s3_bucket_parser.add_argument( | ||
"input_bucket", help="Name of the AWS S3 bucket to process JP2 files from" | ||
) | ||
bucket_parser.add_argument( | ||
"--prefix", help="Prefix of files in the AWS S3 bucket (optional)", | ||
default="" | ||
s3_bucket_parser.add_argument( | ||
"--input-prefix", help="Prefix of files in the AWS S3 bucket (optional)", default="" | ||
) | ||
bucket_parser.set_defaults( | ||
func=lambda args: processor.process_s3_bucket(args.bucket, args.prefix) | ||
s3_bucket_parser.add_argument( | ||
"--output-bucket", help="Name of the AWS S3 bucket to upload modified files (optional)" | ||
) | ||
s3_bucket_parser.add_argument( | ||
"--output-prefix", help="Prefix for uploaded files in the output bucket (optional)", default="" | ||
) | ||
s3_bucket_parser.set_defaults( | ||
func=lambda args: processor.process_s3_bucket( | ||
args.input_bucket, args.input_prefix, args.output_bucket, args.output_prefix | ||
) | ||
) | ||
|
||
# Subparser for processing a single JP2 file in S3 | ||
s3_file_parser = subparsers.add_parser( | ||
"s3-file", help="Process a single JP2 file in S3" | ||
) | ||
s3_file_parser.add_argument( | ||
"input_bucket", help="Name of the AWS S3 bucket containing the JP2 file" | ||
) | ||
s3_file_parser.add_argument( | ||
"--input-key", help="Key (path) of the JP2 file in the S3 bucket", required=True | ||
) | ||
s3_file_parser.add_argument( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Make required. |
||
"--output-bucket", help="Name of the AWS S3 bucket to upload the modified file (optional)" | ||
) | ||
s3_file_parser.add_argument( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We can remove this argument. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do you want to leave it as an option or remove it completely? If it is optional but not used, it defaults to the input bucket. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Keep this one, after discussion with Andrew. |
||
"--output-prefix", help="Prefix for the uploaded file in the output bucket (optional)", default="" | ||
) | ||
s3_file_parser.add_argument( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Make required. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Right now it is optional. Is there a situation where you wouldn't want to use the --output-prefix option, such as if you wanted to just put it in the same input directory, or the same bucket? |
||
"--output-key", help="Full key (path) for the uploaded file (overrides output-prefix)" | ||
) | ||
s3_file_parser.set_defaults( | ||
func=lambda args: processor.process_s3_file( | ||
args.input_bucket, args.input_key, args.output_bucket, args.output_prefix, args.output_key | ||
) | ||
) | ||
|
||
args = parser.parse_args() | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,30 +26,50 @@ def process_directory(self, directory_path): | |
reader = self.box_reader_factory.get_reader(file_path) | ||
reader.read_jp2_file() | ||
|
||
def process_s3_bucket(self, bucket_name, prefix=""): | ||
def process_s3_file(self, input_bucket, input_key, output_bucket, output_prefix=None, output_key=None): | ||
"""Process a specific JP2 file from S3 and upload to a specified S3 location.""" | ||
s3 = boto3.client("s3") | ||
|
||
# Generate the output key dynamically if not explicitly provided | ||
if not output_key: | ||
timestamp = datetime.datetime.now().strftime("%Y%m%d") | ||
output_key = os.path.join( | ||
output_prefix, | ||
os.path.basename(input_key).replace(".jp2", f"_modified_file_{timestamp}.jp2") | ||
) | ||
|
||
# Download the file from S3 | ||
download_path = f"/tmp/{os.path.basename(input_key)}" | ||
print(f"Downloading file: {input_key} from bucket: {input_bucket}") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please replace all |
||
s3.download_file(input_bucket, input_key, download_path) | ||
|
||
# Process the file | ||
reader = self.box_reader_factory.get_reader(download_path) | ||
reader.read_jp2_file() | ||
|
||
# Generate the modified file path | ||
timestamp = datetime.datetime.now().strftime("%Y%m%d") | ||
modified_file_path = download_path.replace(".jp2", f"_modified_{timestamp}.jp2") | ||
|
||
if os.path.exists(modified_file_path): | ||
print(f"Uploading modified file to bucket: {output_bucket}, key: {output_key}") | ||
s3.upload_file(modified_file_path, output_bucket, output_key) | ||
else: | ||
print(f"File {modified_file_path} does not exist, skipping upload.") | ||
|
||
def process_s3_bucket(self, input_bucket, input_prefix, output_bucket, output_prefix): | ||
"""Process all JP2 files in a given S3 bucket.""" | ||
s3 = boto3.client("s3") | ||
response = s3.list_objects_v2(Bucket=bucket_name, Prefix=prefix) | ||
response = s3.list_objects_v2(Bucket=input_bucket, Prefix=input_prefix) | ||
|
||
if "Contents" in response: | ||
for obj in response["Contents"]: | ||
if obj["Key"].lower().endswith(".jp2"): | ||
file_path = obj["Key"] | ||
print(f"""Processing file: {file_path} from bucket { | ||
bucket_name | ||
}""") | ||
download_path = f"/tmp/{os.path.basename(file_path)}" | ||
s3.download_file(bucket_name, file_path, download_path) | ||
reader = self.box_reader_factory.get_reader(download_path) | ||
reader.read_jp2_file() | ||
# Optionally, upload modified file back to S3 | ||
timestamp = datetime.datetime.now().strftime( | ||
"%Y%m%d" | ||
) # use "%Y%m%d_%H%M%S" for more precision | ||
s3.upload_file( | ||
download_path.replace( | ||
".jp2", f"_modified_{timestamp}.jp2" | ||
), | ||
bucket_name, | ||
file_path.replace(".jp2", f"_modified_{timestamp}.jp2") | ||
input_key = obj["Key"] | ||
timestamp = datetime.datetime.now().strftime("%Y%m%d") | ||
output_key = os.path.join( | ||
output_prefix, | ||
os.path.basename(input_key).replace(".jp2", f"_modified_{timestamp}.jp2") | ||
) | ||
print(f"Processing file: {input_key} from bucket: {input_bucket}") | ||
self.process_s3_file(input_bucket, input_key, output_bucket, output_key=output_key) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Change from:
to:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this still be required after adding the -- flag? Also, it will be --input-bucket instead of --input_bucket.