generated from MITLibraries/python-cli-template
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add option to download input files using a local MinIO server
Why these changes are being introduced: * Downloading extract files improves the performance of the app by reducing requests sent to AWS S3 and avoiding repeated downloads of extract files used across multiple container runs. Having extract files available on local disk also minimizes the occurrence of network issues or AWS credentials timing out during a transform. These changes introduce a locally hosted MinIO server to act as a "local S3 bucket" as part of the A/B diff workflow. How this addresses that need: * Add a Docker Compose YAML file to run local MinIO server * Add Makefile commands for starting and stopping local MinIO server * Add option '--download-files' to run-diff CLI command * Implement download_input_files core function * Update run_ab_transforms to support use of local MinIO server Side effects of this change: * None Relevant ticket(s): * https://mitlibraries.atlassian.net/browse/TIMX-353
- Loading branch information
1 parent
fd3a0c8
commit d163d93
Showing
10 changed files
with
602 additions
and
331 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
import logging | ||
import subprocess | ||
|
||
import boto3 | ||
from botocore.exceptions import ClientError | ||
from mypy_boto3_s3.client import S3Client | ||
|
||
from abdiff.config import Config | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
CONFIG = Config() | ||
|
||
|
||
def download_input_files(input_files: list[str]) -> None:
    """Mirror extract files from S3 onto the local MinIO server.

    Every input file is first looked up on the MinIO server (in the bucket
    named by CONFIG.TIMDEX_BUCKET); files that are already present are
    skipped. Each remaining file is transferred with two AWS CLI invocations
    run via subprocess:

    1. Read the source object and write its contents to stdout:

        aws s3 cp <input_file> -

    2. Read stdin and write it to the same URI on the MinIO server:

        aws s3 cp --endpoint-url <minio_s3_url> --profile minio - <input_file>

    The output of the first command is captured in full and then supplied as
    the stdin of the second command. If either command exits with a non-zero
    status, the failure is logged with its traceback and processing moves on
    to the next input file.

    Args:
        input_files: URIs of the extract files to copy.
    """
    minio_client = boto3.client(
        "s3",
        endpoint_url=CONFIG.minio_s3_url,
        aws_access_key_id=CONFIG.minio_root_user,
        aws_secret_access_key=CONFIG.minio_root_password,
    )

    for input_file in input_files:
        already_present = check_object_exists(
            CONFIG.TIMDEX_BUCKET, input_file, minio_client
        )
        if already_present:
            logger.info(f"File found for input: {input_file}. Skipping download.")
            continue

        logger.info(f"Downloading input file from {CONFIG.TIMDEX_BUCKET}: {input_file}")

        # "-" as the destination makes the AWS CLI write the object to stdout.
        read_source = ["aws", "s3", "cp", input_file, "-"]
        # "-" as the source makes the AWS CLI read the object body from stdin.
        write_to_minio = ["aws", "s3", "cp"]
        write_to_minio += ["--endpoint-url", CONFIG.minio_s3_url]
        write_to_minio += ["--profile", "minio"]
        write_to_minio += ["-", input_file]

        try:
            fetched = subprocess.run(args=read_source, check=True, capture_output=True)
            subprocess.run(args=write_to_minio, check=True, input=fetched.stdout)
        except subprocess.CalledProcessError:
            logger.exception(f"Failed to download input file: {input_file}")
|
||
|
||
def check_object_exists(bucket: str, input_file: str, s3_client: S3Client) -> bool:
    """Report whether the object for an input file exists in a bucket.

    Args:
        bucket: Name of the bucket to query.
        input_file: URI of the file, expected in the form 's3://<bucket>/<key>'.
        s3_client: S3 client used to issue the HEAD request.

    Returns:
        True when the HEAD request succeeds. False when the client reports a
        404, and also when any other ClientError is raised; in the latter case
        existence could not be determined and the error is logged with its
        traceback.
    """
    # Strip only the leading URI prefix. str.replace would also delete the same
    # text if it happened to appear again later in the key.
    key = input_file.removeprefix(f"s3://{bucket}/")
    try:
        s3_client.head_object(Bucket=bucket, Key=key)
    except ClientError as exception:
        if exception.response["Error"]["Code"] == "404":
            return False
        logger.exception(f"Cannot determine if object exists for key {key}.")
        return False
    else:
        return True
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Docker Compose definition for a single local MinIO server.
#
# Settings and configurations that are common for all containers
x-minio-common: &minio-common
  image: quay.io/minio/minio:RELEASE.2024-10-29T16-01-48Z
  # Serve objects from /mnt/data; the web console is bound to port 9001
  command: server --console-address ":9001" /mnt/data
  ports:
    - "9000:9000" # API port
    - "9001:9001" # Console port
  environment:
    # Root credentials are filled in from the variables of the same name
    MINIO_ROOT_USER: ${MINIO_ROOT_USER}
    MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD}
  healthcheck:
    # Probe with `mc ready local` every 5s (5s timeout, up to 5 retries)
    test: ["CMD", "mc", "ready", "local"]
    interval: 5s
    timeout: 5s
    retries: 5

services:
  minio:
    # Pull in every setting from the shared block above
    <<: *minio-common
    volumes:
      # The path in MINIO_S3_LOCAL_STORAGE backs the server's /mnt/data directory
      - ${MINIO_S3_LOCAL_STORAGE}:/mnt/data
|
||
|
||
|