diff --git a/Makefile b/Makefile index 130239fbd..b0fbfb745 100644 --- a/Makefile +++ b/Makefile @@ -10,6 +10,7 @@ test: pytest src/lambda_codebase/initial_commit/bootstrap_repository/adf-bootstrap/deployment/lambda_codebase -vvv -s -c src/lambda_codebase/initial_commit/bootstrap_repository/adf-bootstrap/deployment/lambda_codebase/pytest.ini pytest src/lambda_codebase/initial_commit/bootstrap_repository/adf-bootstrap/deployment/lambda_codebase/initial_commit -vvv -s -c src/lambda_codebase/initial_commit/bootstrap_repository/adf-bootstrap/deployment/lambda_codebase/initial_commit/pytest.ini pytest src/lambda_codebase/initial_commit/bootstrap_repository/adf-bootstrap/deployment/lambda_codebase/determine_default_branch -vvv -s -c src/lambda_codebase/initial_commit/bootstrap_repository/adf-bootstrap/deployment/lambda_codebase/determine_default_branch/pytest.ini + pytest src/lambda_codebase/initial_commit/bootstrap_repository/adf-build/shared/helpers -vvv -s -c src/lambda_codebase/initial_commit/bootstrap_repository/adf-build/shared/helpers/pytest.ini pytest src/lambda_codebase/initial_commit/bootstrap_repository/adf-build/shared/python -vvv -s -c src/lambda_codebase/initial_commit/bootstrap_repository/adf-build/shared/python/pytest.ini pytest src/lambda_codebase/initial_commit/bootstrap_repository/adf-build/shared/cdk -vvv -s -c src/lambda_codebase/initial_commit/bootstrap_repository/adf-build/shared/cdk/pytest.ini pytest src/lambda_codebase/initial_commit/bootstrap_repository/adf-build/shared -vvv -s -c src/lambda_codebase/initial_commit/bootstrap_repository/adf-build/shared/pytest.ini diff --git a/src/lambda_codebase/initial_commit/bootstrap_repository/adf-bootstrap/deployment/global.yml b/src/lambda_codebase/initial_commit/bootstrap_repository/adf-bootstrap/deployment/global.yml index b0f4431f3..84998fa52 100644 --- a/src/lambda_codebase/initial_commit/bootstrap_repository/adf-bootstrap/deployment/global.yml +++ b/src/lambda_codebase/initial_commit/bootstrap_repository/adf-bootstrap/deployment/global.yml @@ -775,11 +775,14 @@ Resources: commands: - npm install cdk@1.169 -g -y --quiet --no-progress - aws s3 cp s3://$SHARED_MODULES_BUCKET/adf-build/ ./adf-build/ --recursive --quiet - - pip install -r adf-build/requirements.txt -q -t ./adf-build + - pip install -r adf-build/requirements.txt -r adf-build/helpers/requirements.txt -q -t ./adf-build + pre_build: + commands: + - mkdir -p deployment_maps build: commands: - - bash -c "[[ -e deployment_map.yml ]] && echo 'Copying deployment_map.yml' && aws s3 cp deployment_map.yml s3://$ADF_PIPELINES_BUCKET/deployment_map.yml || echo 'No deployment_map.yml, skipping copy'" - - bash -c "[[ -e deployment_maps ]] && echo 'Syncing deployment_maps folder' && aws s3 sync deployment_maps s3://$ADF_PIPELINES_BUCKET/deployment_maps || echo 'No deployment_maps folder, skipping sync'" + - python adf-build/helpers/sync_to_s3.py --metadata adf_version=${!ADF_VERSION} --upload-with-metadata execution_id=${!CODEPIPELINE_EXECUTION_ID} deployment_map.yml s3://$ADF_PIPELINES_BUCKET/deployment_map.yml + - python adf-build/helpers/sync_to_s3.py --extension .yml --extension .yaml --metadata adf_version=${!ADF_VERSION} --upload-with-metadata execution_id=${!CODEBUILD_BUILD_NUMBER} --recursive deployment_maps s3://$ADF_PIPELINES_BUCKET/deployment_maps post_build: commands: - echo "Pipelines are updated in the AWS Step Functions ADFPipelineManagementStateMachine." 
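The buildspec above now delegates the deployment map uploads to the sync_to_s3.py helper that this change introduces further down. The core idea is that the helper only rewrites an object when its content or requested metadata changed, by comparing a locally computed hash against metadata stored on the S3 object. A condensed, illustrative sketch of that check is shown below; the `needs_upload` function name and the bucket/key used in the example invocation are placeholders, not part of this change, and the full implementation (recursive directory walks, extension filters, stale-object deletion) follows in adf-build/shared/helpers/sync_to_s3.py.

    # Illustrative sketch only: upload when the object is missing or when the
    # base64-encoded SHA-256 of the local file differs from the "sha256_hash"
    # metadata stored on the S3 object.
    import base64
    import hashlib

    import boto3
    from botocore.exceptions import ClientError


    def needs_upload(s3_client, bucket: str, key: str, local_path: str) -> bool:
        with open(local_path, "rb") as file_pointer:
            local_hash = str(base64.b64encode(
                hashlib.sha256(file_pointer.read()).digest()
            ))
        try:
            head = s3_client.head_object(Bucket=bucket, Key=key)
        except ClientError:
            # Treat any failed lookup as "needs upload".
            return True
        return head.get("Metadata", {}).get("sha256_hash") != local_hash


    if __name__ == "__main__":
        # Example invocation; replace the placeholder bucket and keys.
        print(needs_upload(
            boto3.client("s3"),
            "my-pipelines-bucket",
            "deployment_map.yml",
            "deployment_map.yml",
        ))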
@@ -829,6 +832,14 @@ Resources: - Name: "Source" Configuration: ProjectName: !Ref CodeBuildProject + EnvironmentVariables: >- + [ + { + "name": "CODEPIPELINE_EXECUTION_ID", + "value": "#{codepipeline.PipelineExecutionId}", + "type": "PLAINTEXT" + } + ] RunOrder: 1 PipelineSNSTopic: diff --git a/src/lambda_codebase/initial_commit/bootstrap_repository/adf-bootstrap/deployment/lambda_codebase/determine_default_branch/requirements.txt b/src/lambda_codebase/initial_commit/bootstrap_repository/adf-bootstrap/deployment/lambda_codebase/determine_default_branch/requirements.txt index da6ec56c2..2a653ddc1 100644 --- a/src/lambda_codebase/initial_commit/bootstrap_repository/adf-bootstrap/deployment/lambda_codebase/determine_default_branch/requirements.txt +++ b/src/lambda_codebase/initial_commit/bootstrap_repository/adf-bootstrap/deployment/lambda_codebase/determine_default_branch/requirements.txt @@ -1,2 +1,2 @@ -boto3==1.18.2 +boto3==1.24.59 cfn-custom-resource~=1.0.1 diff --git a/src/lambda_codebase/initial_commit/bootstrap_repository/adf-build/shared/helpers/__init__.py b/src/lambda_codebase/initial_commit/bootstrap_repository/adf-build/shared/helpers/__init__.py new file mode 100644 index 000000000..b0f3b0cc9 --- /dev/null +++ b/src/lambda_codebase/initial_commit/bootstrap_repository/adf-build/shared/helpers/__init__.py @@ -0,0 +1,2 @@ +# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: MIT-0 diff --git a/src/lambda_codebase/initial_commit/bootstrap_repository/adf-build/shared/helpers/pytest.ini b/src/lambda_codebase/initial_commit/bootstrap_repository/adf-build/shared/helpers/pytest.ini new file mode 100644 index 000000000..015e8596c --- /dev/null +++ b/src/lambda_codebase/initial_commit/bootstrap_repository/adf-build/shared/helpers/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +testpaths = tests +norecursedirs = terraform diff --git a/src/lambda_codebase/initial_commit/bootstrap_repository/adf-build/shared/helpers/sync_to_s3.py b/src/lambda_codebase/initial_commit/bootstrap_repository/adf-build/shared/helpers/sync_to_s3.py new file mode 100644 index 000000000..59b46b0a4 --- /dev/null +++ b/src/lambda_codebase/initial_commit/bootstrap_repository/adf-build/shared/helpers/sync_to_s3.py @@ -0,0 +1,915 @@ +# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: MIT-0 +""" +Sync files to an S3 Bucket. + +This script will only overwrite files when the content changed. +To determine whether a file changed, it will calculate the SHA-256 hash of the file +and match that against the object SHA256 hash metadata stored along with the +S3 object. + +If a file is stored inside the S3 Bucket that is no longer present +locally, it will clean it up too. + +Usage: + sync_to_s3.py [-v... | --verbose...] [-r | --recursive] [-d | --delete] + [-e <extension> | --extension <extension>]... + [--metadata <key>=<value>]... + [--upload-with-metadata <key>=<value>]... + [--] + SOURCE_PATH DESTINATION_S3_URL + + sync_to_s3.py -h | --help + + sync_to_s3.py --version + +Options: + -d, --delete + Delete stale files that are located in the destination bucket + with the corresponding S3 prefix. For example, if the + destination is set to s3://my-bucket/my-prefix, it will sync + all files inside the prefix location. If a file is located + there, that is not present in the source path locally, it will + get deleted. But only if the file extension of that path + matches (if one is specified). + + -e, --extension <extension> + File extension filter.
Files that match locally are only + uploaded if their extension matches. If this parameter is not + specified, it will not apply a filter and will match all files that + are found locally. The same filter is also applied on the + destination. For example, if the destination S3 location + contains a README.md file, while the extension is configured + to match '.yml', it will not delete the README.md file as its + extension is not a match. + + -h, --help Show this help message. + + --metadata <key>=<value> + The key and value pairs that are passed with this argument + will be added to the metadata. If the metadata set using this + argument does not match the metadata on the S3 object, it will + perform an update too. + + -r, --recursive + Indicating that the SOURCE_PATH is a directory, and it + should recursively walk through the source directories and sync + those to the S3 bucket. + + --upload-with-metadata <key>=<value> + When a file is uploaded, the key and value pairs that are + passed with this argument will be added. It will only apply + these metadata properties if the file is missing, or the + content of the file or any of the `--metadata` properties did + not match. + + -v, --verbose + Show verbose logging information. + + SOURCE_PATH + The source path where the original files are stored that should + be synced to the destination bucket. When you specify a + directory as the source path it will copy the files inside the + directory to the S3 bucket if you also specify the recursive + flag. Otherwise it will treat the source path as a file; when a + directory is detected instead, it will abort with an error. + If the source path is a directory, the object keys that are + derived from the files inside the directory will be relative to + the SOURCE_PATH. For example, if the SOURCE_PATH equals + `./adf-accounts`, which contains a file named + `adf-accounts/adf.yml`, it will copy the file as `adf.yml`. + If the prefix of the s3 bucket is set to `adf-s3-accounts`, the + final key of that specific file will be: + `adf-s3-accounts/adf.yml`. + If the SOURCE_PATH is a file and + the recursive flag is not specified, it will expect that the + s3 prefix is the new object name instead. In this case, if + SOURCE_PATH equals `./deployment_map.yml` and the s3 prefix + is `root_deployment_map.yml`, it will copy the file to the s3 + prefix key. + + DESTINATION_S3_URL + The destination bucket and its prefix where the files should be + copied to. The s3 bucket and its optional prefix should be + specified as: s3://your-bucket-name/your-optional-prefix. + In this case, `your-bucket-name` is the name of the bucket, + while `your-optional-prefix` is the name of the prefix used for + all files that are copied to S3. If a directory is copied, i.e. + recursive is set, it will prepend the prefix to the object + keys of the files that are synced. If a file is copied instead, + i.e. no --recursive, it will use the s3 prefix as the target + object key to use for that file. + +Examples: + + Copy the deployment_map.yml file to an S3 bucket as + root_deployment_map.yml, and delete the root_deployment_map.yml if the + local deployment_map.yml file is missing: + + $ python sync_to_s3.py -d deployment_map.yml \\ + s3://deploy-bucket/root_deployment_map.yml + + Copy all .yml files from the deployment_maps folder to an S3 bucket where + the objects are prefixed with the `deployment_maps/`, deleting the .yml + objects inside the deployment_maps prefix that no longer exist locally.
+ + $ python sync_to_s3.py -d -e .yml -r deployment_maps \\ + s3://deploy-bucket/deployment_maps + + Copy all .yml files from the source_folder folder to an S3 bucket where + the objects are prefixed with the `object_folder/`, deleting the .yml + objects inside the object_folder prefix that no longer exist locally. Additionally, + all files will get the metadata set to include `adf_version`. If the + file is uploaded/updated, it will also apply the `execution_id` metadata. + + $ python sync_to_s3.py -d -e .yml -r source_folder \\ + --metadata "adf_version=x.y.z" \\ + --upload-with-metadata "execution_id=$EXEC_ID" \\ + s3://deploy-bucket/object_folder +""" + +import os +import sys +from typing import Mapping, TypedDict +from pathlib import Path +from urllib.parse import urlparse +import hashlib +import logging +import base64 +import boto3 +from docopt import docopt + + +ADF_VERSION = os.environ.get("ADF_VERSION") +ADF_LOG_LEVEL = os.environ.get("ADF_LOG_LEVEL", "INFO") +NON_RECURSIVE_KEY = '%%-single-match-%%' + +logging.basicConfig(level=logging.INFO) +LOGGER = logging.getLogger(__name__) +LOGGER.setLevel(ADF_LOG_LEVEL) + + +class GenericFileData(TypedDict): + """ + Generic File or Object Data class. + """ + key: str + + +class LocalFileData(GenericFileData): + """ + Local File Data class, extended from the GenericFileData. + """ + file_path: str + sha256_hash: str + + +class S3ObjectData(GenericFileData): + """ + S3 Object Data class, extended from the GenericFileData. + """ + metadata: dict[str, str] + + +class MetadataToCheck(TypedDict): + always_apply: dict[str, str] + upon_upload_apply: dict[str, str] + + +def get_local_files( + local_path: str, + file_extensions: [str], + recursive: bool, +) -> Mapping[str, LocalFileData]: + """ + Retrieve the files that are in the relative path local_path. + This will perform a search inside a directory if the local_path is a + directory and the recursive flag is set. Alternatively, it will determine + if a specific file exists and if so, it will retrieve that one only. + + Args: + local_path (str): The local path to search in/lookup. + + file_extensions ([str]): The file_extensions to search for, or empty + list if this filter should not be applied. + + recursive (bool): Whether to search recursively or not. + + Returns: + Mapping[str, LocalFileData]: The map of the Local File Data objects + representing the local file(s) that were found. + The keys of the map are derived from the local file path relative + to the local_path. With a single object, the key used is + a special non-recursive identifier key instead. + The value of the map is the Local File Data. + """ + if recursive: + return _get_recursive_local_files( + local_path, + file_extensions, + ) + return _get_single_local_file( + local_path, + ) + + +def _get_recursive_local_files( + local_path: str, + file_extensions: [str], +) -> Mapping[str, LocalFileData]: + """ + Retrieve the files that are in the relative path local_path. + A glob search is performed for each of the specified file extensions, + or for all local files if no extensions are specified. + + Args: + local_path (str): The local files to search in. + + file_extensions ([str]): The file_extensions to search for, or empty + list if this filter should not be applied. This will be converted + to a glob search, where the extension ".yml" will match files with + the glob search "**/*.yml", returning any YAML file that ends with + .yml, including those in subdirectories.
+ + Returns: + Mapping[str, LocalFileData]: The map of the Local File Data objects + representing the local files that were found. + The keys of the map are derived from the local file path relative + to the local_path. + The value of the map is the Local File Data. + """ + path = get_full_local_path(local_path) + LOGGER.debug( + "Searching for local files in %s matching %s", + str(path), + file_extensions, + ) + local_files = {} + globs_to_match = [ + f"**/*{ext}" + for ext in ( + # File extensions or a list of an empty string, so it either + # generates "**/*{ext}" for each extension in file_extensions + # or it generates "**/*" + file_extensions or [""] + ) + ] + for glob in globs_to_match: + for file_path in path.glob(glob): + local_file_data = _get_local_file_data(file_path, path) + local_files[local_file_data['key']] = local_file_data + + LOGGER.debug( + "Found %d local files: %s", + len(local_files.keys()), + local_files, + ) + return local_files + + +def _get_single_local_file( + local_path: str, +) -> Mapping[str, LocalFileData]: + """ + Retrieve the file that is at the relative path local_path, or None if that + does not exist. + + Args: + local_path (str): The local files to search in. + + Returns: + Mapping[str, LocalFileData]: The map of the Local File Data object + representing the local file if one is found. + The keys of the map are derived from the local file path relative + to the local_path. + The value of the map is the Local File Data. + """ + path = get_full_local_path(local_path) + LOGGER.debug( + "Checking if local file at %s exists", + str(path), + ) + local_files = {} + if path.exists(): + local_file_data = _get_local_file_data(path, path.parent) + local_files[NON_RECURSIVE_KEY] = local_file_data + LOGGER.debug( + "File exists: %s", + local_files, + ) + else: + LOGGER.debug( + "File does not exist at: %s", + path, + ) + + return local_files + + +def _get_local_file_data( + file_path: Path, + relative_to_path: Path, +) -> LocalFileData: + """ + Get the local file data for the given path. + + This will open the file, calculate its hash and return that + with in a LocalFileData object. + + Args: + file_path (Path): The path of the file to read. + + relative_to_path (Path): The path that should be used to determine the + relative path of the local file. If an object lives inside + `x_path/y_path`. And the relative_to_path is set to `x_path`, the + key of the local file will become: `y_path`. + + Returns: + LocalFileData: The LocalFileData instance that holds the file + information such as the sha256_hash, its relative path, etc. + """ + with open(file_path, "rb", buffering=0) as file_pointer: + file_hash = hashlib.sha256() + memory_view = memoryview(bytearray(1024*1024)) + while data_read := file_pointer.readinto(memory_view): + file_hash.update(memory_view[:data_read]) + relative_path = str(file_path.relative_to(relative_to_path)) + return { + "key": relative_path, + "file_path": str(file_path), + "sha256_hash": str(base64.b64encode(file_hash.digest())), + } + + +def get_s3_objects( + s3_client: any, + s3_bucket: str, + s3_prefix: str, + file_extensions: [str], + recursive: bool, +): + """ + Retrieve the object or objects that are stored inside the S3 bucket. + When asked to search recursively, it will perform a search on the S3 bucket + using the specified prefix and file extension. + While it will perform a single object lookup otherwise. + + Args: + s3_client (Boto3.Client): The Boto3 S3 Client to interact with when + a file needs to be deleted. 
+ s3_bucket (str): The bucket name. + s3_prefix (str): The prefix under which the objects are stored in + the bucket. + file_extensions ([str]): The file extensions of objects that would + match. + recursive (bool): Whether to search recursively or not. + + Returns: + Mapping[str, S3ObjectData]: The map of the S3 objects that were + found. + """ + if recursive: + return _get_recursive_s3_objects( + s3_client, + s3_bucket, + s3_prefix, + file_extensions, + ) + + return _get_single_s3_object( + s3_client, + s3_bucket, + s3_prefix, + ) + + +def _get_recursive_s3_objects( + s3_client: any, + s3_bucket: str, + s3_prefix: str, + file_extensions: [str], +) -> Mapping[str, S3ObjectData]: + """ + Retrieve the objects that are stored inside the S3 bucket, which keys + start with the specified s3_prefix. + + Args: + s3_client (Boto3.Client): The Boto3 S3 Client to interact with when + a file needs to be deleted. + s3_bucket (str): The bucket name. + s3_prefix (str): The prefix under which the objects are stored in + the bucket. + file_extensions ([str]): The file extension of objects that would + match. + + Returns: + Mapping[str, S3ObjectData]: The map of the S3 objects that were + found. The keys of the map are derived from the object key relative + to the s3_prefix. Unless the key is equal to the s3_prefix, in that + case the full object key is used as the key. The value of the map + is the S3 Object Data. + """ + LOGGER.debug( + "Searching for S3 objects in s3://%s/%s", + s3_bucket, + s3_prefix, + ) + s3_list_objects_paginator = s3_client.get_paginator("list_objects_v2") + s3_object_iterator = s3_list_objects_paginator.paginate( + Bucket=s3_bucket, + Prefix=f"{s3_prefix}/", + ) + s3_objects = {} + for response_data in s3_object_iterator: + for obj in response_data.get("Contents", []): + matched_extensions = list( + # The filter matches its Key against the file_extensions + # to see if it ends with that specific extension. + # This will return an empty list if it did not match or + # if the file_extensions is empty. + filter(obj.get("Key").endswith, file_extensions) + ) + if file_extensions and not matched_extensions: + # If we should filter on extensions and we did not match + # with any, we should skip this object. + continue + index_key = convert_to_local_key(obj.get("Key"), s3_prefix) + s3_objects[index_key] = _get_s3_object_data( + s3_client, + s3_bucket, + obj.get("Key"), + ) + + LOGGER.debug( + "Found %d S3 objects at: s3://%s/%s: %s", + len(s3_objects.keys()), + s3_bucket, + s3_prefix, + s3_objects, + ) + return s3_objects + + +def _get_single_s3_object( + s3_client: any, + s3_bucket: str, + s3_object_key: str, +) -> Mapping[str, S3ObjectData]: + """ + Retrieve a single object that is stored inside the S3 bucket, which object + key equals the specified s3_object_key. + + Args: + s3_client (Boto3.Client): The Boto3 S3 Client to interact with when + a file needs to be deleted. + s3_bucket (str): The bucket name. + s3_object_key (str): The object key under which the object might or + should be stored in the bucket. + + Returns: + Mapping[str, S3ObjectData]: The map of the S3 objects that were + found. The keys of the map is set to the non recursive identifier. + The value of the map is the S3 Object Data. 
+ """ + LOGGER.debug( + "Searching for S3 object in s3://%s/%s", + s3_bucket, + s3_object_key, + ) + s3_object_data = _get_s3_object_data( + s3_client, + s3_bucket, + s3_object_key, + ) + if not s3_object_data: + return {} + + s3_objects = {} + s3_objects[NON_RECURSIVE_KEY] = s3_object_data + + LOGGER.debug( + "Found S3 object at: s3://%s/%s: %s", + s3_bucket, + s3_object_key, + s3_objects, + ) + return s3_objects + + +def _get_s3_object_data(s3_client, s3_bucket, key): + try: + obj_data = s3_client.head_object( + Bucket=s3_bucket, + Key=key, + ) + return { + "key": key, + "metadata": obj_data.get("Metadata", {}), + } + except s3_client.exceptions.NoSuchKey: + LOGGER.debug( + "Could not find s3://%s/%s", + s3_bucket, + key, + ) + return None + + +def upload_changed_files( + s3_client: any, + s3_bucket: str, + s3_prefix: str, + local_files: Mapping[str, LocalFileData], + s3_objects: Mapping[str, S3ObjectData], + metadata_to_check: MetadataToCheck, +): + """ + Upload changed files, by looping over the local files found and checking + if these still exist in the S3 bucket as objects. If they do, the SHA256 + hash is compared. The file is uploaded to the bucket if the file is + missing or when the SHA256 hash does not match. + + Args: + s3_client (Boto3.Client): The Boto3 S3 Client to interact with when + a file needs to be deleted. + + s3_bucket (str): The bucket name. + + s3_prefix (str): The prefix under which the objects are stored in + the bucket. + + local_files (Mapping[str, LocalFileData]): The map of LocalFileData + objects, representing the files that were found locally. + + s3_objects (Mapping[str, S3ObjectData]): The map of S3ObjectData + objects representing the objects that were found in the S3 bucket. + + metadata_to_check (MetadataToCheck): The metadata that needs to be + applied all the time and upon upload only. + """ + for key, local_file in local_files.items(): + s3_file = s3_objects.get(key) + + object_is_missing = s3_file is None + s3_metadata = {} if object_is_missing else s3_file["metadata"] + content_changed = ( + s3_metadata.get("sha256_hash") != local_file.get("sha256_hash") + ) + metadata_changed = ( + dict(filter( + lambda item: item[0] in metadata_to_check["always_apply"], + s3_metadata.items(), + )) != metadata_to_check["always_apply"] + ) + if (object_is_missing or content_changed or metadata_changed): + with open(local_file.get("file_path"), "rb") as file_pointer: + s3_key = convert_to_s3_key(key, s3_prefix) + + LOGGER.info( + "Uploading file %s to s3://%s/%s because the %s", + local_file.get("file_path"), + s3_bucket, + s3_key, + ( + "object does not exist yet" if object_is_missing + else ( + "file content changed" if content_changed + else "metadata changed" + ) + ), + ) + s3_client.put_object( + Body=file_pointer, + Bucket=s3_bucket, + Key=s3_key, + Metadata={ + **metadata_to_check['always_apply'], + **metadata_to_check['upon_upload_apply'], + "sha256_hash": local_file.get("sha256_hash"), + } + ) + + +def delete_stale_objects( + s3_client: any, + s3_bucket: str, + s3_prefix: str, + local_files: Mapping[str, LocalFileData], + s3_objects: Mapping[str, S3ObjectData], +): + """ + Delete stale files, by looping over the objects found in S3 and checking + if these still exist locally. If not, they are stale and need to be + deleted. + + Args: + s3_client (Boto3.Client): The Boto3 S3 Client to interact with when + a file needs to be deleted. + s3_bucket (str): The bucket name. + s3_prefix (str): The prefix under which the objects are stored in + the bucket. 
+ local_files (Mapping[str, LocalFileData]): The map of LocalFileData + objects, representing the files that were found locally. + s3_objects (Mapping[str, S3ObjectData]): The map of S3ObjectData + objects representing the objects that were found in the S3 bucket. + """ + to_delete = [] + for key in s3_objects.keys(): + if local_files.get(key) is None: + s3_key = convert_to_s3_key(key, s3_prefix) + to_delete.append({ + "Key": s3_key, + }) + + if to_delete: + LOGGER.info( + "Deleting stale objects in s3://%s: %s", + s3_bucket, + to_delete, + ) + s3_client.delete_objects( + Bucket=s3_bucket, + Delete={ + "Objects": to_delete, + }, + ) + + +def clean_s3_prefix(original_prefix: str) -> str: + """ + Clean the S3 prefix, such that it does not start with a slash + and does not end with a slash. + + i.e. `/some/path/` will become `some/path` + + Args: + original_prefix (str): The original prefix that should be cleaned. + + Returns: + str: The cleaned prefix. + """ + new_prefix = ( + original_prefix[1:] if original_prefix.startswith("/") + else original_prefix + ) + + if original_prefix.endswith("/"): + return new_prefix[:-1] + + return new_prefix + + +def get_full_local_path(local_path: str) -> Path: + """ + Convert the local path str to the full Path. + + Args: + local_path (Path): The path where it should run the search from. + Can be an absolute path or a relative path to the current working + directory. Both will be translated to a full Path. + + Returns: + Path: The full Path instance pointing to the local_path + relative to the directory this command was executed from. Or the + Path instance pointing to the local_path if that is an absolute + path already. + """ + path = Path(local_path) + if path.is_absolute(): + return path + + here = Path(os.getcwd()) + return here / path + + +def convert_to_s3_key(local_key, s3_prefix): + """ + Convert the local key to an S3 key. + + Args: + local_key (str): The local key of the file (relative to the directory). + s3_prefix (str): The S3 prefix that is in use. + + Returns: + str: Returns the s3_prefix if that matches the local_key. + When it did not match, it returns the `/{s3_prefix}/{local_key}` + """ + if s3_prefix and local_key == NON_RECURSIVE_KEY: + return s3_prefix + + if s3_prefix and local_key != s3_prefix: + return f"{s3_prefix}/{local_key}" + + return local_key + + +def convert_to_local_key(s3_key, s3_prefix): + """ + Convert the S3 key to a local key. + + Args: + s3_key (str): The s3 key of the object includes the s3 prefix. + s3_prefix (str): The S3 prefix that is in use. + + Returns: + str: Returns the local key if that matches the s3_prefix. + When it did not match, it removes the s3 prefix and returns + the relative local_key. + """ + if s3_prefix and s3_key != s3_prefix: + return str(Path(s3_key).relative_to(s3_prefix)) + + return s3_key + + +def ensure_valid_input( + local_path: str, + file_extensions: [str], + s3_url: str, + s3_bucket: str, + s3_prefix: str, + recursive: bool, +): + if not local_path: + LOGGER.error( + "Input error: You need to specify the source path!" + ) + sys.exit(1) + + if not s3_url: + LOGGER.error( + "Input error: You need to specify the destination S3 url!" + ) + sys.exit(2) + + if not recursive and not s3_prefix: + LOGGER.error( + "Input error: Requested to sync single object, but no S3 object " + "location was specified! " + ) + LOGGER.error( + "In case you would like to sync a single object " + "to %s, you will need to specify the full object location. 
" + "For example, s3://%s/this-is-the-target-object-location.yml", + s3_url, + s3_bucket, + ) + sys.exit(3) + + full_path = get_full_local_path(local_path) + if recursive and not full_path.exists(): + LOGGER.error( + "Input error: The source path %s does not exist!", + local_path, + ) + sys.exit(4) + + if not recursive and full_path.exists() and full_path.is_dir(): + LOGGER.error( + "Input error: When syncing a single file the source path %s " + "should be referencing a file not a directory!", + local_path, + ) + sys.exit(5) + + if file_extensions and not recursive: + LOGGER.warning("Input warning: Ignoring file_extension filter.") + LOGGER.warning( + "Input warning: The file_extension filter is not applied " + "when you are trying to sync a single file to S3. " + "The --extension argument is only compatible when " + "performing a --recursive directory sync." + ) + + + +def sync_files( + s3_client: any, + local_path: str, + file_extensions: [str], + s3_url: str, + recursive: bool, + delete: bool, + metadata_to_check: MetadataToCheck, +): + """ + Sync files using the S3 client from the local_path, matching the local_glob + to the specific s3_url. + + Args: + s3_client (Boto3.Client): The Boto3 S3 Client to interact with when + a file needs to be deleted. + + local_path (str): The local path where the source files are stored. + + file_extensions ([str]): The extensions to search for files inside a + specific path. For example, [".yml", ".yaml"] will return all + YAML files, including those in sub directories. + + s3_url (str): The S3 URL to use, for example + S3://bucket/specific/prefix. + + recursive (bool): Whether to search the source directory recursively + or not. + + delete (bool): Whether to delete stale objects from the S3 bucket if + the source file no longer exists. + + metadata_to_check (MetadataToCheck): The metadata that needs to be + applied all the time and upon upload only. + """ + s3_url_details = urlparse(s3_url) + s3_bucket = s3_url_details.netloc + s3_prefix = clean_s3_prefix(str(s3_url_details.path)) + + ensure_valid_input( + local_path, + file_extensions, + s3_url, + s3_bucket, + s3_prefix, + recursive, + ) + + local_files = get_local_files(local_path, file_extensions, recursive) + + s3_objects = get_s3_objects( + s3_client, + s3_bucket, + s3_prefix, + file_extensions, + recursive, + ) + + upload_changed_files( + s3_client, + s3_bucket, + s3_prefix, + local_files, + s3_objects, + metadata_to_check, + ) + if delete: + delete_stale_objects( + s3_client, + s3_bucket, + s3_prefix, + local_files, + s3_objects, + ) + + +def main(): # pylint: disable=R0915 + """Main function to sync files to S3""" + + options = docopt(__doc__, version=ADF_VERSION, options_first=True) + # In case the user asked for verbose logging, increase + # the log level to debug. 
+ if options["--verbose"] > 0: + LOGGER.setLevel(logging.DEBUG) + if options["--verbose"] > 1: + # Also enable DEBUG mode for other libraries, like boto3 + logging.basicConfig(level=logging.DEBUG) + + LOGGER.debug("Input arguments: %s", options) + + local_path = options.get('SOURCE_PATH') + # Remove duplicates from file extension list if there are any + file_extensions = list(set(options.get('--extension'))) + s3_url = options.get('DESTINATION_S3_URL') + recursive = options.get('--recursive', False) + delete = options.get('--delete', False) + + # Convert metadata key and value lists into a dictionary + metadata_to_check: MetadataToCheck = { + 'always_apply': dict(map( + lambda kv_pair: ( + kv_pair[:kv_pair.find("=")], + kv_pair[(kv_pair.find("=") + 1):] + ), + options['--metadata'], + )), + 'upon_upload_apply': dict(map( + lambda kv_pair: ( + kv_pair[:kv_pair.find("=")], + kv_pair[(kv_pair.find("=") + 1):] + ), + options['--upload-with-metadata'], + )), + } + + s3_client = boto3.client("s3") + sync_files( + s3_client, + local_path, + file_extensions, + s3_url, + recursive, + delete, + metadata_to_check, + ) + LOGGER.info("All done.") + + +if __name__ == "__main__": + main() diff --git a/src/lambda_codebase/initial_commit/bootstrap_repository/adf-build/shared/helpers/tests/__init__.py b/src/lambda_codebase/initial_commit/bootstrap_repository/adf-build/shared/helpers/tests/__init__.py new file mode 100644 index 000000000..e164948b0 --- /dev/null +++ b/src/lambda_codebase/initial_commit/bootstrap_repository/adf-build/shared/helpers/tests/__init__.py @@ -0,0 +1,11 @@ +# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: MIT-0 + +""" +__init__ for tests module +""" + +import sys +import os + +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) diff --git a/src/lambda_codebase/initial_commit/bootstrap_repository/adf-build/shared/helpers/tests/test_sync_to_s3.py b/src/lambda_codebase/initial_commit/bootstrap_repository/adf-build/shared/helpers/tests/test_sync_to_s3.py new file mode 100644 index 000000000..80c0fe125 --- /dev/null +++ b/src/lambda_codebase/initial_commit/bootstrap_repository/adf-build/shared/helpers/tests/test_sync_to_s3.py @@ -0,0 +1,1049 @@ +import os +from typing import Mapping +from pathlib import Path +from copy import deepcopy +from mock import Mock, patch, call, ANY +import pytest +from base64 import b64encode +from hashlib import sha256 +import tempfile +from sync_to_s3 import * + +# pylint: skip-file + +S3_PREFIX = "s3-prefix" +CURRENT_VERSION = "This is the current version on S3".encode("utf-8") +NEW_VERSION = "This will be uploaded to S3".encode("utf-8") +CURRENT_HASH = str(b64encode(sha256(CURRENT_VERSION).digest())) +NEW_HASH = str(b64encode(sha256(NEW_VERSION).digest())) +UPLOAD_PREVIOUS_METADATA = { + "execution_id": "a-b-c-d", +} +UPLOAD_NEW_METADATA = { + "execution_id": "b-c-d-e", +} +PREVIOUS_METADATA = { + "adf_version": "x.y.z", + "another_key": "and_its_value", +} +CURRENT_METADATA = { + "adf_version": "x.y.z+1", + "another_key": "and_its_value", +} +IRRELEVANT_METADATA = { + "irrelevant_metadata": "some irrelevant value", + "another_irrelevant_key": "and-value", +} + +EXAMPLE_LOCAL_FILES: Mapping[str, LocalFileData] = { + "first-file.yml": { + "key": "first-file.yml", + "file_path": "/full/path/first-file.yml", + "sha256_hash": CURRENT_HASH, + }, + "second-file.yaml": { + "key": "second-file.yaml", + "file_path": "/full/path/second-file.yaml", + "sha256_hash": CURRENT_HASH, + }, + 
"second-file.yaml": { + "key": "second-file.yaml", + "file_path": "/full/path/second-file.yaml", + "sha256_hash": CURRENT_HASH, + }, + "needs-new-metadata-file.yaml": { + "key": "needs-new-metadata-file.yaml", + "file_path": "/full/path/needs-new-metadata-file.yaml", + "sha256_hash": CURRENT_HASH, + }, + "updated-file.yml": { + "key": "updated-file.yml", + "file_path": "/full/path/updated-file.yml", + "sha256_hash": NEW_HASH, + }, + "missing-file.yml": { + "key": "missing-file.yml", + "file_path": "/full/path/missing-file.yml", + "sha256_hash": NEW_HASH, + }, +} +EXAMPLE_S3_OBJECTS: Mapping[str, S3ObjectData] = { + "first-file.yml": { + "key": f"{S3_PREFIX}/first-file.yml", + "metadata": { + **CURRENT_METADATA, + **UPLOAD_PREVIOUS_METADATA, + **IRRELEVANT_METADATA, + "sha256_hash": CURRENT_HASH, + } + }, + "second-file.yaml": { + "key": f"{S3_PREFIX}/second-file.yaml", + "metadata": { + **CURRENT_METADATA, + **UPLOAD_PREVIOUS_METADATA, + **IRRELEVANT_METADATA, + "sha256_hash": CURRENT_HASH, + } + }, + "needs-new-metadata-file.yaml": { + "key": f"{S3_PREFIX}/needs-new-metadata-file.yaml", + "metadata": { + **PREVIOUS_METADATA, + **UPLOAD_PREVIOUS_METADATA, + **IRRELEVANT_METADATA, + "sha256_hash": CURRENT_HASH, + } + }, + "updated-file.yml": { + "key": f"{S3_PREFIX}/updated-file.yml", + "metadata": { + **CURRENT_METADATA, + **UPLOAD_PREVIOUS_METADATA, + **IRRELEVANT_METADATA, + "sha256_hash": CURRENT_HASH, + } + }, + "stale-file.yml": { + "key": f"{S3_PREFIX}/stale-file.yml", + "metadata": { + **PREVIOUS_METADATA, + **UPLOAD_PREVIOUS_METADATA, + **IRRELEVANT_METADATA, + "sha256_hash": CURRENT_HASH, + } + }, +} + + +@patch("sync_to_s3.get_full_local_path") +def test_get_local_files_empty_directory(get_full_local_path): + file_extensions = [".yml"] + with tempfile.TemporaryDirectory() as directory_path: + get_full_local_path.return_value = Path(directory_path) + + assert get_local_files( + directory_path, + file_extensions, + recursive=True, + ) == {} + + get_full_local_path.assert_called_once_with(directory_path) + + +@patch("sync_to_s3.get_full_local_path") +def test_get_local_files_non_recursive_missing_file(get_full_local_path): + with tempfile.TemporaryDirectory() as directory_path: + local_path = Path(directory_path) / "missing-file.yml" + get_full_local_path.return_value = local_path + + assert get_local_files( + str(local_path), + file_extensions=[], + recursive=False, + ) == {} + + get_full_local_path.assert_called_once_with(str(local_path)) + + +@patch("sync_to_s3.get_full_local_path") +def test_get_local_files_recursive(get_full_local_path): + file_extensions = [".yml", ".yaml"] + example_local_files = deepcopy(EXAMPLE_LOCAL_FILES) + example_local_files["README.md"] = { + "key": "README.md", + "file_path": "/full/path/README.md", + "sha256_hash": NEW_HASH, + } + example_local_files["some-other-config.json"] = { + "key": "some-other-config.json", + "file_path": "/full/path/some-other-config.json", + "sha256_hash": CURRENT_HASH, + } + with tempfile.TemporaryDirectory() as directory_path: + get_full_local_path.return_value = Path(directory_path) + + for file in example_local_files.values(): + tmp_file_path = Path(directory_path) / file.get("key") + with open(tmp_file_path, "wb", buffering=0) as file_pointer: + file["file_path"] = str(Path(directory_path) / file.get("key")) + file_pointer.write( + NEW_VERSION if file.get("key") in [ + "updated-file.yml", + "missing-file.yml", + "README.md" + ] else CURRENT_VERSION + ) + return_local_files = deepcopy(example_local_files) + del 
return_local_files["README.md"] + del return_local_files["some-other-config.json"] + + assert get_local_files( + directory_path, + file_extensions, + recursive=True, + ) == return_local_files + + get_full_local_path.assert_called_once_with(directory_path) + + +@patch("sync_to_s3.get_full_local_path") +def test_get_local_files_recursive_any(get_full_local_path): + file_extensions = [] + example_local_files = deepcopy(EXAMPLE_LOCAL_FILES) + example_local_files["README.md"] = { + "key": "README.md", + "file_path": "/full/path/README.md", + "sha256_hash": NEW_HASH, + } + example_local_files["some-other-config.json"] = { + "key": "some-other-config.json", + "file_path": "/full/path/some-other-config.json", + "sha256_hash": CURRENT_HASH, + } + with tempfile.TemporaryDirectory() as directory_path: + get_full_local_path.return_value = Path(directory_path) + + for file in example_local_files.values(): + tmp_file_path = Path(directory_path) / file.get("key") + with open(tmp_file_path, "wb", buffering=0) as file_pointer: + file["file_path"] = str(Path(directory_path) / file.get("key")) + file_pointer.write( + NEW_VERSION if file.get("key") in [ + "updated-file.yml", + "missing-file.yml", + "README.md" + ] else CURRENT_VERSION + ) + + assert get_local_files( + directory_path, + file_extensions, + recursive=True, + ) == example_local_files + + get_full_local_path.assert_called_once_with(directory_path) + + +@patch("sync_to_s3.get_full_local_path") +def test_get_local_files_recursive_unrelated_only(get_full_local_path): + file_extensions = [".xml"] + example_local_files = deepcopy(EXAMPLE_LOCAL_FILES) + with tempfile.TemporaryDirectory() as directory_path: + get_full_local_path.return_value = Path(directory_path) + + for file in example_local_files.values(): + tmp_file_path = Path(directory_path) / file.get("key") + with open(tmp_file_path, "wb", buffering=0) as file_pointer: + file["file_path"] = str(Path(directory_path) / file.get("key")) + file_pointer.write( + NEW_VERSION if file.get("key") in [ + "updated-file.yml", + "missing-file.yml", + ] else CURRENT_VERSION + ) + + assert get_local_files( + directory_path, + file_extensions, + recursive=True, + ) == {} + + get_full_local_path.assert_called_once_with(directory_path) + + +@patch("sync_to_s3.get_full_local_path") +def test_get_local_files_recursive_no_filter(get_full_local_path): + file_extensions = [] + example_local_files = deepcopy(EXAMPLE_LOCAL_FILES) + example_local_files["README.md"] = { + "key": "README.md", + "file_path": "/full/path/README.md", + "sha256_hash": CURRENT_HASH, + } + example_local_files["some-other-config.json"] = { + "key": "some-other-config.json", + "file_path": "/full/path/some-other-config.json", + "sha256_hash": CURRENT_HASH, + } + with tempfile.TemporaryDirectory() as directory_path: + get_full_local_path.return_value = Path(directory_path) + + for file in example_local_files.values(): + tmp_file_path = Path(directory_path) / file.get("key") + with open(tmp_file_path, "wb", buffering=0) as file_pointer: + file["file_path"] = str(Path(directory_path) / file.get("key")) + file_pointer.write( + NEW_VERSION if file.get("key") in [ + "updated-file.yml", + "missing-file.yml", + ] else CURRENT_VERSION + ) + + assert get_local_files( + directory_path, + file_extensions, + recursive=True, + ) == example_local_files + + get_full_local_path.assert_called_once_with(directory_path) + + +@patch("sync_to_s3.get_full_local_path") +def test_get_local_file_non_recursive(get_full_local_path): + example_local_files = {} + file_name = 
"updated-file.yml" + example_local_files[NON_RECURSIVE_KEY] = ( + deepcopy(EXAMPLE_LOCAL_FILES[file_name]) + ) + with tempfile.TemporaryDirectory() as directory_path: + tmp_file_path = Path(directory_path) / file_name + get_full_local_path.return_value = tmp_file_path + + with open(tmp_file_path, mode="wb", buffering=0) as file_pointer: + example_local_files[NON_RECURSIVE_KEY]["file_path"] = str( + tmp_file_path, + ) + file_pointer.write(NEW_VERSION) + + assert get_local_files( + file_pointer.name, + file_extensions=[], + recursive=False, + ) == example_local_files + + get_full_local_path.assert_called_once_with(file_pointer.name) + + +def test_get_s3_objects_recursive_empty_bucket(): + s3_client = Mock() + s3_bucket = "your-bucket" + s3_prefix = S3_PREFIX + file_extensions = [".yml"] + + paginator = Mock() + s3_client.get_paginator.return_value = paginator + paginator.paginate.return_value = [ + {}, + ] + + assert get_s3_objects( + s3_client, + s3_bucket, + s3_prefix, + file_extensions, + recursive=True, + ) == {} + + +def test_get_s3_objects_recursive_unrelated_files_only(): + s3_client = Mock() + s3_bucket = "your-bucket" + s3_prefix = S3_PREFIX + file_extensions = [".yml"] + + paginator = Mock() + s3_client.get_paginator.return_value = paginator + paginator.paginate.return_value = [ + { + "Contents": [ + { + "Key": "README.md", + }, + { + "Key": "other-file.json", + }, + { + "Key": "another-file.yaml", + } + ], + }, + ] + + assert get_s3_objects( + s3_client, + s3_bucket, + s3_prefix, + file_extensions, + recursive=True, + ) == {} + + +def test_get_s3_objects_non_recursive_missing_object(): + s3_client = Mock() + s3_bucket = "your-bucket" + s3_object_key = f"{S3_PREFIX}/missing-file.yml" + file_extensions = [] + + s3_client.exceptions.NoSuchKey = Exception + s3_client.head_object.side_effect = s3_client.exceptions.NoSuchKey() + + assert get_s3_objects( + s3_client, + s3_bucket, + s3_object_key, + file_extensions, + recursive=False, + ) == {} + + +def test_get_s3_objects_recursive_success(): + s3_client = Mock() + s3_bucket = "your-bucket" + s3_prefix = S3_PREFIX + example_s3_objects = deepcopy(EXAMPLE_S3_OBJECTS) + file_extensions = [".yml", ".yaml"] + + paginator = Mock() + s3_client.get_paginator.return_value = paginator + + s3_obj_keys = list(map( + lambda obj: { + "Key": obj["key"], + }, + example_s3_objects.values(), + )) + s3_obj_data = dict(map( + lambda obj: ( + obj["key"], + { + "Key": obj["key"], + "Metadata": obj["metadata"], + } + ), + example_s3_objects.values(), + )) + paginator.paginate.return_value = [ + { + "Contents": s3_obj_keys[:2], + }, + { + "Contents": [ + { + "Key": "README.md", + }, + { + "Key": "other-file.json", + } + ], + }, + { + "Contents": s3_obj_keys[2:], + }, + ] + s3_client.head_object.side_effect = ( + lambda **kwargs: s3_obj_data[kwargs["Key"]] + ) + + assert get_s3_objects( + s3_client, + s3_bucket, + s3_prefix, + file_extensions, + recursive=True, + ) == example_s3_objects + + s3_client.get_paginator.assert_called_once_with("list_objects_v2") + paginator.paginate.assert_called_once_with( + Bucket=s3_bucket, + Prefix=f"{s3_prefix}/", + ) + s3_client.head_object.assert_has_calls( + list(map( + lambda obj: call( + Bucket=s3_bucket, + Key=obj.get("key"), + ), + example_s3_objects.values(), + )), + ) + + +def test_get_s3_objects_non_recursive_success(): + s3_client = Mock() + s3_bucket = "your-bucket" + s3_object_key = f"{S3_PREFIX}/first-file.yml" + example_s3_objects = {} + example_s3_objects[NON_RECURSIVE_KEY] = ( + 
deepcopy(EXAMPLE_S3_OBJECTS["first-file.yml"]) + ) + file_extensions = [] + + s3_client.head_object.return_value = { + "Key": "first-file.yml", + "Metadata": { + **CURRENT_METADATA, + **UPLOAD_PREVIOUS_METADATA, + **IRRELEVANT_METADATA, + "sha256_hash": CURRENT_HASH, + }, + } + + assert get_s3_objects( + s3_client, + s3_bucket, + s3_object_key, + file_extensions, + recursive=False, + ) == example_s3_objects + + s3_client.head_object.assert_called_once_with( + Bucket=s3_bucket, + Key=s3_object_key, + ) + + +def test_upload_changed_files_simple(): + s3_client = Mock() + s3_bucket = "your-bucket" + s3_prefix = S3_PREFIX + local_files = deepcopy(EXAMPLE_LOCAL_FILES) + s3_objects = deepcopy(EXAMPLE_S3_OBJECTS) + metadata_to_check = { + "always_apply": deepcopy(CURRENT_METADATA), + "upon_upload_apply": { + "execution_id": "example-id", + "another-key": "another-value", + } + } + + with tempfile.NamedTemporaryFile(mode="wb", buffering=0) as file_pointer: + file_pointer.write(CURRENT_VERSION) + for key in local_files.keys(): + local_files[key]["file_path"] = file_pointer.name + + upload_changed_files( + s3_client, + s3_bucket, + s3_prefix, + local_files, + s3_objects, + metadata_to_check, + ) + + local_updated = local_files["updated-file.yml"] + local_missing = local_files["missing-file.yml"] + object_outdated_metadata = local_files["needs-new-metadata-file.yaml"] + s3_client.put_object.assert_has_calls([ + call( + Body=ANY, + Bucket=s3_bucket, + Key=f"{s3_prefix}/{object_outdated_metadata['key']}", + Metadata={ + **metadata_to_check["always_apply"], + **metadata_to_check["upon_upload_apply"], + "sha256_hash": object_outdated_metadata["sha256_hash"], + } + ), + call( + Body=ANY, + Bucket=s3_bucket, + Key=f"{s3_prefix}/{local_updated['key']}", + Metadata={ + **metadata_to_check["always_apply"], + **metadata_to_check["upon_upload_apply"], + "sha256_hash": local_updated["sha256_hash"], + } + ), + call( + Body=ANY, + Bucket=s3_bucket, + Key=f"{s3_prefix}/{local_missing['key']}", + Metadata={ + **metadata_to_check["always_apply"], + **metadata_to_check["upon_upload_apply"], + "sha256_hash": local_missing["sha256_hash"], + } + ), + ]) + assert s3_client.put_object.call_count == 3 + + +def test_upload_changed_files_no_updates(): + s3_client = Mock() + s3_bucket = "your-bucket" + s3_prefix = S3_PREFIX + local_files = deepcopy(EXAMPLE_LOCAL_FILES) + del local_files["updated-file.yml"] + del local_files["missing-file.yml"] + del local_files["needs-new-metadata-file.yaml"] + s3_objects = deepcopy(EXAMPLE_S3_OBJECTS) + + for obj in s3_objects.values(): + for irrelevant_key in IRRELEVANT_METADATA.keys(): + obj["metadata"][irrelevant_key] = "some-different-value" + + with tempfile.NamedTemporaryFile(mode="wb", buffering=0) as file_pointer: + file_pointer.write(CURRENT_VERSION) + for key in local_files.keys(): + local_files[key]["file_path"] = file_pointer.name + + upload_changed_files( + s3_client, + s3_bucket, + s3_prefix, + local_files, + s3_objects, + metadata_to_check={ + "always_apply": {}, + "upon_upload_apply": {}, + }, + ) + + s3_client.put_object.assert_not_called() + + +def test_upload_changed_files_single_file(): + s3_client = Mock() + s3_bucket = "your-bucket" + s3_prefix = "missing-file.yml" + s3_objects = deepcopy(EXAMPLE_S3_OBJECTS) + metadata_to_check = { + "always_apply": deepcopy(CURRENT_METADATA), + "upon_upload_apply": deepcopy(UPLOAD_NEW_METADATA), + } + + with tempfile.NamedTemporaryFile(mode="wb", buffering=0) as file_pointer: + file_pointer.write(CURRENT_VERSION) + local_files = { + 
"missing-file.yml": { + "key": s3_prefix, + "file_path": file_pointer.name, + "sha256_hash": CURRENT_HASH, + }, + } + + upload_changed_files( + s3_client, + s3_bucket, + s3_prefix, + local_files, + s3_objects, + metadata_to_check, + ) + + local_missing = local_files["missing-file.yml"] + s3_client.put_object.assert_has_calls([ + call( + Body=ANY, + Bucket=s3_bucket, + Key=f"{local_missing['key']}", + Metadata={ + **metadata_to_check["always_apply"], + **metadata_to_check["upon_upload_apply"], + "sha256_hash": local_missing["sha256_hash"], + } + ), + ]) + assert s3_client.put_object.call_count == 1 + + +def test_upload_changed_files_single_file_no_update(): + s3_client = Mock() + s3_bucket = "your-bucket" + s3_prefix = "first-file.yml" + s3_objects = deepcopy(EXAMPLE_S3_OBJECTS) + metadata_to_check = { + "always_apply": deepcopy(CURRENT_METADATA), + "upon_upload_apply": deepcopy(UPLOAD_NEW_METADATA), + } + + for obj in s3_objects.values(): + for irrelevant_key in IRRELEVANT_METADATA.keys(): + obj["metadata"][irrelevant_key] = "some-different-value" + + with tempfile.NamedTemporaryFile(mode="wb", buffering=0) as file_pointer: + file_pointer.write(CURRENT_VERSION) + local_files = { + "first-file.yml": { + "key": s3_prefix, + "file_path": file_pointer.name, + "sha256_hash": CURRENT_HASH, + }, + } + + upload_changed_files( + s3_client, + s3_bucket, + s3_prefix, + local_files, + s3_objects, + metadata_to_check, + ) + + s3_client.put_object.assert_not_called() + + +def test_delete_stale_objects_simple(): + s3_client = Mock() + s3_bucket = "your-bucket" + s3_prefix = S3_PREFIX + local_files = deepcopy(EXAMPLE_LOCAL_FILES) + s3_objects = deepcopy(EXAMPLE_S3_OBJECTS) + + delete_stale_objects( + s3_client, + s3_bucket, + s3_prefix, + local_files, + s3_objects, + ) + + s3_client.delete_objects.assert_called_once_with( + Bucket=s3_bucket, + Delete={ + "Objects": [{ + "Key": s3_objects.get("stale-file.yml").get("key"), + }], + }, + ) + + +def test_delete_stale_single_object(): + s3_client = Mock() + s3_bucket = "your-bucket" + s3_prefix = "stale-file.yml" + local_files = deepcopy(EXAMPLE_LOCAL_FILES) + s3_objects = { + "stale-file.yml": { + "key": s3_prefix, + "sha256_hash": CURRENT_HASH, + }, + } + + delete_stale_objects( + s3_client, + s3_bucket, + s3_prefix, + local_files, + s3_objects, + ) + + s3_client.delete_objects.assert_called_once_with( + Bucket=s3_bucket, + Delete={ + "Objects": [{ + "Key": s3_prefix, + }], + }, + ) + + +def test_delete_stale_objects_no_stale_objects(): + s3_client = Mock() + s3_bucket = "your-bucket" + s3_prefix = S3_PREFIX + local_files = deepcopy(EXAMPLE_LOCAL_FILES) + s3_objects = deepcopy(EXAMPLE_S3_OBJECTS) + del s3_objects["stale-file.yml"] + + delete_stale_objects( + s3_client, + s3_bucket, + s3_prefix, + local_files, + s3_objects, + ) + + s3_client.delete_objects.assert_not_called() + + +def test_clean_s3_prefix(): + assert clean_s3_prefix("some-path") == "some-path" + assert clean_s3_prefix("/some-path") == "some-path" + assert clean_s3_prefix("some-path/") == "some-path" + assert clean_s3_prefix("/some-path") == "some-path" + assert clean_s3_prefix("") == "" + + +def test_full_local_path_relative_to_cwd(): + local_path = "local/path" + here = Path(os.getcwd()) + assert (here / local_path) == get_full_local_path(local_path) + + +def test_full_local_path_absolute_path(): + absolute_path = "/absolute/path" + assert Path(absolute_path) == get_full_local_path(absolute_path) + + +def test_convert_to_s3_key(): + # Local key == s3_prefix + assert convert_to_s3_key("a.yml", 
"a.yml") == "a.yml" + + # S3 prefix is set + assert convert_to_s3_key("some-path", "prefix") == "prefix/some-path" + + # S3 prefix is set and local key matches NON_RECURSIVE_KEY + assert convert_to_s3_key(NON_RECURSIVE_KEY, "full-s3-obj") == "full-s3-obj" + + # S3 prefix is Non + assert convert_to_s3_key("some-path", "") == "some-path" + + +def test_convert_to_local_key(): + # Local key == s3_prefix + assert convert_to_local_key("a.yml", "a.yml") == "a.yml" + + # S3 prefix is set local + assert convert_to_local_key("prefix/some-path", "prefix") == "some-path" + + # S3 prefix is Nonlocal + assert convert_to_local_key("some-path", "") == "some-path" + + +@patch("sys.exit") +def test_ensure_valid_input_no_local_path(sys_exit): + s3_bucket = "your-bucket" + s3_prefix = "" + s3_url = f"s3://{s3_bucket}/{s3_prefix}" + + test_exit_message = "Would have exited with exit code 1" + sys_exit.side_effect = Exception(test_exit_message) + + with pytest.raises(Exception) as exc_info: + ensure_valid_input( + local_path="", + file_extensions=[".yml"], + s3_url=s3_url, + s3_bucket=s3_bucket, + s3_prefix=s3_prefix, + recursive=False, + ) + error_message = str(exc_info.value) + assert error_message.find(test_exit_message) >= 0 + + sys_exit.assert_called_once_with(1) + + +@patch("sys.exit") +def test_ensure_valid_input_no_destination_s3_url(sys_exit): + test_exit_message = "Would have exited with exit code 2" + sys_exit.side_effect = Exception(test_exit_message) + + with pytest.raises(Exception) as exc_info: + ensure_valid_input( + local_path="/tmp/some-path", + file_extensions=[".yml"], + s3_url="", + s3_bucket="", + s3_prefix="", + recursive=False, + ) + error_message = str(exc_info.value) + assert error_message.find(test_exit_message) >= 0 + + sys_exit.assert_called_once_with(2) + + +@patch("sys.exit") +def test_ensure_valid_input_non_recursive_and_no_s3_prefix(sys_exit): + s3_bucket = "your-bucket" + s3_prefix = "" + s3_url = f"s3://{s3_bucket}/{s3_prefix}" + + test_exit_message = "Would have exited with exit code 3" + sys_exit.side_effect = Exception(test_exit_message) + + with pytest.raises(Exception) as exc_info: + ensure_valid_input( + local_path="/tmp/some-path", + file_extensions=[".yml"], + s3_url=s3_url, + s3_bucket=s3_bucket, + s3_prefix=s3_prefix, + recursive=False, + ) + error_message = str(exc_info.value) + assert error_message.find(test_exit_message) >= 0 + + sys_exit.assert_called_once_with(3) + + +@patch("sys.exit") +def test_ensure_valid_input_recursive_and_path_does_not_exist(sys_exit): + s3_bucket = "your-bucket" + s3_prefix = "" + s3_url = f"s3://{s3_bucket}/{s3_prefix}" + + test_exit_message = "Would have exited with exit code 4" + sys_exit.side_effect = Exception(test_exit_message) + + with pytest.raises(Exception) as exc_info: + ensure_valid_input( + local_path="/tmp/some-path", + file_extensions=[".yml"], + s3_url=s3_url, + s3_bucket=s3_bucket, + s3_prefix=s3_prefix, + recursive=True, + ) + error_message = str(exc_info.value) + assert error_message.find(test_exit_message) >= 0 + + sys_exit.assert_called_once_with(4) + + +@patch("sys.exit") +def test_ensure_valid_input_not_recursive_and_path_is_a_dir(sys_exit): + s3_bucket = "your-bucket" + s3_prefix = "a-prefix.yml" + s3_url = f"s3://{s3_bucket}/{s3_prefix}" + + test_exit_message = "Would have exited with exit code 5" + sys_exit.side_effect = Exception(test_exit_message) + + with tempfile.TemporaryDirectory() as directory_path: + with pytest.raises(Exception) as exc_info: + ensure_valid_input( + local_path=directory_path, + 
file_extensions=[".yml"], + s3_url=s3_url, + s3_bucket=s3_bucket, + s3_prefix=s3_prefix, + recursive=False, + ) + error_message = str(exc_info.value) + assert error_message.find(test_exit_message) >= 0 + + sys_exit.assert_called_once_with(5) + + +@patch("sync_to_s3.delete_stale_objects") +@patch("sync_to_s3.upload_changed_files") +@patch("sync_to_s3.get_s3_objects") +@patch("sync_to_s3.get_local_files") +@patch("sync_to_s3.ensure_valid_input") +def test_sync_files_recursive_delete( + ensure_valid_input, + get_local_files, + get_s3_objects, + upload_files, + delete_stale, +): + s3_client = Mock() + local_path = "/tmp/some-path" + file_extensions = [".yml"] + s3_bucket = "your-bucket" + s3_prefix = "your-prefix" + s3_url = f"s3://{s3_bucket}/{s3_prefix}" + recursive = True + delete = True + metadata_to_check = { + "always_apply": deepcopy(CURRENT_METADATA), + "upon_upload_apply": deepcopy(UPLOAD_PREVIOUS_METADATA), + } + + local_files = Mock() + s3_objects = Mock() + get_local_files.return_value = local_files + get_s3_objects.return_value = s3_objects + + sync_files( + s3_client, + local_path, + file_extensions, + s3_url, + recursive, + delete, + metadata_to_check, + ) + + get_local_files.assert_called_once_with( + local_path, + file_extensions, + recursive, + ) + get_s3_objects.assert_called_once_with( + s3_client, + s3_bucket, + s3_prefix, + file_extensions, + recursive, + ) + upload_files.assert_called_once_with( + s3_client, + s3_bucket, + s3_prefix, + local_files, + s3_objects, + metadata_to_check, + ) + delete_stale.assert_called_once_with( + s3_client, + s3_bucket, + s3_prefix, + local_files, + s3_objects, + ) + + +@patch("sync_to_s3.delete_stale_objects") +@patch("sync_to_s3.upload_changed_files") +@patch("sync_to_s3.get_s3_objects") +@patch("sync_to_s3.get_local_files") +@patch("sync_to_s3.ensure_valid_input") +def test_sync_files_recursive_no_delete( + ensure_valid_input, + get_local_files, + get_s3_objects, + upload_files, + delete_stale, +): + s3_client = Mock() + local_path = "/tmp/some-path" + file_extensions = [".yml"] + s3_bucket = "your-bucket" + s3_prefix = "your-prefix" + s3_url = f"s3://{s3_bucket}/{s3_prefix}" + recursive = True + delete = False + metadata_to_check = { + "always_apply": deepcopy(CURRENT_METADATA), + "upon_upload_apply": deepcopy(UPLOAD_PREVIOUS_METADATA), + } + + local_files = Mock() + s3_objects = Mock() + get_local_files.return_value = local_files + get_s3_objects.return_value = s3_objects + + sync_files( + s3_client, + local_path, + file_extensions, + s3_url, + recursive, + delete, + metadata_to_check, + ) + + ensure_valid_input.assert_called_once_with( + local_path, + file_extensions, + s3_url, + s3_bucket, + s3_prefix, + recursive, + ) + get_local_files.assert_called_once_with( + local_path, + file_extensions, + recursive, + ) + get_s3_objects.assert_called_once_with( + s3_client, + s3_bucket, + s3_prefix, + file_extensions, + recursive, + ) + upload_files.assert_called_once_with( + s3_client, + s3_bucket, + s3_prefix, + local_files, + s3_objects, + metadata_to_check, + ) + delete_stale.assert_not_called() diff --git a/src/lambda_codebase/initial_commit/bootstrap_repository/adf-build/shared/pytest.ini b/src/lambda_codebase/initial_commit/bootstrap_repository/adf-build/shared/pytest.ini index 8947ae49b..68298b1c8 100644 --- a/src/lambda_codebase/initial_commit/bootstrap_repository/adf-build/shared/pytest.ini +++ b/src/lambda_codebase/initial_commit/bootstrap_repository/adf-build/shared/pytest.ini @@ -1,3 +1,3 @@ [pytest] testpaths = tests 
-norecursedirs = python cdk +norecursedirs = python cdk helpers diff --git a/src/template.yml b/src/template.yml index 6763b9a23..bbfea92a7 100644 --- a/src/template.yml +++ b/src/template.yml @@ -1287,7 +1287,8 @@ Resources: - aws s3 sync ./adf-build/shared s3://$DEPLOYMENT_ACCOUNT_BUCKET/adf-build --quiet # Base templates: - aws s3 sync . s3://$S3_BUCKET --quiet --delete - - aws s3 sync ./adf-accounts s3://$ACCOUNT_BUCKET --quiet + # Upload account files to the ACCOUNT_BUCKET + - python adf-build/shared/helpers/sync_to_s3.py --extension .yml --extension .yaml --metadata adf_version=${ADF_VERSION} --upload-with-metadata execution_id=${CODEPIPELINE_EXECUTION_ID} --recursive adf-accounts s3://$ACCOUNT_BUCKET # Updates config, updates (or creates) base stacks: - python adf-build/main.py Type: CODEPIPELINE @@ -1333,6 +1334,14 @@ Resources: - Name: "TemplateSource" Configuration: ProjectName: !Ref CodeBuildProject + EnvironmentVariables: >- + [ + { + "name": "CODEPIPELINE_EXECUTION_ID", + "value": "#{codepipeline.PipelineExecutionId}", + "type": "PLAINTEXT" + } + ] RunOrder: 1 CodePipelineRole: diff --git a/tox.ini b/tox.ini index babe1cda8..bd05d0910 100644 --- a/tox.ini +++ b/tox.ini @@ -15,6 +15,7 @@ setenv= AWS_REGION=eu-central-1 AWS_DEFAULT_REGION=eu-central-1 ADF_PIPELINE_PREFIX=adf-pipeline- + CODEBUILD_BUILD_ID=abcdef S3_BUCKET=some_bucket S3_BUCKET_NAME=some_bucket DEPLOYMENT_ACCOUNT_BUCKET=some_deployment_account_bucket
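After a pipeline run, the synced objects should carry the metadata introduced by this change. A minimal verification sketch using the standard boto3 head_object call is shown below; the bucket name and object key are placeholders for your own deployment.

    # Inspect the metadata written by sync_to_s3.py on a synced object.
    import boto3

    s3 = boto3.client("s3")
    head = s3.head_object(
        Bucket="my-adf-pipelines-bucket",  # placeholder bucket name
        Key="deployment_map.yml",
    )
    print(head["Metadata"].get("adf_version"))   # applied on every sync via --metadata
    print(head["Metadata"].get("execution_id"))  # applied on upload via --upload-with-metadata
    print(head["Metadata"].get("sha256_hash"))   # written by the helper for change detection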