diff --git a/terraform/environments/electronic-monitoring-data/data_store.tf b/terraform/environments/electronic-monitoring-data/data_store.tf deleted file mode 100644 index db470e0e863..00000000000 --- a/terraform/environments/electronic-monitoring-data/data_store.tf +++ /dev/null @@ -1,210 +0,0 @@ - - -resource "aws_s3_bucket_notification" "data_store" { - depends_on = [aws_sns_topic_policy.s3_events_policy] - bucket = module.s3-data-bucket.bucket.id - - # Only for copy events as those are events triggered by data being copied - #  from landing bucket. - topic { - topic_arn = aws_sns_topic.s3_events.arn - events = [ - "s3:ObjectCreated:*" - ] - } -} - -resource "aws_sns_topic" "s3_events" { - name = "${module.s3-data-bucket.bucket.id}-object-created-topic" -} - -# Define the IAM policy document for the SNS topic policy -data "aws_iam_policy_document" "sns_policy" { - statement { - effect = "Allow" - - principals { - type = "Service" - identifiers = ["s3.amazonaws.com"] - } - - actions = ["SNS:Publish"] - resources = [aws_sns_topic.s3_events.arn] - - condition { - test = "ArnLike" - variable = "aws:SourceArn" - values = [module.s3-data-bucket.bucket.arn] - } - } -} - -# Apply the policy to the SNS topic -resource "aws_sns_topic_policy" "s3_events_policy" { - arn = aws_sns_topic.s3_events.arn - policy = data.aws_iam_policy_document.sns_policy.json -} - -#------------------------------------------------------------------------------ -# temporary replication config -#------------------------------------------------------------------------------ - -data "aws_iam_policy_document" "assume_role" { - statement { - effect = "Allow" - - principals { - type = "Service" - identifiers = ["s3.amazonaws.com"] - } - - actions = ["sts:AssumeRole"] - } -} - - -#------------------------------------------------------------------------------ -# S3 lambda function to calculate data store file checksums -#------------------------------------------------------------------------------ - -variable "checksum_algorithm" { - type = string - description = "Select Checksum Algorithm. Default and recommended choice is SHA256, however CRC32, CRC32C, SHA1 are also available." 
- default = "SHA256" -} - -data "archive_file" "calculate_checksum_lambda" { - type = "zip" - source_file = "lambdas/calculate_checksum_lambda.py" - output_path = "lambdas/calculate_checksum_lambda.zip" -} - -resource "aws_lambda_function" "calculate_checksum_lambda" { - filename = "lambdas/calculate_checksum_lambda.zip" - function_name = "calculate-checksum-lambda" - role = aws_iam_role.calculate_checksum_lambda.arn - handler = "calculate_checksum_lambda.handler" - runtime = "python3.12" - memory_size = 4096 - timeout = 900 - - environment { - variables = { - Checksum = var.checksum_algorithm - } - } - - tags = local.tags -} - -resource "aws_iam_role" "calculate_checksum_lambda" { - name = "calculate-checksum-lambda-iam-role" - assume_role_policy = data.aws_iam_policy_document.lambda_assume_role.json - managed_policy_arns = ["arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole"] -} - -data "aws_iam_policy_document" "calculate_checksum_lambda" { - statement { - sid = "S3Permissions" - effect = "Allow" - actions = [ - "s3:PutObject", - "s3:PutObjectTagging", - "s3:PutObjectAcl", - "s3:GetObject", - "s3:GetObjectVersion", - "s3:GetObjectTagging", - "s3:GetObjectAttributes", - "s3:GetObjectVersionAttributes", - "s3:ListBucket" - ] - resources = ["${module.s3-data-bucket.bucket.arn}/*"] - } -} - -resource "aws_iam_role_policy" "calculate_checksum_lambda" { - name = "calculate_checksum-lambda-iam-policy" - role = aws_iam_role.calculate_checksum_lambda.id - policy = data.aws_iam_policy_document.calculate_checksum_lambda.json -} - -resource "aws_lambda_permission" "allow_sns_invoke_checksum_lambda" { - statement_id = "AllowSNSInvokeChecksum" - action = "lambda:InvokeFunction" - function_name = aws_lambda_function.calculate_checksum_lambda.function_name - principal = "sns.amazonaws.com" - source_arn = aws_sns_topic.s3_events.arn -} - -resource "aws_sns_topic_subscription" "checksum_lambda_subscription" { - topic_arn = aws_sns_topic.s3_events.arn - protocol = "lambda" - endpoint = aws_lambda_function.calculate_checksum_lambda.arn - - depends_on = [aws_lambda_permission.allow_sns_invoke_checksum_lambda] -} - -#------------------------------------------------------------------------------ -# S3 lambda function to perform zip file summary -#------------------------------------------------------------------------------ - -data "archive_file" "summarise_zip_lambda" { - type = "zip" - source_file = "lambdas/summarise_zip_lambda.py" - output_path = "lambdas/summarise_zip_lambda.zip" -} - -resource "aws_lambda_function" "summarise_zip_lambda" { - filename = "lambdas/summarise_zip_lambda.zip" - function_name = "summarise-zip-lambda" - role = aws_iam_role.summarise_zip_lambda.arn - handler = "summarise_zip_lambda.handler" - runtime = "python3.12" - timeout = 900 - memory_size = 1024 - layers = ["arn:aws:lambda:eu-west-2:017000801446:layer:AWSLambdaPowertoolsPythonV2:67"] - source_code_hash = data.archive_file.summarise_zip_lambda.output_base64sha256 - tags = local.tags -} - -resource "aws_iam_role" "summarise_zip_lambda" { - name = "summarise-zip-iam-role" - assume_role_policy = data.aws_iam_policy_document.lambda_assume_role.json - managed_policy_arns = ["arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole"] -} - -data "aws_iam_policy_document" "summarise_zip_lambda" { - statement { - sid = "S3Permissions" - effect = "Allow" - actions = [ - "s3:GetObject", - "s3:PutObject", - "s3:ListBucket" - ] - resources = ["${module.s3-data-bucket.bucket.arn}/*"] - } -} - -resource 
"aws_iam_role_policy" "summarise_zip_lambda" { - name = "summarise-zip-iam-policy" - role = aws_iam_role.summarise_zip_lambda.id - policy = data.aws_iam_policy_document.summarise_zip_lambda.json -} - -resource "aws_lambda_permission" "allow_sns_invoke_zip_lambda" { - statement_id = "AllowSNSInvokeZip" - action = "lambda:InvokeFunction" - function_name = aws_lambda_function.summarise_zip_lambda.function_name - principal = "sns.amazonaws.com" - source_arn = aws_sns_topic.s3_events.arn -} - - -resource "aws_sns_topic_subscription" "zip_lambda_subscription" { - topic_arn = aws_sns_topic.s3_events.arn - protocol = "lambda" - endpoint = aws_lambda_function.summarise_zip_lambda.arn - - depends_on = [aws_lambda_permission.allow_sns_invoke_zip_lambda] -} diff --git a/terraform/environments/electronic-monitoring-data/lambdas/calculate_checksum_lambda.py b/terraform/environments/electronic-monitoring-data/lambdas/calculate_checksum_lambda.py deleted file mode 100644 index d804f4da6ec..00000000000 --- a/terraform/environments/electronic-monitoring-data/lambdas/calculate_checksum_lambda.py +++ /dev/null @@ -1,273 +0,0 @@ -"""Lambda function to add Checksum metadata to files. - -This function adds two tags to files that contain the SHA256 checksum as well -as the Base64 encoded SHA256 checksum (this is what AWS displays by default). - -To calculate a files SHA256 checksum locally in the command line (i.e. to -compare) use: -``` -shasum -a 256 path/to/file -``` - -and: - -``` -shasum -a 256 path/to/file | cut -f1 -d\ | xxd -r -p | base64 -``` - -to encode it to Base64. -""" -import base64 -import boto3 -import hashlib - -import io -import json -import zipfile - - -s3_client = boto3.client('s3') - - -def handler(event, context): - print(event) - - event_type = event['Records'][0]['eventName'] - bucket_name = event['Records'][0]['s3']['bucket']['name'] - object_key = event['Records'][0]['s3']['object']['key'] - size = event['Records'][0]['s3']['object']['size'] - - print(f'{object_key = } of {size = }B added to {bucket_name = } via {event_type = }') - - chunk_size = 131072 # 128KB - - # TEMPORARILY ALSO ADDING THE ZIP SUMMARY CODE HERE. - # Check if the object key ends with '.zip' - if object_key.endswith('.zip'): - print(f"Summarising '{object_key = }' as has '.zip' extension") - zip_data = summarise_zip_file( - bucket_name=bucket_name, - object_key=object_key, - chunk_size=chunk_size, - ) - - save_info_json( - bucket_name=bucket_name, - object_key=object_key, - json_data=zip_data, - ) - - - # Generate the SHA256 checksum of the object. - hash_value = generate_sha256_checksum( - bucket_name=bucket_name, - object_key=object_key, - chunk_size=chunk_size, - ) - - hash_64 = convert_hash_to_base64(hash_value=hash_value) - - additional_tags = { - 'SHA-256 checksum': hash_value, - 'Base64 SHA-256 checksum': hash_64, - } - - add_sha256_tags( - bucket_name=bucket_name, - object_key=object_key, - additional_tags=additional_tags, - ) - - return None - - -def generate_sha256_checksum( - bucket_name, - object_key, - chunk_size = 65536, # 64 KB chunk. 
-): - print(f'Using {chunk_size=}B to read object') - - # Initialize the SHA256 hash object - sha256_hash = hashlib.sha256() - - # Retrieve the object data from S3 in chunks - try: - processed_chunks = 0 - - response = s3_client.get_object(Bucket=bucket_name, Key=object_key) - object_stream = response['Body'] - - while True: - chunk = object_stream.read(chunk_size) - if not chunk: - break - sha256_hash.update(chunk) - - processed_chunks += 1 - - if processed_chunks % 25000 == 0: - print(f'Processed {processed_chunks} chunks') - - except Exception as e: - print("Error:", e) - return None - - # Calculate the SHA256 checksum - hash_value = sha256_hash.hexdigest() - - print(f'SHA256 checksum: {hash_value}') - - return hash_value - - -def convert_hash_to_base64(hash_value): - # Convert hexdigest to bytes - binary_data = bytes.fromhex(hash_value) - - # Encode bytes to base64 - hash_64 = base64.b64encode(binary_data).decode() - - # Print the Base 64 encoded SHA256 checksum to CloudWatch logs. - print(f'Base 64 SHA256 checksum: {hash_64}') - - return hash_64 - - -def add_sha256_tags( - bucket_name, - object_key, - additional_tags, -): - # Retrieve existing tags for the object - response = s3_client.get_object_tagging( - Bucket=bucket_name, - Key=object_key - ) - - # Merge existing tags with additional tags - existing_tags = response.get('TagSet', []) - existing_tags.extend([ - {'Key': key, 'Value': value} - for key, value in additional_tags.items() - ]) - - print(existing_tags) - - # Update tags for the object - s3_client.put_object_tagging( - Bucket=bucket_name, - Key=object_key, - Tagging={ - 'TagSet': existing_tags - } - ) - - print(f'Added tags = {list(additional_tags.keys())} to {object_key = }') - - return None - - -def summarise_zip_file( - bucket_name, - object_key, - chunk_size=4096, -): - try: - # Initialize counters - total_folders = 0 - total_files = 0 - total_size = 0 - total_packed_size = 0 - - # Initialize physical size of the zip file - physical_size = 0 - - # Create an in-memory buffer to read chunks of data - buffer = io.BytesIO() - - # Read the contents of the zip file from S3 in chunks - response = s3_client.get_object( - Bucket=bucket_name, - Key=object_key, - ) - zip_stream = response['Body'] - - # Read and process the zip file in chunks - while True: - chunk = zip_stream.read(chunk_size) - if not chunk: - break - - # Write the chunk to the buffer - buffer.write(chunk) - - # Check if a complete file exists in the buffer - while True: - # Seek to the beginning of the buffer - buffer.seek(0) - - # Attempt to open the zip file with the current buffer contents - try: - with zipfile.ZipFile(buffer, 'r') as zip_ref: - # Get file list - file_list = zip_ref.namelist() - - # Iterate through files in the zip - for file_name in file_list: - # Extract file information - file_info = zip_ref.getinfo(file_name) - - # Update counters - if file_info.is_dir(): - total_folders += 1 - else: - total_files += 1 - total_size += file_info.file_size - total_packed_size += file_info.compress_size - - # Get the physical size of the zip file - physical_size = buffer.tell() - - # Reset the buffer - buffer.seek(0) - buffer.truncate(0) - - # Exit the inner loop - break - - # If the buffer does not contain a complete zip file, continue reading chunks - except zipfile.BadZipFile: - break - - except Exception as e: - print("Error:", e) - return None - - # Return analysis results - return { - 'size': total_size, - 'packed_size': total_packed_size, - 'folders': total_folders, - 'files': total_files, - 
'physical_size': physical_size, - } - - -def save_info_json( - bucket_name, - object_key, - json_data, -): - json_content = json.dumps(json_data) - - # Saving JSON content to a new file with .json extension - new_object_key = object_key + '.info.json' - - s3_client.put_object( - Bucket=bucket_name, - Key=new_object_key, - Body=json_content.encode('utf-8') - ) - - print(f'Json information saved to {new_object_key}') \ No newline at end of file diff --git a/terraform/environments/electronic-monitoring-data/lambdas/calculate_checksum_lambda.zip b/terraform/environments/electronic-monitoring-data/lambdas/calculate_checksum_lambda.zip deleted file mode 100644 index 38d2dd837bc..00000000000 Binary files a/terraform/environments/electronic-monitoring-data/lambdas/calculate_checksum_lambda.zip and /dev/null differ diff --git a/terraform/environments/electronic-monitoring-data/lambdas/create_athena_external_table.zip b/terraform/environments/electronic-monitoring-data/lambdas/create_athena_external_table.zip deleted file mode 100644 index 3b9b39e4904..00000000000 Binary files a/terraform/environments/electronic-monitoring-data/lambdas/create_athena_external_table.zip and /dev/null differ diff --git a/terraform/environments/electronic-monitoring-data/lambdas/create_athena_external_tables.zip b/terraform/environments/electronic-monitoring-data/lambdas/create_athena_external_tables.zip deleted file mode 100644 index f29d3f785a5..00000000000 Binary files a/terraform/environments/electronic-monitoring-data/lambdas/create_athena_external_tables.zip and /dev/null differ diff --git a/terraform/environments/electronic-monitoring-data/lambdas/create_athena_table.py b/terraform/environments/electronic-monitoring-data/lambdas/create_athena_table.py deleted file mode 100644 index b788742c5e6..00000000000 --- a/terraform/environments/electronic-monitoring-data/lambdas/create_athena_table.py +++ /dev/null @@ -1,59 +0,0 @@ -""" -For a given table in a database, create a table in the glue catalog given metadata -""" - -import awswrangler as wr -from mojap_metadata.converters.glue_converter import GlueConverter, GlueConverterOptions -from mojap_metadata import Metadata -from aws_lambda_powertools import Logger -from aws_lambda_powertools.utilities.typing import LambdaContext - -import os -import boto3 - -s3 = boto3.client("s3") -glue_client = boto3.client("glue") -lambda_client = boto3.client("lambda") - -logger = Logger() - -S3_BUCKET_NAME = os.environ.get("S3_BUCKET_NAME") - -def create_glue_table(metadata, schema_name): - db_name = metadata.database_name - db_path = f"{S3_BUCKET_NAME}/{db_name}/{schema_name}" - table_name = metadata.name - metadata.file_format = "parquet" - logger.info(f"Table Name: {table_name}") - try: - # Delete table - wr.catalog.delete_table_if_exists(database=db_name, table=table_name) - logger.info(f"Delete table {table_name} in database {db_name}") - except s3.exceptions.from_code("EntityNotFoundException"): - logger.info(f"Database '{db_name}' table '{table_name}' does not exist") - options = GlueConverterOptions() - options.csv.skip_header = True - gc = GlueConverter(options) - boto_dict = gc.generate_from_meta( - metadata, - database_name=db_name, - table_location=f"s3://{db_path}/{table_name}", - ) - glue_client.create_table(**boto_dict) - return boto_dict - - -@logger.inject_lambda_context -def handler(event: dict, context: LambdaContext) -> str: - schema_name = event["database"] - meta = Metadata.from_dict(event) - boto_dict = create_glue_table(meta, schema_name) - table_name = 
boto_dict["TableInput"]["Name"] - result = { - "status": "success", - "message": f"Created table {table_name} for in glue", - "created_tables": boto_dict["TableInput"], - } - - logger.info(result) - return result diff --git a/terraform/environments/electronic-monitoring-data/lambdas/create_athena_table.zip b/terraform/environments/electronic-monitoring-data/lambdas/create_athena_table.zip deleted file mode 100644 index 5c34af4be11..00000000000 Binary files a/terraform/environments/electronic-monitoring-data/lambdas/create_athena_table.zip and /dev/null differ diff --git a/terraform/environments/electronic-monitoring-data/lambdas/em_ap_transfer_lambda.zip b/terraform/environments/electronic-monitoring-data/lambdas/em_ap_transfer_lambda.zip deleted file mode 100644 index d5f646d0fe1..00000000000 Binary files a/terraform/environments/electronic-monitoring-data/lambdas/em_ap_transfer_lambda.zip and /dev/null differ diff --git a/terraform/environments/electronic-monitoring-data/lambdas/get_file_keys_for_table.py b/terraform/environments/electronic-monitoring-data/lambdas/get_file_keys_for_table.py deleted file mode 100644 index e402d94a93d..00000000000 --- a/terraform/environments/electronic-monitoring-data/lambdas/get_file_keys_for_table.py +++ /dev/null @@ -1,91 +0,0 @@ -import json -import boto3 -import re -import logging -import os - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - -s3_client = boto3.client("s3") - -PARQUET_BUCKET_NAME = os.environ.get("PARQUET_BUCKET_NAME") - -def bucket_key_to_s3_path(bucket, key): - """ - Takes an S3 bucket and key combination and returns the - full S3 path to that location. - """ - return f"s3://{bucket}/{key}" - -def _add_slash(s): - """ - Adds slash to end of string - """ - return s if s[-1] == "/" else s + "/" - -def s3_path_to_bucket_key(s3_path): - """ - Splits out s3 file path to bucket key combination - """ - return s3_path.replace("s3://", "").split("/", 1) - -def get_filepaths_from_s3_folder( - s3_folder_path, file_extension=None, exclude_zero_byte_files=False -): - """ - Get a list of filepaths from a bucket. If extension is set to a string - then only return files with that extension otherwise if set to None (default) - all filepaths are returned. - :param s3_folder_path: "s3://...." - :param extension: file extension, e.g. .json - :param exclude_zero_byte_files: Whether to filter out results of zero size: True - :return: A list of full s3 paths that were in the given s3 folder path - """ - - s3_resource = boto3.resource("s3") - - if file_extension is not None: - if file_extension[0] != ".": - file_extension = "." + file_extension - - # This guarantees that the path the user has given is really a 'folder'. 
- s3_folder_path = _add_slash(s3_folder_path) - - bucket, key = s3_path_to_bucket_key(s3_folder_path) - - s3b = s3_resource.Bucket(bucket) - obs = s3b.objects.filter(Prefix=key) - - if file_extension is not None: - obs = [o for o in obs if o.key.endswith(file_extension)] - - if exclude_zero_byte_files: - obs = [o for o in obs if o.size != 0] - - ob_keys = [o.key for o in obs] - - paths = sorted([bucket_key_to_s3_path(bucket, o) for o in ob_keys]) - - return paths - -# Lambda function to copy file from one S3 bucket to another -def handler(event, context): - # Specify source bucket - for key, value in event.items(): - database_name, table_name = key, value - logger.info(f"Copying table {table_name} from database {database_name}") - - source_key = f"{database_name}/*/{table_name}" - destination_key = f"electronic_monitoring/load/{database_name}/{table_name}/" - logger.info(f"Getting file keys: {source_key} from bucket: {PARQUET_BUCKET_NAME}") - - # List objects in the source folder and filter using regex pattern - file_paths = get_filepaths_from_s3_folder(f"s3://{PARQUET_BUCKET_NAME}/{database_name}/") - pattern = re.compile(rf'{database_name}/.*?/{table_name}', re.IGNORECASE) - filtered_paths = [path for path in file_paths if pattern.search(path)] - - logger.info(f"Number of files: {len(filtered_paths)}") - - # Return list of file paths that match the pattern - return [{source_key: file_path} for file_path in filtered_paths] diff --git a/terraform/environments/electronic-monitoring-data/lambdas/get_file_keys_for_table.zip b/terraform/environments/electronic-monitoring-data/lambdas/get_file_keys_for_table.zip deleted file mode 100644 index 76e274356ac..00000000000 Binary files a/terraform/environments/electronic-monitoring-data/lambdas/get_file_keys_for_table.zip and /dev/null differ diff --git a/terraform/environments/electronic-monitoring-data/lambdas/get_metadata_from_rds.py b/terraform/environments/electronic-monitoring-data/lambdas/get_metadata_from_rds.py deleted file mode 100644 index 8048a9dd3c2..00000000000 --- a/terraform/environments/electronic-monitoring-data/lambdas/get_metadata_from_rds.py +++ /dev/null @@ -1,114 +0,0 @@ -""" -connects to rds for a given database and uses mojap metadata to convert -mojap metadata type and writes out the list of metadata for all tables in the database -""" - -import awswrangler as wr -from mojap_metadata.converters.sqlalchemy_converter import ( - SQLAlchemyConverter, - SQLAlchemyConverterOptions, -) -from botocore.exceptions import NoCredentialsError, PartialCredentialsError -from sqlalchemy import create_engine -import os -import logging -import boto3 - -s3 = boto3.client("s3") -glue_client = boto3.client("glue") -lambda_client = boto3.client("lambda") - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - -SECRET_NAME = os.environ.get("SECRET_NAME") -METADATA_STORE_BUCKET = os.environ.get("METADATA_STORE_BUCKET") - - -def get_rds_connection(db_name): - con_sqlserver = wr.sqlserver.connect( - secret_id=SECRET_NAME, odbc_driver_version=17, dbname=db_name - ) - logger.info("Successfully connected to RDS database") - return con_sqlserver - - -def create_glue_database(db_name): - # Try to delete the database - try: - # Delete database - wr.catalog.delete_database(name=db_name) - logger.info(f"Delete database {db_name}") - # Handle case where database doesn't exist - except s3.exceptions.from_code("EntityNotFoundException"): - logger.info(f"Database '{db_name}' does not exist") - wr.catalog.create_database(name=db_name, 
exist_ok=True) - - -def upload_to_s3(local_filepath: str, s3_filepath: str) -> None: - bucket_name, key = s3_filepath[5:].split("/", 1) - - try: - s3.upload_file(local_filepath, bucket_name, key) - logger.info(f"Successfully uploaded {local_filepath} to {s3_filepath}") - except (NoCredentialsError, PartialCredentialsError) as e: - logger.info(f"Error uploading to S3: {e}") - - -def write_meta_to_s3(meta): - db_name = meta.database_name - table_name = meta.name - temp_path = "/tmp/temp.json" - s3_path = f"s3://{METADATA_STORE_BUCKET}/database={db_name}/table_name={table_name}/metadata.json" - meta.to_json(temp_path) - upload_to_s3(temp_path, s3_path) - - -def add_db_to_meta(meta, db_name): - """ - Database is currently down as dbo - - reassign to actual DB Name - """ - meta.file_format = "parquet" - meta.database_name = db_name - return meta - - -def remove_comments_from_meta(meta): - for col in meta["columns"]: - col["description"] = "" - return meta - - -def reassign_binary_cols(meta): - for col in meta["columns"]: - if col["type"] == "binary": - if col["name"] == "row_v": - col["type"] = "string" - return meta - - -def handler(event, context): - db_name = event.get("db_name") - conn = get_rds_connection(db_name) - engine = create_engine("mssql+pyodbc://", creator=lambda: conn) - opt = SQLAlchemyConverterOptions(convert_to_snake=True) - sqlc = SQLAlchemyConverter(engine, opt) - metadata_list = sqlc.generate_to_meta_list(schema="dbo") - metadata_list = [add_db_to_meta(meta, db_name) for meta in metadata_list] - for meta in metadata_list: - write_meta_to_s3(meta) - dict_metadata_list = [meta.to_dict() for meta in metadata_list] - dict_metadata_list = [ - remove_comments_from_meta(meta) for meta in dict_metadata_list - ] - dict_metadata_list = [reassign_binary_cols(meta) for meta in dict_metadata_list] - - create_glue_database(db_name) - result = { - "status": "success", - "message": f"Found {len(metadata_list)} tables", - "metadata_list": dict_metadata_list, - } - logger.info(result) - return result diff --git a/terraform/environments/electronic-monitoring-data/lambdas/get_metadata_from_rds.zip b/terraform/environments/electronic-monitoring-data/lambdas/get_metadata_from_rds.zip deleted file mode 100644 index 3db3609815a..00000000000 Binary files a/terraform/environments/electronic-monitoring-data/lambdas/get_metadata_from_rds.zip and /dev/null differ diff --git a/terraform/environments/electronic-monitoring-data/lambdas/get_tables_from_db.py b/terraform/environments/electronic-monitoring-data/lambdas/get_tables_from_db.py deleted file mode 100644 index 186755e9de2..00000000000 --- a/terraform/environments/electronic-monitoring-data/lambdas/get_tables_from_db.py +++ /dev/null @@ -1,16 +0,0 @@ -import os -import logging -import boto3 -import json - -glue = boto3.client("glue") - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - -def handler(event, context): - db_name = event.get("db_name") - response = glue.get_tables(DatabaseName=db_name) - tables = response['TableList'] - table_names = [{db_name: table['Name']} for table in tables] - return table_names \ No newline at end of file diff --git a/terraform/environments/electronic-monitoring-data/lambdas/layers/create_athena_table_layer.zip b/terraform/environments/electronic-monitoring-data/lambdas/layers/create_athena_table_layer.zip deleted file mode 100644 index 889c66aca6c..00000000000 Binary files a/terraform/environments/electronic-monitoring-data/lambdas/layers/create_athena_table_layer.zip and /dev/null differ
diff --git a/terraform/environments/electronic-monitoring-data/lambdas/layers/mojap_metadata.zip b/terraform/environments/electronic-monitoring-data/lambdas/layers/mojap_metadata.zip deleted file mode 100644 index dadbc4d7311..00000000000 Binary files a/terraform/environments/electronic-monitoring-data/lambdas/layers/mojap_metadata.zip and /dev/null differ diff --git a/terraform/environments/electronic-monitoring-data/lambdas/layers/pandas-layer.zip b/terraform/environments/electronic-monitoring-data/lambdas/layers/pandas-layer.zip deleted file mode 100644 index fefae14bb8d..00000000000 Binary files a/terraform/environments/electronic-monitoring-data/lambdas/layers/pandas-layer.zip and /dev/null differ diff --git a/terraform/environments/electronic-monitoring-data/lambdas/manual_trigger_lambda.py b/terraform/environments/electronic-monitoring-data/lambdas/manual_trigger_lambda.py deleted file mode 100644 index c1516884616..00000000000 --- a/terraform/environments/electronic-monitoring-data/lambdas/manual_trigger_lambda.py +++ /dev/null @@ -1,63 +0,0 @@ -import boto3 -from logging import getLogger -import sys -import json - -logger = getLogger(__name__) -AWS_REGION = "eu-west-2" - -# Initialize Boto3 client for S3 -s3 = boto3.client("s3") - - -def invoke_copy_lambda(source_bucket_name, file_key, lambda_function_name): - """ - Invoke the Lambda function to copy file from source to destination bucket. - """ - lambda_client = boto3.client("lambda", region_name=AWS_REGION) - payload = { - "Records": [ - { - "s3": { - "bucket": {"name": source_bucket_name}, - "object": {"key": file_key}, - } - } - ] - } - response = lambda_client.invoke( - FunctionName=lambda_function_name, - InvocationType="Event", # Asynchronous invocation - Payload=json.dumps(payload), - ) - logger.info(f"Invoked Lambda '{lambda_function_name}' to copy file '{file_key}'") - - -def process_s3_bucket(bucket_name, lambda_function_name): - """ - List objects in the S3 bucket and invoke Lambda for each object. 
- """ - try: - # List objects in the bucket - response = s3.list_objects_v2(Bucket=bucket_name) - - # If there are objects, invoke Lambda for each object - if "Contents" in response: - for obj in response["Contents"]: - file_key = obj["Key"] - invoke_copy_lambda(file_key, lambda_function_name) - else: - print(f"No objects found in the bucket '{bucket_name}'.") - except Exception as e: - print(f"Error: {str(e)}") - - -if __name__ == "__main__": - if len(sys.argv) != 3: - logger.info("Usage: python script.py ") - sys.exit(1) - - bucket_name = sys.argv[1] - lambda_function_name = sys.argv[2] - - process_s3_bucket(bucket_name, lambda_function_name) diff --git a/terraform/environments/electronic-monitoring-data/lambdas/query_output_to_list.py b/terraform/environments/electronic-monitoring-data/lambdas/query_output_to_list.py deleted file mode 100644 index dacfe5c58ea..00000000000 --- a/terraform/environments/electronic-monitoring-data/lambdas/query_output_to_list.py +++ /dev/null @@ -1,4 +0,0 @@ -def handler(event, context): - data = event["queryOutput"]["ResultSet"]["Rows"][1:] - output_list = [{row["Data"][0]["VarCharValue"]: row["Data"][1]["VarCharValue"]} for row in data] - return output_list diff --git a/terraform/environments/electronic-monitoring-data/lambdas/query_output_to_list.zip b/terraform/environments/electronic-monitoring-data/lambdas/query_output_to_list.zip deleted file mode 100644 index 49be0b66e2b..00000000000 Binary files a/terraform/environments/electronic-monitoring-data/lambdas/query_output_to_list.zip and /dev/null differ diff --git a/terraform/environments/electronic-monitoring-data/lambdas/send_table_to_ap.zip b/terraform/environments/electronic-monitoring-data/lambdas/send_table_to_ap.zip deleted file mode 100644 index b507fcf61e4..00000000000 Binary files a/terraform/environments/electronic-monitoring-data/lambdas/send_table_to_ap.zip and /dev/null differ diff --git a/terraform/environments/electronic-monitoring-data/lambdas/summarise_zip_lambda.py b/terraform/environments/electronic-monitoring-data/lambdas/summarise_zip_lambda.py deleted file mode 100644 index 25561a5accf..00000000000 --- a/terraform/environments/electronic-monitoring-data/lambdas/summarise_zip_lambda.py +++ /dev/null @@ -1,77 +0,0 @@ -import boto3 -import json -from logging import getLogger -from aws_lambda_powertools.utilities.streaming.transformations import ZipTransform -from aws_lambda_powertools.utilities.streaming.s3_object import S3Object - -logger = getLogger(__name__) - - -def handler(event, context): - """ - Read contents of a zip file and log directory structure and item count. 
- """ - logger.info(event) - - event_type = event["Records"][0]["eventName"] - bucket = event["Records"][0]["s3"]["bucket"]["name"] - object_key = event["Records"][0]["s3"]["object"]["key"] - - # Check if the object key ends with '.zip' - if not object_key.endswith(".zip"): - logger.info(f"Stopping for'{object_key = }' as suffix other than '.zip'") - return None - - logger.info(f"{object_key = } added to {bucket = } via {event_type = }") - - # Create S3 client - s3_client = boto3.client("s3") - - s3_object = S3Object(bucket=bucket, key=object_key) - - logger.info(f"Read in {object_key} from S3.") - - # Extract files from the zip - zip_ref = s3_object.transform(ZipTransform()) - # List all files in the zip - file_list = zip_ref.namelist() - - # Total number of files - total_files = len(file_list) - logger.info(f"Looping through {total_files} files.") - # Directory structure dictionary - directory_structure = {} - - # Read each file's content and build directory structure - for file_name in file_list: - if not file_name.endswith("/"): - parts = file_name.split("/") - current_dict = directory_structure - - # Traverse the directory structure and create dictionary entries - for part in parts[:-1]: - if part not in current_dict: - current_dict[part] = {} - current_dict = current_dict[part] - - logger.info(f"\n\nJSON directory structure:\n{directory_structure}") - - logger.info(f"\n\n Total files in {object_key}: {total_files}") - - # Writing the JSON file with the information - json_data = { - "total_objects": total_files, - "directory_structure": directory_structure, - } - json_content = json.dumps(json_data) - - # Saving JSON content to a new file with .json extension - new_object_key = object_key + ".info.json" - - s3_client.put_object( - Bucket=bucket, Key=new_object_key, Body=json_content.encode("utf-8") - ) - - logger.info(f"Zip info saved to {new_object_key}") - - return None diff --git a/terraform/environments/electronic-monitoring-data/lambdas/summarise_zip_lambda.zip b/terraform/environments/electronic-monitoring-data/lambdas/summarise_zip_lambda.zip deleted file mode 100644 index bad620e60d8..00000000000 Binary files a/terraform/environments/electronic-monitoring-data/lambdas/summarise_zip_lambda.zip and /dev/null differ diff --git a/terraform/environments/electronic-monitoring-data/lambdas/update_log_table/Dockerfile b/terraform/environments/electronic-monitoring-data/lambdas/update_log_table/Dockerfile deleted file mode 100644 index a4a3394f459..00000000000 --- a/terraform/environments/electronic-monitoring-data/lambdas/update_log_table/Dockerfile +++ /dev/null @@ -1,9 +0,0 @@ -FROM public.ecr.aws/lambda/python:3.11 - -COPY requirements.txt . 
- -RUN pip install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" - -COPY update_log_table.py ${LAMBDA_TASK_ROOT} - -CMD ["update_log_table.handler"] diff --git a/terraform/environments/electronic-monitoring-data/lambdas/update_log_table/Makefile b/terraform/environments/electronic-monitoring-data/lambdas/update_log_table/Makefile deleted file mode 100644 index 12a3f809c9c..00000000000 --- a/terraform/environments/electronic-monitoring-data/lambdas/update_log_table/Makefile +++ /dev/null @@ -1,34 +0,0 @@ -APP_NAME = update_log_table -APP_VERSION := $(shell terraform -chdir=../../ output -raw account_suffix) - -.PHONY: print-account-suffix -print-account-suffix: - @echo APP_VERSION=$(APP_VERSION) - -AWS_ECR_ACCOUNT_ID := $(shell terraform -chdir=../../ output -raw account_id) - -.PHONY: print-account-id -print-account-id: - @echo AWS_ECR_ACCOUNT_ID=$(AWS_ECR_ACCOUNT_ID) - - -AWS_ECR_REGION = eu-west-2 -AWS_ECR_REPO = lambdas/$(APP_NAME) - -TAG = $(APP_VERSION) - -.PHONY: docker/build docker/push docker/run docker/test - -docker/build : - docker build -t $(APP_NAME):$(APP_VERSION) . - -docker/push: docker/build - aws ecr get-login-password --region $(AWS_ECR_REGION) | docker login --username AWS --password-stdin $(AWS_ECR_ACCOUNT_ID).dkr.ecr.$(AWS_ECR_REGION).amazonaws.com - docker tag $(APP_NAME):$(APP_VERSION) $(AWS_ECR_ACCOUNT_ID).dkr.ecr.$(AWS_ECR_REGION).amazonaws.com/$(AWS_ECR_REPO):$(TAG) - docker push $(AWS_ECR_ACCOUNT_ID).dkr.ecr.$(AWS_ECR_REGION).amazonaws.com/$(AWS_ECR_REPO):$(TAG) - -docker/run: - docker run -p 9000:8080 $(AWS_ECR_ACCOUNT_ID).dkr.ecr.$(AWS_ECR_REGION).amazonaws.com/$(AWS_ECR_REPO):$(TAG) - -docker/test: - curl -XPOST 'http://localhost:9000/2015-03-31/functions/function/invocations' -d '{"input": {"test/*/Financials": "s3://dms-rds-to-parquet-20240606142913727200000001/test/dbo/Financials/LOAD00000001.parquet","db_info": ["test","dbo","Financials"]},"inputDetails": {"truncated": false},"resource": "arn:aws:lambda:eu-west-2:800964199911:function:update_log_table"}' diff --git a/terraform/environments/electronic-monitoring-data/lambdas/update_log_table/requirements.txt b/terraform/environments/electronic-monitoring-data/lambdas/update_log_table/requirements.txt deleted file mode 100644 index 64a842f4b2a..00000000000 --- a/terraform/environments/electronic-monitoring-data/lambdas/update_log_table/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -pandas[pyarrow]==2.2.1 \ No newline at end of file diff --git a/terraform/environments/electronic-monitoring-data/lambdas/update_log_table/update_log_table.py b/terraform/environments/electronic-monitoring-data/lambdas/update_log_table/update_log_table.py deleted file mode 100644 index 4fe63ba1ef6..00000000000 --- a/terraform/environments/electronic-monitoring-data/lambdas/update_log_table/update_log_table.py +++ /dev/null @@ -1,87 +0,0 @@ -import pandas as pd -import boto3 -import logging -import os - -logger = logging.getLogger(__name__) - -logger.setLevel(logging.INFO) - -S3_LOG_BUCKET = os.environ.get("S3_LOG_BUCKET") -DATABASE_NAME = os.environ.get("DATABASE_NAME") -TABLE_NAME = os.environ.get("TABLE_NAME") - -def s3_path_to_bucket_key(s3_path): - """ - Splits out s3 file path to bucket key combination - """ - return s3_path.replace("s3://", "").split("/", 1) - - -def bucket_key_to_s3_path(bucket, key): - """ - Takes an S3 bucket and key combination and returns the - full S3 path to that location.
- """ - return f"s3://{bucket}/{key}" - - -def _add_slash(s): - """ - Adds slash to end of string - """ - return s if s[-1] == "/" else s + "/" - - -def get_filepaths_from_s3_folder( - s3_folder_path, file_extension=None, exclude_zero_byte_files=True -): - """ - Get a list of filepaths from a bucket. If extension is set to a string - then only return files with that extension otherwise if set to None (default) - all filepaths are returned. - :param s3_folder_path: "s3://...." - :param extension: file extension, e.g. .json - :param exclude_zero_byte_files: Whether to filter out results of zero size: True - :return: A list of full s3 paths that were in the given s3 folder path - """ - - s3_resource = boto3.resource("s3") - - if file_extension is not None: - if file_extension[0] != ".": - file_extension = "." + file_extension - - # This guarantees that the path the user has given is really a 'folder'. - s3_folder_path = _add_slash(s3_folder_path) - - bucket, key = s3_path_to_bucket_key(s3_folder_path) - - s3b = s3_resource.Bucket(bucket) - obs = s3b.objects.filter(Prefix=key) - - if file_extension is not None: - obs = [o for o in obs if o.key.endswith(file_extension)] - - if exclude_zero_byte_files: - obs = [o for o in obs if o.size != 0] - - ob_keys = [o.key for o in obs] - - paths = sorted([bucket_key_to_s3_path(bucket, o) for o in ob_keys]) - - return paths - -def handler(event, context): - database_name, schema_name, table_name = event.get("db_info") - s3_path = f"s3://{S3_LOG_BUCKET}/{DATBASE_NAME}/{TABLE_NAME}/database_name={database_name}/full_table_name={database_name}_{schema_name}_{table_name}" - file_names = [file.split("/")[-1] for file in get_filepaths_from_s3_folder(s3_path)] - log_table = pd.read_parquet(s3_path) - log_table["table_to_ap"] = "True" - try: - log_table.to_parquet(f"{s3_path}/{file_names[0]}") - except Exception as e: - msg = f"An error has occured: {e}" - logger.error(msg) - raise msg - return {} \ No newline at end of file diff --git a/terraform/environments/electronic-monitoring-data/lambdas_iam.tf b/terraform/environments/electronic-monitoring-data/lambdas_iam.tf index b452ed71980..84d8ea792fc 100644 --- a/terraform/environments/electronic-monitoring-data/lambdas_iam.tf +++ b/terraform/environments/electronic-monitoring-data/lambdas_iam.tf @@ -1,227 +1,3 @@ -# -------------------------------------------------------------------------------- -# create_athena_external_tables IAM -# -------------------------------------------------------------------------------- - -resource "aws_iam_role" "create_athena_table_lambda" { - name = "create_athena_table_lambda" - assume_role_policy = data.aws_iam_policy_document.lambda_assume_role.json -} - -resource "aws_iam_role_policy_attachment" "get_glue_connections_and_tables" { - role = aws_iam_role.create_athena_table_lambda.name - policy_arn = aws_iam_policy.get_glue_connections_and_tables.arn -} - -resource "aws_iam_policy" "get_glue_connections_and_tables" { - name = "get_glue_connections_and_tables" - policy = data.aws_iam_policy_document.get_glue_connections_and_tables.json -} - -resource "aws_iam_role_policy_attachment" "get_s3_output" { - role = aws_iam_role.create_athena_table_lambda.name - policy_arn = aws_iam_policy.get_s3_output.arn -} - -resource "aws_iam_policy" "get_s3_output" { - name = "get_s3_output" - policy = data.aws_iam_policy_document.get_s3_output.json -} - - -data "aws_iam_policy_document" "get_glue_connections_and_tables" { - statement { - sid = "SecretsManagerDbCredentialsAccess" - effect = "Allow" - 
actions = ["secretsmanager:GetSecretValue"] - resources = [aws_secretsmanager_secret_version.db_glue_connection.arn] - } - statement { - sid = "TriggerLambda" - effect = "Allow" - actions = [ - "lambda:InvokeFunction" - ] - resources = [module.create_athena_table.lambda_function_arn] - } - statement { - sid = "GetGlueTables" - effect = "Allow" - actions = [ - "glue:GetTables", - "glue:GetTable", - "glue:GetDatabase", - "glue:GetDatabases", - "glue:CreateTable", - "glue:DeleteTable", - "glue:CreateDatabase", - "glue:DeleteDatabase" - ] - resources = [ - "arn:aws:glue:eu-west-2:${data.aws_caller_identity.current.account_id}:catalog", - "arn:aws:glue:eu-west-2:${data.aws_caller_identity.current.account_id}:database/*", - "arn:aws:glue:eu-west-2:${data.aws_caller_identity.current.account_id}:table/*", - "arn:aws:glue:eu-west-2:${data.aws_caller_identity.current.account_id}:userDefinedFunction/*" - - ] - } -} - -data "aws_iam_policy_document" "get_s3_output" { - statement { - effect = "Allow" - actions = [ - "s3:ListObjects" - ] - resources = [ - "${module.s3-dms-target-store-bucket.bucket.arn}/*" - ] - } - statement { - effect = "Allow" - actions = [ - "s3:ListBucket" - ] - resources = [ - module.s3-dms-target-store-bucket.bucket.arn - ] - } -} - - -# ------------------------------------------------ -# get metadata from rds -# ------------------------------------------------ - -resource "aws_iam_role" "get_metadata_from_rds" { - name = "get_metadata_from_rds_lambda" - assume_role_policy = data.aws_iam_policy_document.lambda_assume_role.json -} - -resource "aws_iam_role_policy_attachment" "get_metadata_from_rds_lambda_vpc_access_execution" { - role = aws_iam_role.get_metadata_from_rds.name - policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole" -} - -resource "aws_iam_role_policy_attachment" "get_metadata_from_rds_lambda_sqs_queue_access_execution" { - role = aws_iam_role.get_metadata_from_rds.name - policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaSQSQueueExecutionRole" -} - -resource "aws_iam_role_policy_attachment" "get_metadata_from_rds_get_glue_connections_and_tables" { - role = aws_iam_role.get_metadata_from_rds.name - policy_arn = aws_iam_policy.get_glue_connections_and_tables.arn -} - -resource "aws_iam_role_policy_attachment" "get_metadata_from_rds_get_s3_output" { - role = aws_iam_role.get_metadata_from_rds.name - policy_arn = aws_iam_policy.get_s3_output.arn -} - -resource "aws_iam_role_policy_attachment" "get_metadata_from_rds_write_meta_to_s3" { - role = aws_iam_role.get_metadata_from_rds.name - policy_arn = aws_iam_policy.write_meta_to_s3.arn -} - -resource "aws_iam_policy" "write_meta_to_s3" { - name = "write_meta_to_s3" - policy = data.aws_iam_policy_document.write_meta_to_s3.json -} - -data "aws_iam_policy_document" "write_meta_to_s3" { - statement { - effect = "Allow" - actions = [ - "s3:ListObjects", - "s3:PutObject", - "s3:PutObjectAcl" - ] - resources = [ - "${module.s3-metadata-bucket.bucket.arn}/*" - ] - } - statement { - effect = "Allow" - actions = [ - "s3:ListBucket" - ] - resources = [ - module.s3-metadata-bucket.bucket.arn - ] - } -} - -# ------------------------------------------------ -# Get tables from db -# ------------------------------------------------ - -resource "aws_iam_role" "query_output_to_list" { - name = "query_output_to_list" - assume_role_policy = data.aws_iam_policy_document.lambda_assume_role.json -} - - -# ------------------------------------------ -# get_file_keys_for_table -# 
------------------------------------------ - -resource "aws_iam_role" "get_file_keys_for_table" { - name = "get_file_keys_for_table" - assume_role_policy = data.aws_iam_policy_document.lambda_assume_role.json -} - -data "aws_iam_policy_document" "list_target_s3_bucket" { - statement { - effect = "Allow" - actions = ["s3:ListBucket"] - resources = [module.s3-dms-target-store-bucket.bucket.arn] - } -} - -resource "aws_iam_policy" "list_target_s3_bucket" { - name = "list_target_s3_bucket" - policy = data.aws_iam_policy_document.list_target_s3_bucket.json -} -resource "aws_iam_role_policy_attachment" "get_file_keys_for_table_list_target_s3_bucket" { - role = aws_iam_role.get_file_keys_for_table.name - policy_arn = aws_iam_policy.list_target_s3_bucket.arn -} - -# ------------------------------------------ -# update_log_table -# ------------------------------------------ - -resource "aws_iam_role" "update_log_table" { - name = "update_log_table" - assume_role_policy = data.aws_iam_policy_document.lambda_assume_role.json -} - -data "aws_iam_policy_document" "get_log_s3_files" { - statement { - effect = "Allow" - actions = [ - "s3:ListBucket", - "s3:GetObject", - "s3:PutObject", - "s3:GetBucketLocation", - "s3:DeleteObject" - ] - resources = [ - module.s3-dms-data-validation-bucket.bucket.arn, - "${module.s3-dms-data-validation-bucket.bucket.arn}/*" - ] - } -} - -resource "aws_iam_policy" "get_log_s3_files" { - name = "get_log_s3_files" - policy = data.aws_iam_policy_document.get_log_s3_files.json -} -resource "aws_iam_role_policy_attachment" "update_log_table_get_log_s3_files" { - role = aws_iam_role.update_log_table.name - policy_arn = aws_iam_policy.get_log_s3_files.arn -} - - # ------------------------------------------ # output_file_structure_as_json_from_zip # ------------------------------------------ @@ -319,7 +95,9 @@ data "aws_iam_policy_document" "load_json_table_s3_policy_document" { "athena:GetQueryResults", "athena:StopQueryExecution" ] - resources = ["*"] + resources = [ + "arn:aws:athena:${data.aws_region.current.name}:${local.env_account_id}:*/*" + ] } statement { sid = "GluePermissionsForLoadingJsonTable" @@ -334,7 +112,12 @@ data "aws_iam_policy_document" "load_json_table_s3_policy_document" { "glue:DeleteDatabase", "glue:UpdateTable" ] - resources = ["*"] + resources = [ + "arn:aws:glue:${data.aws_region.current.name}:${local.env_account_id}:catalog", + "arn:aws:glue:${data.aws_region.current.name}:${local.env_account_id}:schema/*", + "arn:aws:glue:${data.aws_region.current.name}:${local.env_account_id}:table/*/*", + "arn:aws:glue:${data.aws_region.current.name}:${local.env_account_id}:database/*" + ] } statement { sid = "SecretGetSlackKey" @@ -360,7 +143,7 @@ resource "aws_iam_role_policy_attachment" "load_json_table_s3_policy_policy_atta } # ------------------------------------------ -# unzip fip +# unzip file # ------------------------------------------ resource "aws_iam_role" "unzip_single_file" { @@ -621,3 +404,38 @@ resource "aws_iam_role_policy_attachment" "format_json_fms_data_policy_attachmen role = aws_iam_role.format_json_fms_data.name policy_arn = aws_iam_policy.format_json_fms_data.arn } + +#----------------------------------------------------------------------------------- +# Calculate Checksum Algorithm +#----------------------------------------------------------------------------------- + + +resource "aws_iam_role" "calculate_checksum" { + name = "calculate-checksum-lambda-iam-role" + assume_role_policy = data.aws_iam_policy_document.lambda_assume_role.json +}
+ +data "aws_iam_policy_document" "calculate_checksum" { + statement { + sid = "S3Permissions" + effect = "Allow" + actions = [ + "s3:PutObject", + "s3:PutObjectTagging", + "s3:PutObjectAcl", + "s3:GetObject", + "s3:GetObjectVersion", + "s3:GetObjectTagging", + "s3:GetObjectAttributes", + "s3:GetObjectVersionAttributes", + "s3:ListBucket" + ] + resources = ["${module.s3-data-bucket.bucket.arn}/*"] + } +} + +resource "aws_iam_role_policy" "calculate_checksum" { + name = "calculate_checksum-lambda-iam-policy" + role = aws_iam_role.calculate_checksum.id + policy = data.aws_iam_policy_document.calculate_checksum.json +} diff --git a/terraform/environments/electronic-monitoring-data/lambdas_layers.tf b/terraform/environments/electronic-monitoring-data/lambdas_layers.tf deleted file mode 100644 index 3622ecaa903..00000000000 --- a/terraform/environments/electronic-monitoring-data/lambdas_layers.tf +++ /dev/null @@ -1,51 +0,0 @@ -# -------------------------------------------------------------------------------------------------- -# create_athena_table layer -# -------------------------------------------------------------------------------------------------- -locals { - layer_path = "${local.lambda_path}/layers" - create_athena_table_layer_core = { - layer_zip_name = "create_athena_table_layer.zip" - layer_name = "create_athena_table_layer" - requirements_name = "create_athena_table_requirements.txt" - } - create_athena_table_layer = { - layer_zip_name = local.create_athena_table_layer_core.layer_zip_name - layer_name = local.create_athena_table_layer_core.layer_name - requirements_name = local.create_athena_table_layer_core.requirements_name - requirements_path = "${local.layer_path}/${local.create_athena_table_layer_core.requirements_name}" - layer_zip_path = "${local.layer_path}/${local.create_athena_table_layer_core.layer_zip_name}" - } -} - -resource "aws_lambda_layer_version" "create_athena_table_layer" { - filename = local.create_athena_table_layer.layer_zip_path - layer_name = local.create_athena_table_layer.layer_name - compatible_runtimes = ["python3.11"] - source_code_hash = filesha1(local.create_athena_table_layer.layer_zip_path) -} - -# -------------------------------------------------------------------------------------------------- -# mojap_metadata layer -# -------------------------------------------------------------------------------------------------- -locals { - mojap_metadata_core = { - layer_zip_name = "mojap_metadata.zip" - layer_name = "mojap_metadata" - requirements_name = "mojap_metadata_requirements.txt" - } - - mojap_metadata = { - layer_zip_name = local.mojap_metadata_core.layer_zip_name - layer_name = local.mojap_metadata_core.layer_name - requirements_name = local.mojap_metadata_core.requirements_name - requirements_path = "${local.layer_path}/${local.mojap_metadata_core.requirements_name}" - layer_zip_path = "${local.layer_path}/${local.mojap_metadata_core.layer_zip_name}" - } -} - -resource "aws_lambda_layer_version" "mojap_metadata_layer" { - filename = local.mojap_metadata.layer_zip_path - layer_name = local.mojap_metadata.layer_name - compatible_runtimes = ["python3.11"] - source_code_hash = filesha1(local.mojap_metadata.layer_zip_path) -} \ No newline at end of file diff --git a/terraform/environments/electronic-monitoring-data/lambdas_main.tf b/terraform/environments/electronic-monitoring-data/lambdas_main.tf index df247af5d39..37936d8177d 100644 --- a/terraform/environments/electronic-monitoring-data/lambdas_main.tf +++ 
b/terraform/environments/electronic-monitoring-data/lambdas_main.tf @@ -4,158 +4,6 @@ locals { db_name = local.is-production ? "g4s_cap_dw" : "test" } -# ------------------------------------------------------ -# Get Metadata from RDS Function -# ------------------------------------------------------ - -data "archive_file" "get_metadata_from_rds" { - type = "zip" - source_file = "${local.lambda_path}/get_metadata_from_rds.py" - output_path = "${local.lambda_path}/get_metadata_from_rds.zip" -} - -#checkov:skip=CKV_AWS_272 -module "get_metadata_from_rds_lambda" { - source = "./modules/lambdas" - filename = "${local.lambda_path}/get_metadata_from_rds.zip" - function_name = "get-metadata-from-rds" - role_arn = aws_iam_role.get_metadata_from_rds.arn - role_name = aws_iam_role.get_metadata_from_rds.name - handler = "get_metadata_from_rds.handler" - layers = [ - "arn:aws:lambda:eu-west-2:336392948345:layer:AWSSDKPandas-Python311:12", - aws_lambda_layer_version.mojap_metadata_layer.arn, - aws_lambda_layer_version.create_athena_table_layer.arn - ] - source_code_hash = data.archive_file.get_metadata_from_rds.output_base64sha256 - timeout = 900 - memory_size = 1024 - runtime = "python3.11" - security_group_ids = [aws_security_group.lambda_db_security_group.id] - subnet_ids = data.aws_subnets.shared-public.ids - environment_variables = { - SECRET_NAME = aws_secretsmanager_secret.db_glue_connection.name - METADATA_STORE_BUCKET = module.s3-metadata-bucket.bucket.id - } -} - - - -# ------------------------------------------------------ -# Create Individual Athena Semantic Layer Function -# ------------------------------------------------------ - - -data "archive_file" "create_athena_table" { - type = "zip" - source_file = "${local.lambda_path}/create_athena_table.py" - output_path = "${local.lambda_path}/create_athena_table.zip" -} - -module "create_athena_table" { - source = "./modules/lambdas" - filename = "${local.lambda_path}/create_athena_table.zip" - - function_name = "create_athena_table" - role_arn = aws_iam_role.create_athena_table_lambda.arn - role_name = aws_iam_role.create_athena_table_lambda.name - handler = "create_athena_table.handler" - layers = [ - "arn:aws:lambda:eu-west-2:017000801446:layer:AWSLambdaPowertoolsPythonV2:69", - "arn:aws:lambda:eu-west-2:336392948345:layer:AWSSDKPandas-Python311:12", - aws_lambda_layer_version.mojap_metadata_layer.arn, - aws_lambda_layer_version.create_athena_table_layer.arn - ] - source_code_hash = data.archive_file.create_athena_table.output_base64sha256 - timeout = 900 - memory_size = 1024 - runtime = "python3.11" - security_group_ids = [aws_security_group.lambda_db_security_group.id] - subnet_ids = data.aws_subnets.shared-public.ids - environment_variables = { - S3_BUCKET_NAME = module.s3-dms-target-store-bucket.bucket.id - } -} - -# ------------------------------------------------------ -# get file keys for table -# ------------------------------------------------------ - - -data "archive_file" "get_file_keys_for_table" { - type = "zip" - source_file = "${local.lambda_path}/get_file_keys_for_table.py" - output_path = "${local.lambda_path}/get_file_keys_for_table.zip" -} - -module "get_file_keys_for_table" { - source = "./modules/lambdas" - filename = "${local.lambda_path}/get_file_keys_for_table.zip" - function_name = "get_file_keys_for_table" - role_arn = aws_iam_role.get_file_keys_for_table.arn - role_name = aws_iam_role.get_file_keys_for_table.name - handler = "get_file_keys_for_table.handler" - source_code_hash = 
data.archive_file.get_file_keys_for_table.output_base64sha256 - layers = null - timeout = 900 - memory_size = 1024 - runtime = "python3.11" - security_group_ids = [aws_security_group.lambda_db_security_group.id] - subnet_ids = data.aws_subnets.shared-public.ids - environment_variables = { - PARQUET_BUCKET_NAME = module.s3-dms-target-store-bucket.bucket.id - } -} - -# ------------------------------------------------------ -# Get Tables from DB -# ------------------------------------------------------ - - -data "archive_file" "query_output_to_list" { - type = "zip" - source_file = "${local.lambda_path}/query_output_to_list.py" - output_path = "${local.lambda_path}/query_output_to_list.zip" -} - -module "query_output_to_list" { - source = "./modules/lambdas" - filename = "${local.lambda_path}/query_output_to_list.zip" - function_name = "query_output_to_list" - role_arn = aws_iam_role.query_output_to_list.arn - role_name = aws_iam_role.query_output_to_list.name - handler = "query_output_to_list.handler" - source_code_hash = data.archive_file.query_output_to_list.output_base64sha256 - layers = null - timeout = 900 - memory_size = 1024 - runtime = "python3.11" - security_group_ids = null - subnet_ids = null - environment_variables = null -} - -# ------------------------------------------------------ -# Update log table -# ------------------------------------------------------ - -module "update_log_table" { - source = "./modules/lambdas" - function_name = "update_log_table" - is_image = true - role_name = aws_iam_role.update_log_table.name - role_arn = aws_iam_role.update_log_table.arn - memory_size = 1024 - timeout = 899 - core_shared_services_id = local.environment_management.account_ids["core-shared-services-production"] - production_dev = local.is-production ? "prod" : "dev" - environment_variables = { - S3_LOG_BUCKET = module.s3-dms-data-validation-bucket.bucket.id - DATABASE_NAME = aws_glue_catalog_database.dms_dv_glue_catalog_db.name - TABLE_NAME = "glue_df_output" - } -} - #----------------------------------------------------------------------------------- # S3 lambda function to perform zip file structure extraction into json for Athena #----------------------------------------------------------------------------------- @@ -170,7 +18,7 @@ module "output_file_structure_as_json_from_zip" { timeout = 900 core_shared_services_id = local.environment_management.account_ids["core-shared-services-production"] production_dev = local.is-production ? "prod" : "dev" - security_group_ids = [aws_security_group.lambda_db_security_group.id] + security_group_ids = [aws_security_group.lambda_generic.id] subnet_ids = data.aws_subnets.shared-public.ids environment_variables = { OUTPUT_BUCKET = module.s3-json-directory-structure-bucket.bucket.id @@ -178,30 +26,6 @@ module "output_file_structure_as_json_from_zip" { } } -#----------------------------------------------------------------------------------- -# Load data from S3 to Athena -#----------------------------------------------------------------------------------- - -module "load_unstructured_structure" { - source = "./modules/lambdas" - function_name = "load_unstructured_structure" - is_image = true - role_name = aws_iam_role.load_json_table.name - role_arn = aws_iam_role.load_json_table.arn - memory_size = 2048 - timeout = 900 - core_shared_services_id = local.environment_management.account_ids["core-shared-services-production"] - production_dev = local.is-production ? 
"prod" : "dev" - environment_variables = { - DLT_PROJECT_DIR : "/tmp" - DLT_DATA_DIR : "/tmp" - DLT_PIPELINE_DIR : "/tmp" - JSON_BUCKET_NAME = module.s3-json-directory-structure-bucket.bucket.id - ATHENA_DUMP_BUCKET_NAME = module.s3-athena-bucket.bucket.id - } -} - - #----------------------------------------------------------------------------------- # Unzip single file #----------------------------------------------------------------------------------- @@ -214,6 +38,8 @@ module "unzip_single_file" { role_arn = aws_iam_role.unzip_single_file.arn memory_size = 2048 timeout = 900 + security_group_ids = [aws_security_group.lambda_generic.id] + subnet_ids = data.aws_subnets.shared-public.ids core_shared_services_id = local.environment_management.account_ids["core-shared-services-production"] production_dev = local.is-production ? "prod" : "dev" environment_variables = { @@ -234,6 +60,8 @@ module "unzipped_presigned_url" { role_arn = aws_iam_role.unzipped_presigned_url.arn memory_size = 2048 timeout = 900 + security_group_ids = [aws_security_group.lambda_generic.id] + subnet_ids = data.aws_subnets.shared-public.ids core_shared_services_id = local.environment_management.account_ids["core-shared-services-production"] production_dev = local.is-production ? "prod" : "dev" } @@ -250,6 +78,8 @@ module "rotate_iam_key" { role_arn = aws_iam_role.rotate_iam_keys.arn memory_size = 2048 timeout = 900 + security_group_ids = [aws_security_group.lambda_generic.id] + subnet_ids = data.aws_subnets.shared-public.ids core_shared_services_id = local.environment_management.account_ids["core-shared-services-production"] production_dev = local.is-production ? "prod" : "dev" } @@ -268,6 +98,8 @@ module "virus_scan_definition_upload" { role_arn = aws_iam_role.virus_scan_definition_upload.arn memory_size = 2048 timeout = 900 + security_group_ids = [aws_security_group.lambda_generic.id] + subnet_ids = data.aws_subnets.shared-public.ids core_shared_services_id = local.environment_management.account_ids["core-shared-services-production"] environment_variables = { MODE = "definition-upload", @@ -298,6 +130,8 @@ module "virus_scan_file" { ephemeral_storage_size = 10240 memory_size = 2048 timeout = 900 + security_group_ids = [aws_security_group.lambda_generic.id] + subnet_ids = data.aws_subnets.shared-public.ids core_shared_services_id = local.environment_management.account_ids["core-shared-services-production"] environment_variables = { MODE = "scan", @@ -320,6 +154,53 @@ module "format_json_fms_data" { role_arn = aws_iam_role.format_json_fms_data.arn memory_size = 1024 timeout = 900 + security_group_ids = [aws_security_group.lambda_generic.id] + subnet_ids = data.aws_subnets.shared-public.ids + core_shared_services_id = local.environment_management.account_ids["core-shared-services-production"] + production_dev = local.is-production ? "prod" : "dev" +} + +#----------------------------------------------------------------------------------- +# Calculate checksum +#----------------------------------------------------------------------------------- + +variable "checksum_algorithm" { + type = string + description = "Select Checksum Algorithm. Default and recommended choice is SHA256, however CRC32, CRC32C, SHA1 are also available." 
+
+module "calculate_checksum" {
+  source                  = "./modules/lambdas"
+  is_image                = true
+  function_name           = "calculate_checksum"
+  role_name               = aws_iam_role.calculate_checksum.name
+  role_arn                = aws_iam_role.calculate_checksum.arn
+  handler                 = "calculate_checksum.handler"
+  memory_size             = 4096
+  timeout                 = 900
+  security_group_ids      = [aws_security_group.lambda_generic.id]
+  subnet_ids              = data.aws_subnets.shared-public.ids
+  core_shared_services_id = local.environment_management.account_ids["core-shared-services-production"]
+  production_dev          = local.is-production ? "prod" : "dev"
+  environment_variables = {
+    Checksum = var.checksum_algorithm
+  }
+}
+
+resource "aws_lambda_permission" "allow_sns_invoke_checksum_lambda" {
+  statement_id  = "AllowSNSInvokeChecksum"
+  action        = "lambda:InvokeFunction"
+  function_name = module.calculate_checksum.lambda_function_name
+  principal     = "sns.amazonaws.com"
+  source_arn    = aws_sns_topic.s3_events.arn
+}
+
+resource "aws_sns_topic_subscription" "checksum_lambda_subscription" {
+  topic_arn = aws_sns_topic.s3_events.arn
+  protocol  = "lambda"
+  endpoint  = module.calculate_checksum.lambda_function_arn
+
+  depends_on = [aws_lambda_permission.allow_sns_invoke_checksum_lambda]
+}
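The permission/subscription pair above is the pattern any further consumer of the topic would follow: the topic exists precisely so that several lambdas can fan out from one stream of bucket events (see the comment on aws_sns_topic.s3_events in s3.tf below). A sketch for a second subscriber, using a hypothetical summarise_zip module that is not part of this change:

    resource "aws_lambda_permission" "allow_sns_invoke_summarise_zip" {
      statement_id  = "AllowSNSInvokeSummariseZip"
      action        = "lambda:InvokeFunction"
      function_name = module.summarise_zip.lambda_function_name # hypothetical module
      principal     = "sns.amazonaws.com"
      source_arn    = aws_sns_topic.s3_events.arn
    }

    resource "aws_sns_topic_subscription" "summarise_zip_subscription" {
      topic_arn  = aws_sns_topic.s3_events.arn
      protocol   = "lambda"
      endpoint   = module.summarise_zip.lambda_function_arn
      depends_on = [aws_lambda_permission.allow_sns_invoke_summarise_zip]
    }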
diff --git a/terraform/environments/electronic-monitoring-data/lambdas_secrets.tf b/terraform/environments/electronic-monitoring-data/lambdas_secrets.tf
deleted file mode 100644
index 756ad8a18fe..00000000000
--- a/terraform/environments/electronic-monitoring-data/lambdas_secrets.tf
+++ /dev/null
@@ -1,16 +0,0 @@
-resource "aws_secretsmanager_secret" "db_glue_connection" {
-  name = "db_glue_connection"
-}
-
-resource "aws_secretsmanager_secret_version" "db_glue_connection" {
-  secret_id = aws_secretsmanager_secret.db_glue_connection.id
-  secret_string = jsonencode(
-    {
-      "host"     = "${aws_db_instance.database_2022.address},${aws_db_instance.database_2022.port}",
-      "username" = aws_db_instance.database_2022.username,
-      "password" = aws_secretsmanager_secret_version.db_password.secret_string,
-      "engine"   = "sqlserver",
-      "port"     = aws_db_instance.database_2022.port
-    }
-  )
-}
\ No newline at end of file
diff --git a/terraform/environments/electronic-monitoring-data/lambdas_security_groups.tf b/terraform/environments/electronic-monitoring-data/lambdas_security_groups.tf
deleted file mode 100644
index c778cf0c7e0..00000000000
--- a/terraform/environments/electronic-monitoring-data/lambdas_security_groups.tf
+++ /dev/null
@@ -1,27 +0,0 @@
-# ------------------------------------------------------------------------------------------
-# create_athena_external_tables: SG for RDS Access
-# ------------------------------------------------------------------------------------------
-resource "aws_security_group" "lambda_db_security_group" {
-  name        = "lambda_db_instance_sg"
-  description = "Security Group allowing lambda access to RDS"
-  vpc_id      = data.aws_vpc.shared.id
-}
-
-resource "aws_vpc_security_group_egress_rule" "lambda_all_outbound" {
-  security_group_id = aws_security_group.lambda_db_security_group.id
-  cidr_ipv4         = "0.0.0.0/0"
-  ip_protocol       = "tcp"
-  from_port         = 0
-  to_port           = 65535
-  description       = "Lambda outbound access"
-}
-
-resource "aws_vpc_security_group_ingress_rule" "lambda_to_rds_sg_rule" {
-  security_group_id = aws_security_group.db.id
-
-  referenced_security_group_id = aws_security_group.lambda_db_security_group.id
-  ip_protocol                  = "tcp"
-  from_port                    = 1433
-  to_port                      = 1433
-  description                  = "Lambda RDS Access"
-}
diff --git a/terraform/environments/electronic-monitoring-data/lambdas_sg.tf b/terraform/environments/electronic-monitoring-data/lambdas_sg.tf
new file mode 100644
index 00000000000..bfecef8e137
--- /dev/null
+++ b/terraform/environments/electronic-monitoring-data/lambdas_sg.tf
@@ -0,0 +1,30 @@
+resource "aws_security_group" "lambda_generic" {
+
+  name_prefix = "${local.bucket_prefix}-generic-lambda-sg"
+  description = "Generic Lambda Security Group"
+  vpc_id      = data.aws_vpc.shared.id
+
+  lifecycle {
+    create_before_destroy = true
+  }
+}
+
+resource "aws_security_group_rule" "lambda_ingress_generic" {
+  cidr_blocks       = [data.aws_vpc.shared.cidr_block]
+  type              = "ingress"
+  description       = "Allow all TCP inbound from the shared VPC CIDR"
+  from_port         = 0
+  to_port           = 65535
+  protocol          = "tcp"
+  security_group_id = aws_security_group.lambda_generic.id
+}
+
+resource "aws_security_group_rule" "lambda_egress_generic" {
+  type              = "egress"
+  description       = "Allow all outbound to the shared VPC CIDR"
+  from_port         = 0
+  to_port           = 0
+  protocol          = "-1"
+  cidr_blocks       = [data.aws_vpc.shared.cidr_block]
+  security_group_id = aws_security_group.lambda_generic.id
+}
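Worth noting: the deleted lambdas_security_groups.tf used the newer dedicated rule resources (aws_vpc_security_group_egress_rule and friends), while the new lambdas_sg.tf reverts to the legacy aws_security_group_rule. Both work; for consistency with the rest of the codebase, the egress rule could be written in the newer style, for example (a sketch, not part of this change):

    resource "aws_vpc_security_group_egress_rule" "lambda_generic_all_outbound" {
      security_group_id = aws_security_group.lambda_generic.id
      cidr_ipv4         = data.aws_vpc.shared.cidr_block
      ip_protocol       = "-1" # all protocols; from_port/to_port must be omitted
      description       = "Allow all outbound to the shared VPC CIDR"
    }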
diff --git a/terraform/environments/electronic-monitoring-data/modules/export_bucket_push/main.tf b/terraform/environments/electronic-monitoring-data/modules/export_bucket_push/main.tf
index b2f01d367a2..284230506f5 100644
--- a/terraform/environments/electronic-monitoring-data/modules/export_bucket_push/main.tf
+++ b/terraform/environments/electronic-monitoring-data/modules/export_bucket_push/main.tf
@@ -94,6 +94,8 @@ module "push_lambda" {
   timeout                 = 900
   core_shared_services_id = var.core_shared_services_id
   production_dev          = var.production_dev
+  security_group_ids      = var.security_group_ids
+  subnet_ids              = var.subnet_ids
   environment_variables = {
     DESTINATION_BUCKET = var.destination_bucket_id
   }
diff --git a/terraform/environments/electronic-monitoring-data/modules/export_bucket_push/variables.tf b/terraform/environments/electronic-monitoring-data/modules/export_bucket_push/variables.tf
index ada7b3be6f7..985bc81c6d5 100644
--- a/terraform/environments/electronic-monitoring-data/modules/export_bucket_push/variables.tf
+++ b/terraform/environments/electronic-monitoring-data/modules/export_bucket_push/variables.tf
@@ -39,4 +39,14 @@ variable "logging_bucket" {
 variable "production_dev" {
   description = "The environment the lambda is being deployed to"
   type        = string
-}
\ No newline at end of file
+}
+
+variable "security_group_ids" {
+  description = "List of security group IDs associated with the Lambda function."
+  type        = list(string)
+}
+
+variable "subnet_ids" {
+  description = "List of subnet IDs associated with the Lambda function."
+  type        = list(string)
+}
diff --git a/terraform/environments/electronic-monitoring-data/modules/lambdas/main.tf b/terraform/environments/electronic-monitoring-data/modules/lambdas/main.tf
index 707fe4b3bf6..a0e7f0cac78 100644
--- a/terraform/environments/electronic-monitoring-data/modules/lambdas/main.tf
+++ b/terraform/environments/electronic-monitoring-data/modules/lambdas/main.tf
@@ -5,6 +5,8 @@ locals {
 
 data "aws_caller_identity" "current" {}
 
+data "aws_region" "current" {}
+
 resource "aws_sqs_queue" "lambda_dlq" {
   name              = "${var.function_name}-dlq"
   kms_master_key_id = aws_kms_key.lambda_env_key.id
@@ -141,15 +143,8 @@ resource "aws_cloudwatch_log_group" "lambda_cloudwatch_group" {
   kms_key_id = aws_kms_key.lambda_env_key.arn
 }
 
-
 resource "aws_lambda_function" "this" {
   #checkov:skip=CKV_AWS_272:Lambda needs code-signing, see ELM-1975
-  # Zip File config
-  filename         = var.is_image ? null : var.filename
-  handler          = var.is_image ? null : var.handler
-  layers           = var.is_image ? null : var.layers
-  source_code_hash = var.is_image ? null : var.source_code_hash
-  runtime          = var.is_image ? null : var.runtime
   # Image config
   image_uri    = var.is_image ? "${var.core_shared_services_id}.dkr.ecr.eu-west-2.amazonaws.com/${var.ecr_repo_name}:${local.function_uri}" : null
   package_type = var.is_image ? "Image" : null
@@ -164,7 +159,7 @@ resource "aws_lambda_function" "this" {
   }
 
   dynamic "vpc_config" {
-    for_each = local.use_vpc_config ? [1] : []
+    for_each = var.security_group_ids != null && var.subnet_ids != null ? [1] : []
     content {
       security_group_ids = var.security_group_ids
       subnet_ids         = var.subnet_ids
diff --git a/terraform/environments/electronic-monitoring-data/modules/lambdas/variables.tf b/terraform/environments/electronic-monitoring-data/modules/lambdas/variables.tf
index 42088e717a9..251f74a549b 100644
--- a/terraform/environments/electronic-monitoring-data/modules/lambdas/variables.tf
+++ b/terraform/environments/electronic-monitoring-data/modules/lambdas/variables.tf
@@ -76,15 +76,11 @@ variable "runtime" {
 variable "security_group_ids" {
   description = "List of security group IDs associated with the Lambda function."
   type        = list(string)
-  nullable    = true
-  default     = null
 }
 
 variable "subnet_ids" {
   description = "List of subnet IDs associated with the Lambda function."
   type        = list(string)
-  nullable    = true
-  default     = null
 }
 
 variable "environment_variables" {
@@ -95,7 +91,7 @@ variable "environment_variables" {
 }
 
 variable "reserved_concurrent_executions" {
-  description = "The amount of reserved concurrent executions for the Lambda function."
+  description = "The number of reserved concurrent executions for the Lambda function."
   type        = number
   default     = 10
 }
@@ -133,3 +129,11 @@ variable "ephemeral_storage_size" {
   type        = number
   default     = 512
 }
+
+variable "s3_bucket" {
+  description = "The name of the S3 bucket where the Lambda layer code is stored"
+  type        = string
+  nullable    = true
+  default     = null
+}
+
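With default = null removed, security_group_ids and subnet_ids become required arguments for every caller of modules/lambdas. Because nullable still defaults to true, a caller that genuinely wants a non-VPC Lambda can pass explicit nulls, which makes the vpc_config for_each above evaluate to an empty set. A minimal sketch, with hypothetical names:

    module "no_vpc_lambda" {
      source                  = "./modules/lambdas"
      function_name           = "no-vpc-example" # hypothetical
      is_image                = true
      role_name               = aws_iam_role.no_vpc_example.name # hypothetical role
      role_arn                = aws_iam_role.no_vpc_example.arn
      core_shared_services_id = local.environment_management.account_ids["core-shared-services-production"]
      production_dev          = local.is-production ? "prod" : "dev"

      # Explicit nulls satisfy the now-required variables; vpc_config is skipped.
      security_group_ids = null
      subnet_ids         = null
    }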
diff --git a/terraform/environments/electronic-monitoring-data/modules/lambdas/versions.tf b/terraform/environments/electronic-monitoring-data/modules/lambdas/versions.tf
new file mode 100644
index 00000000000..37200cffc68
--- /dev/null
+++ b/terraform/environments/electronic-monitoring-data/modules/lambdas/versions.tf
@@ -0,0 +1,13 @@
+terraform {
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = "~> 5.0"
+    }
+    external = {
+      source  = "hashicorp/external"
+      version = "~> 2.0"
+    }
+  }
+  required_version = ">= 1.0.1"
+}
diff --git a/terraform/environments/electronic-monitoring-data/modules/landing_bucket/main.tf b/terraform/environments/electronic-monitoring-data/modules/landing_bucket/main.tf
index 0c08d9bbd05..181b81e3a8d 100644
--- a/terraform/environments/electronic-monitoring-data/modules/landing_bucket/main.tf
+++ b/terraform/environments/electronic-monitoring-data/modules/landing_bucket/main.tf
@@ -155,6 +155,8 @@ module "process_landing_bucket_files" {
   timeout                 = 900
   core_shared_services_id = var.core_shared_services_id
   production_dev          = var.production_dev
+  security_group_ids      = var.security_group_ids
+  subnet_ids              = var.subnet_ids
   environment_variables = {
     DESTINATION_BUCKET = var.received_files_bucket_id
   }
diff --git a/terraform/environments/electronic-monitoring-data/modules/landing_bucket/variables.tf b/terraform/environments/electronic-monitoring-data/modules/landing_bucket/variables.tf
index 1be92a4e491..143389f769c 100644
--- a/terraform/environments/electronic-monitoring-data/modules/landing_bucket/variables.tf
+++ b/terraform/environments/electronic-monitoring-data/modules/landing_bucket/variables.tf
@@ -54,3 +54,13 @@ variable "received_files_bucket_id" {
   description = "The id of the bucket data will be moved to"
   type        = string
 }
+
+variable "security_group_ids" {
+  description = "List of security group IDs associated with the Lambda function."
+  type        = list(string)
+}
+
+variable "subnet_ids" {
+  description = "List of subnet IDs associated with the Lambda function."
+  type        = list(string)
+}
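As with modules/lambdas, these two variables have no default, so every instantiation of landing_bucket now has to supply them; the s3.tf changes further down do exactly that for each landing bucket. For reference, a minimal illustrative call:

    module "example_landing_bucket" {
      source = "./modules/landing_bucket"

      # ...existing arguments unchanged...

      security_group_ids = [aws_security_group.lambda_generic.id]
      subnet_ids         = data.aws_subnets.shared-public.ids
    }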
diff --git a/terraform/environments/electronic-monitoring-data/retrigger-large-zip-file.json b/terraform/environments/electronic-monitoring-data/retrigger-large-zip-file.json
deleted file mode 100644
index 92a93bee554..00000000000
--- a/terraform/environments/electronic-monitoring-data/retrigger-large-zip-file.json
+++ /dev/null
@@ -1,38 +0,0 @@
-{
-  "Records": [
-    {
-      "eventVersion": "2.0",
-      "eventSource": "aws:s3",
-      "awsRegion": "us-east-1",
-      "eventTime": "1970-01-01T00:00:00.000Z",
-      "eventName": "ObjectCreated:Put",
-      "userIdentity": {
-        "principalId": "EXAMPLE"
-      },
-      "requestParameters": {
-        "sourceIPAddress": "127.0.0.1"
-      },
-      "responseElements": {
-        "x-amz-request-id": "EXAMPLE123456789",
-        "x-amz-id-2": "EXAMPLE123/5678abcdefghijklambdaisawesome/mnopqrstuvwxyzABCDEFGH"
-      },
-      "s3": {
-        "s3SchemaVersion": "1.0",
-        "configurationId": "testConfigRule",
-        "bucket": {
-          "name": "em-data-store-20240131164239595200000002",
-          "ownerIdentity": {
-            "principalId": "EXAMPLE"
-          },
-          "arn": "arn:aws:s3:::example-bucket"
-        },
-        "object": {
-          "key": "g4s/atrium_unstructured/2024-03-22git/test.zip",
-          "size": 1024,
-          "eTag": "0123456789abcdef0123456789abcdef",
-          "sequencer": "0A1B2C3D4E5F678901"
-        }
-      }
-    }
-  ]
-}
diff --git a/terraform/environments/electronic-monitoring-data/s3.tf b/terraform/environments/electronic-monitoring-data/s3.tf
index 5b7fb963bbb..4a649460f8a 100644
--- a/terraform/environments/electronic-monitoring-data/s3.tf
+++ b/terraform/environments/electronic-monitoring-data/s3.tf
@@ -539,6 +539,57 @@ module "s3-data-bucket" {
   tags = local.tags
 }
 
+
+# bucket notification for data store
+resource "aws_s3_bucket_notification" "data_store" {
+  depends_on = [aws_sns_topic_policy.s3_events_policy]
+  bucket     = module.s3-data-bucket.bucket.id
+
+  # Only for object-created events, as these are triggered when data is
+  # copied from the landing bucket.
+  topic {
+    topic_arn = aws_sns_topic.s3_events.arn
+    events = [
+      "s3:ObjectCreated:*"
+    ]
+  }
+}
+
+# SNS topic so that multiple lambdas can be triggered off the same events
+resource "aws_sns_topic" "s3_events" {
+  name              = "${module.s3-data-bucket.bucket.id}-object-created-topic"
+  kms_master_key_id = "alias/aws/sns"
+}
+
+# IAM policy document for the SNS topic policy
+data "aws_iam_policy_document" "sns_policy" {
+  statement {
+    effect = "Allow"
+
+    principals {
+      type        = "Service"
+      identifiers = ["s3.amazonaws.com"]
+    }
+
+    actions   = ["SNS:Publish"]
+    resources = [aws_sns_topic.s3_events.arn]
+
+    condition {
+      test     = "ArnLike"
+      variable = "aws:SourceArn"
+      values   = [module.s3-data-bucket.bucket.arn]
+    }
+  }
+}
+
+# Apply policy to the SNS topic
+resource "aws_sns_topic_policy" "s3_events_policy" {
+  arn    = aws_sns_topic.s3_events.arn
+  policy = data.aws_iam_policy_document.sns_policy.json
+}
+
+
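One point worth verifying before relying on this notification chain: AWS documents that S3 event notifications cannot be delivered to an SNS topic encrypted with the AWS-managed aws/sns key, because that key's policy cannot be amended to grant s3.amazonaws.com the kms:GenerateDataKey*/kms:Decrypt access it needs. If deliveries fail silently, a customer-managed key along these lines should work (resource names hypothetical):

    data "aws_iam_policy_document" "sns_s3_events_kms" {
      # Let S3 use the key when publishing bucket events to the topic.
      statement {
        sid       = "AllowS3UseOfKey"
        effect    = "Allow"
        actions   = ["kms:GenerateDataKey*", "kms:Decrypt"]
        resources = ["*"]

        principals {
          type        = "Service"
          identifiers = ["s3.amazonaws.com"]
        }
      }
      # ...plus the usual key-administration statement for the account root...
    }

    resource "aws_kms_key" "sns_s3_events" {
      description = "CMK for the s3_events topic so S3 can publish to it"
      policy      = data.aws_iam_policy_document.sns_s3_events_kms.json
    }

    # then on the topic:
    #   kms_master_key_id = aws_kms_key.sns_s3_events.arn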
"prod" : "dev" + security_group_ids = [aws_security_group.lambda_generic.id] + subnet_ids = data.aws_subnets.shared-public.ids providers = { aws = aws diff --git a/terraform/environments/electronic-monitoring-data/step_functions_iam.tf b/terraform/environments/electronic-monitoring-data/step_functions_iam.tf index 878c35619b1..2f58afa09a8 100644 --- a/terraform/environments/electronic-monitoring-data/step_functions_iam.tf +++ b/terraform/environments/electronic-monitoring-data/step_functions_iam.tf @@ -1,42 +1,3 @@ -# ------------------------------------------ -# Fake Athena Layer -# ------------------------------------------ - -data "aws_iam_policy_document" "lambda_invoke_policy" { - statement { - effect = "Allow" - - actions = [ - "lambda:InvokeFunction", - ] - - resources = [ - "${module.get_metadata_from_rds_lambda.lambda_function_arn}:*", - "${module.create_athena_table.lambda_function_arn}:*", - "${module.get_file_keys_for_table.lambda_function_arn}:*", - ] - } - statement { - effect = "Allow" - - actions = [ - "lambda:InvokeFunction", - ] - - resources = [ - module.get_metadata_from_rds_lambda.lambda_function_arn, - module.create_athena_table.lambda_function_arn, - module.get_file_keys_for_table.lambda_function_arn, - ] - } -} - -resource "aws_iam_policy" "lambda_invoke_policy" { - name = "LambdaInvokePolicy" - description = "Policy to allow invoking specific Lambda functions" - policy = data.aws_iam_policy_document.lambda_invoke_policy.json -} - # ------------------------------------------ # Unzip Files # ------------------------------------------ diff --git a/terraform/environments/electronic-monitoring-data/step_functions_main.tf b/terraform/environments/electronic-monitoring-data/step_functions_main.tf index 8cb6b5726ed..dc064d20d19 100644 --- a/terraform/environments/electronic-monitoring-data/step_functions_main.tf +++ b/terraform/environments/electronic-monitoring-data/step_functions_main.tf @@ -1,17 +1,3 @@ -# ------------------------------------------ -# Fake Athena Layer -# ------------------------------------------ - -module "athena_layer" { - source = "./modules/step_function" - name = "athena_layer" - iam_policies = tomap({ "lambda_invoke_policy" = aws_iam_policy.lambda_invoke_policy }) - variable_dictionary = tomap({ - get_metadata_lambda_arn = module.get_metadata_from_rds_lambda.lambda_function_arn - create_athena_table = module.create_athena_table.lambda_function_arn - }) -} - # ------------------------------------------ # Unzip Files # ------------------------------------------