From 1c96810b09cfb82b1cedb365b01038cba07976b0 Mon Sep 17 00:00:00 2001 From: Tarun Menon <64295670+tarunmenon95@users.noreply.github.com> Date: Thu, 11 Jul 2024 11:28:13 +1000 Subject: [PATCH] Feature/reencryption refactor (#150) * revert existing re-encryption * Update logging * more logging * add kms key tag to backup resource * fix tag * add check on kms tag * add re-encrypt absract function * fix reference to backup tags * update rds encrypt checks * catch error for no snapshots * add param for reencrypt to sam template * add func to check snaps in encrypt process * fix error handling for no snapshot * fixes * more logging * logging * Fix syntax error in snapshot identifier * remove debug logging * readd docdb fix --- Jenkinsfile | 1 + README.md | 4 +- deploy-sam-template.sh | 2 +- setup.py | 2 +- shelvery/__init__.py | 2 +- shelvery/documentdb_backup.py | 3 ++ shelvery/ebs_backup.py | 4 ++ shelvery/ec2ami_backup.py | 3 ++ shelvery/engine.py | 24 +++++++-- shelvery/rds_backup.py | 87 +++++++++++++------------------- shelvery/rds_cluster_backup.py | 90 +++++++++++++--------------------- shelvery/redshift_backup.py | 3 ++ template.yaml | 9 +++- 13 files changed, 114 insertions(+), 120 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 055cbbb..b27ab05 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -31,6 +31,7 @@ pipeline { } stage('Unit Tests') { + when { changeRequest target: 'master' } steps { script { //Source Account diff --git a/README.md b/README.md index d2aa9f1..48b379e 100644 --- a/README.md +++ b/README.md @@ -184,8 +184,8 @@ IAM role that Lambda is running under. ## Runtime environment -Shelvery requires Python3.6 to run. You can run it either from any server or local machine capable of interpreting -Python3.6 code, or as Amazon Lambda functions. All Shelvery code is written in such way that it supports +Shelvery requires Python3.11 to run. You can run it either from any server or local machine capable of interpreting +Python3.11 code, or as Amazon Lambda functions. All Shelvery code is written in such way that it supports both CLI and Lambda execution. ## Backup lifecycle and retention periods diff --git a/deploy-sam-template.sh b/deploy-sam-template.sh index 8fb7c7d..2b4a25d 100755 --- a/deploy-sam-template.sh +++ b/deploy-sam-template.sh @@ -1,7 +1,7 @@ #!/bin/bash set -e -SHELVERY_VERSION=0.9.9 +SHELVERY_VERSION=0.9.10 # set DOCKERUSERID to current user. could be changed with -u uid DOCKERUSERID="-u $(id -u)" diff --git a/setup.py b/setup.py index a136e65..a3bad33 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ from setuptools import setup -setup(name='shelvery', version='0.9.9', author='Base2Services R&D', +setup(name='shelvery', version='0.9.10', author='Base2Services R&D', author_email='itsupport@base2services.com', url='http://github.com/base2Services/shelvery-aws-backups', classifiers=[ diff --git a/shelvery/__init__.py b/shelvery/__init__.py index 69d3820..c860709 100644 --- a/shelvery/__init__.py +++ b/shelvery/__init__.py @@ -1,4 +1,4 @@ -__version__ = '0.9.9' +__version__ = '0.9.10' LAMBDA_WAIT_ITERATION = 'lambda_wait_iteration' S3_DATA_PREFIX = 'backups' SHELVERY_DO_BACKUP_TAGS = ['True', 'true', '1', 'TRUE'] diff --git a/shelvery/documentdb_backup.py b/shelvery/documentdb_backup.py index 5d5a920..401520b 100644 --- a/shelvery/documentdb_backup.py +++ b/shelvery/documentdb_backup.py @@ -121,6 +121,9 @@ def copy_backup_to_region(self, backup_id: str, region: str) -> str: CopyTags=False ) return backup_id + + def create_encrypted_backup(self, backup_id: str, kms_key: str, region: str) -> str: + return backup_id def copy_shared_backup(self, source_account: str, source_backup: BackupResource): docdb_client = AwsHelper.boto3_client('docdb', arn=self.role_arn, external_id=self.role_external_id) diff --git a/shelvery/ebs_backup.py b/shelvery/ebs_backup.py index f7cb68a..472e0ac 100644 --- a/shelvery/ebs_backup.py +++ b/shelvery/ebs_backup.py @@ -125,6 +125,10 @@ def copy_shared_backup(self, source_account: str, source_backup: BackupResource) SourceRegion=source_backup.region ) return snap['SnapshotId'] + + def create_encrypted_backup(self, backup_id: str, kms_key: str, region: str) -> str: + return backup_id + # collect all volumes tagged with given tag, in paginated manner def collect_volumes(self, tag_name: str): load_volumes = True diff --git a/shelvery/ec2ami_backup.py b/shelvery/ec2ami_backup.py index b8fba27..ded23d5 100644 --- a/shelvery/ec2ami_backup.py +++ b/shelvery/ec2ami_backup.py @@ -221,3 +221,6 @@ def share_backup_with_account(self, backup_region: str, backup_id: str, aws_acco }, UserIds=[aws_account_id], OperationType='add') + + def create_encrypted_backup(self, backup_id: str, kms_key: str, region: str) -> str: + return backup_id diff --git a/shelvery/engine.py b/shelvery/engine.py index b9b1f43..96c982e 100644 --- a/shelvery/engine.py +++ b/shelvery/engine.py @@ -256,6 +256,12 @@ def create_backups(self) -> List[BackupResource]: dr_regions = RuntimeConfig.get_dr_regions(backup_resource.entity_resource.tags, self) backup_resource.tags[f"{RuntimeConfig.get_tag_prefix()}:dr_regions"] = ','.join(dr_regions) + + re_encrypt_key = RuntimeConfig.get_reencrypt_kms_key_id(backup_resource.entity_resource.tags, self) + if re_encrypt_key := RuntimeConfig.get_reencrypt_kms_key_id(backup_resource.entity_resource.tags, self): + backup_resource.tags[f"{RuntimeConfig.get_tag_prefix()}:config:shelvery_reencrypt_kms_key_id"] = re_encrypt_key + + self.logger.info(f"Processing {resource_type} with id {r.resource_id}") self.logger.info(f"Creating backup {backup_resource.name}") @@ -661,6 +667,11 @@ def do_share_backup(self, map_args={}, **kwargs): backup_region = kwargs['Region'] destination_account_id = kwargs['AwsAccountId'] backup_resource = self.get_backup_resource(backup_region, backup_id) + + if re_encrypt_key := RuntimeConfig.get_reencrypt_kms_key_id(backup_resource.tags, self): + self.logger.info(f"KMS Key detected during share for {backup_resource.backup_id}") + backup_id = self.create_encrypted_backup(backup_id, re_encrypt_key, backup_region) + # if backup is not available, exit and rely on recursive lambda call do share backup # in non lambda mode this should never happen if RuntimeConfig.is_offload_queueing(self): @@ -675,11 +686,8 @@ def do_share_backup(self, map_args={}, **kwargs): self.logger.info(f"Do share backup {backup_id} ({backup_region}) with {destination_account_id}") try: - new_backup_id = self.share_backup_with_account(backup_region, backup_id, destination_account_id) - #assign new backup id if new snapshot is created (eg: re-encrypted rds snapshot) - backup_id = new_backup_id if new_backup_id else backup_id - self.logger.info(f"Shared backup {backup_id} ({backup_region}) with {destination_account_id}") - backup_resource = self.get_backup_resource(backup_region, backup_id) + self.share_backup_with_account(backup_region, backup_id, destination_account_id) + backup_resource = self.get_backup_resource(backup_region, backup_id) self._write_backup_data( backup_resource, self._get_data_bucket(backup_region), @@ -840,3 +848,9 @@ def get_backup_resource(self, backup_region: str, backup_id: str) -> BackupResou """ Get Backup Resource within region, identified by its backup_id """ + + @abstractmethod + def create_encrypted_backup(self, backup_id: str, kms_key: str, backup_region: str) -> str: + """ + Re-encrypt an existing backup with a new KMS key, returns the new backup id + """ \ No newline at end of file diff --git a/shelvery/rds_backup.py b/shelvery/rds_backup.py index c116021..5c1f79d 100644 --- a/shelvery/rds_backup.py +++ b/shelvery/rds_backup.py @@ -95,53 +95,11 @@ def get_existing_backups(self, backup_tag_prefix: str) -> List[BackupResource]: def share_backup_with_account(self, backup_region: str, backup_id: str, aws_account_id: str): rds_client = AwsHelper.boto3_client('rds', region_name=backup_region, arn=self.role_arn, external_id=self.role_external_id) - backup_resource = self.get_backup_resource(backup_region, backup_id) - kms_key = RuntimeConfig.get_reencrypt_kms_key_id(backup_resource.tags, self) - - # if a re-encrypt key is provided, create new re-encrypted snapshot and share that instead - if kms_key: - self.logger.info(f"Re-encrypt KMS Key found, creating new backup with {kms_key}") - # create re-encrypted backup - backup_id = self.copy_backup_to_region(backup_id, backup_region) - self.logger.info(f"Creating new encrypted backup {backup_id}") - # wait till new snapshot is available - if not self.wait_backup_available(backup_region=backup_region, - backup_id=backup_id, - lambda_method='do_share_backup', - lambda_args={}): - return - self.logger.info(f"New encrypted backup {backup_id} created") - - #Get new snapshot ARN - snapshots = rds_client.describe_db_snapshots(DBSnapshotIdentifier=backup_id) - snapshot_arn = snapshots['DBSnapshots'][0]['DBSnapshotArn'] - - #Update tags with '-re-encrypted' suffix - self.logger.info(f"Updating tags for new snapshot - {backup_id}") - tags = self.get_backup_resource(backup_region, backup_id).tags - tags.update({'Name': backup_id, 'shelvery:name': backup_id}) - tag_list = [{'Key': key, 'Value': value} for key, value in tags.items()] - rds_client.add_tags_to_resource( - ResourceName=snapshot_arn, - Tags=tag_list - ) - created_new_encrypted_snapshot = True - else: - self.logger.info(f"No re-encrypt key detected") - created_new_encrypted_snapshot = False - rds_client.modify_db_snapshot_attribute( DBSnapshotIdentifier=backup_id, AttributeName='restore', ValuesToAdd=[aws_account_id] ) - # if re-encryption occured, clean up old snapshot - if created_new_encrypted_snapshot: - # delete old snapshot - self.delete_backup(backup_resource) - self.logger.info(f"Cleaning up un-encrypted backup: {backup_resource.backup_id}") - - return backup_id def copy_backup_to_region(self, backup_id: str, region: str) -> str: local_region = boto3.session.Session().region_name @@ -149,22 +107,45 @@ def copy_backup_to_region(self, backup_id: str, region: str) -> str: rds_client = AwsHelper.boto3_client('rds', region_name=region, arn=self.role_arn, external_id=self.role_external_id) snapshots = client_local.describe_db_snapshots(DBSnapshotIdentifier=backup_id) snapshot = snapshots['DBSnapshots'][0] - backup_resource = self.get_backup_resource(local_region, backup_id) - kms_key = RuntimeConfig.get_reencrypt_kms_key_id(backup_resource.tags, self) + rds_client.copy_db_snapshot( + SourceDBSnapshotIdentifier=snapshot['DBSnapshotArn'], + TargetDBSnapshotIdentifier=backup_id, + SourceRegion=local_region, + # tags are created explicitly + CopyTags=False + ) + return backup_id + + def snapshot_exists(self, client, backup_id): + try: + response = client.describe_db_snapshots(DBSnapshotIdentifier=backup_id) + snapshots = response.get('DBSnapshots', []) + return bool(snapshots) + except ClientError as e: + if e.response['Error']['Code'] == 'DBSnapshotNotFound': + return False + else: + print(e.response['Error']['Code']) + raise e + + def create_encrypted_backup(self, backup_id: str, kms_key: str, region: str) -> str: + local_region = boto3.session.Session().region_name + client_local = AwsHelper.boto3_client('rds', arn=self.role_arn, external_id=self.role_external_id) + rds_client = AwsHelper.boto3_client('rds', region_name=region, arn=self.role_arn, external_id=self.role_external_id) + snapshots = client_local.describe_db_snapshots(DBSnapshotIdentifier=backup_id) + snapshot = snapshots['DBSnapshots'][0] + backup_id = f'{backup_id}-re-encrypted' + + if self.snapshot_exists(rds_client, backup_id): + return backup_id + rds_client_params = { 'SourceDBSnapshotIdentifier': snapshot['DBSnapshotArn'], 'TargetDBSnapshotIdentifier': backup_id, 'SourceRegion': local_region, - # tags are created explicitly - 'CopyTags': False + 'CopyTags': True, + 'KmsKeyId': kms_key, } - # add kms key params if reencrypt key is defined - if kms_key is not None: - backup_id = f'{backup_id}-re-encrypted' - rds_client_params['KmsKeyId'] = kms_key - rds_client_params['CopyTags'] = True - rds_client_params['TargetDBSnapshotIdentifier'] = backup_id - rds_client.copy_db_snapshot(**rds_client_params) return backup_id diff --git a/shelvery/rds_cluster_backup.py b/shelvery/rds_cluster_backup.py index f88cca7..79579bd 100644 --- a/shelvery/rds_cluster_backup.py +++ b/shelvery/rds_cluster_backup.py @@ -1,4 +1,3 @@ -from tracemalloc import Snapshot import boto3 from shelvery.runtime_config import RuntimeConfig @@ -97,53 +96,11 @@ def get_existing_backups(self, backup_tag_prefix: str) -> List[BackupResource]: def share_backup_with_account(self, backup_region: str, backup_id: str, aws_account_id: str): rds_client = AwsHelper.boto3_client('rds', region_name=backup_region, arn=self.role_arn, external_id=self.role_external_id) - backup_resource = self.get_backup_resource(backup_region, backup_id) - kms_key = RuntimeConfig.get_reencrypt_kms_key_id(backup_resource.tags, self) - - # if a re-encrypt key is provided, create new re-encrypted snapshot and share that instead - if kms_key: - self.logger.info(f"Re-encrypt KMS Key found, creating new backup with {kms_key}") - # create re-encrypted backup - backup_id = self.copy_backup_to_region(backup_id, backup_region) - self.logger.info(f"Creating new encrypted backup {backup_id}") - # wait till new snapshot is available - if not self.wait_backup_available(backup_region=backup_region, - backup_id=backup_id, - lambda_method='do_share_backup', - lambda_args={}): - return - self.logger.info(f"New encrypted backup {backup_id} created") - - #Get new snapshot ARN - snapshots = rds_client.describe_db_cluster_snapshots(DBClusterSnapshotIdentifier=backup_id) - snapshot_arn = snapshots['DBClusterSnapshots'][0]['DBClusterSnapshotArn'] - - #Update tags with '-re-encrypted' suffix - self.logger.info(f"Updating tags for new snapshot - {backup_id}") - tags = self.get_backup_resource(backup_region, backup_id).tags - tags.update({'Name': backup_id, 'shelvery:name': backup_id}) - tag_list = [{'Key': key, 'Value': value} for key, value in tags.items()] - rds_client.add_tags_to_resource( - ResourceName=snapshot_arn, - Tags=tag_list - ) - created_new_encrypted_snapshot = True - else: - self.logger.info(f"No re-encrypt key detected") - created_new_encrypted_snapshot = False - rds_client.modify_db_cluster_snapshot_attribute( DBClusterSnapshotIdentifier=backup_id, AttributeName='restore', ValuesToAdd=[aws_account_id] ) - # if re-encryption occured, clean up old snapshot - if created_new_encrypted_snapshot: - # delete old snapshot - self.delete_backup(backup_resource) - self.logger.info(f"Cleaning up un-encrypted backup: {backup_resource.backup_id}") - - return backup_id def copy_backup_to_region(self, backup_id: str, region: str) -> str: local_region = boto3.session.Session().region_name @@ -151,20 +108,45 @@ def copy_backup_to_region(self, backup_id: str, region: str) -> str: rds_client = AwsHelper.boto3_client('rds', region_name=region) snapshots = client_local.describe_db_cluster_snapshots(DBClusterSnapshotIdentifier=backup_id) snapshot = snapshots['DBClusterSnapshots'][0] - backup_resource = self.get_backup_resource(local_region, backup_id) - kms_key = RuntimeConfig.get_reencrypt_kms_key_id(backup_resource.tags, self) + rds_client.copy_db_cluster_snapshot( + SourceDBClusterSnapshotIdentifier=snapshot['DBClusterSnapshotArn'], + TargetDBClusterSnapshotIdentifier=backup_id, + SourceRegion=local_region, + # tags are created explicitly + CopyTags=False + ) + return backup_id + + def snapshot_exists(client, backup_id): + try: + response = client.describe_db_cluster_snapshots(DBClusterSnapshotIdentifier=backup_id) + snapshots = response.get('DBClusterSnapshots', []) + return bool(snapshots) + except ClientError as e: + if e.response['Error']['Code'] == 'DBClusterSnapshotNotFound': + return False + else: + print(e.response['Error']['Code']) + raise e + + def create_encrypted_backup(self, backup_id: str, kms_key: str, region: str) -> str: + local_region = boto3.session.Session().region_name + client_local = AwsHelper.boto3_client('rds', arn=self.role_arn, external_id=self.role_external_id) + rds_client = AwsHelper.boto3_client('rds', region_name=region) + snapshots = client_local.describe_db_cluster_snapshots(DBClusterSnapshotIdentifier=backup_id) + snapshot = snapshots['DBClusterSnapshots'][0] + backup_id = f'{backup_id}-re-encrypted' + + if self.snapshot_exists(rds_client, backup_id): + return backup_id + rds_client_params = { 'SourceDBClusterSnapshotIdentifier': snapshot['DBClusterSnapshotArn'], 'TargetDBClusterSnapshotIdentifier': backup_id, 'SourceRegion': local_region, - 'CopyTags': False + 'CopyTags': True, + 'KmsKeyId': kms_key, } - # add kms key params if re-encrypt key is defined - if kms_key is not None: - backup_id = f'{backup_id}-re-encrypted' - rds_client_params['KmsKeyId'] = kms_key - rds_client_params['CopyTags'] = True - rds_client_params['TargetDBClusterSnapshotIdentifier'] = backup_id rds_client.copy_db_cluster_snapshot(**rds_client_params) return backup_id @@ -255,7 +237,6 @@ def get_all_clusters(self, rds_client): db_clusters = [] # temporary list of api models, as calls are batched temp_clusters = rds_client.describe_db_clusters() - db_clusters.extend(temp_clusters['DBClusters']) # collect database instances while 'Marker' in temp_clusters: @@ -304,9 +285,8 @@ def collect_all_snapshots(self, rds_client): self.logger.info(f"Collected {len(tmp_snapshots['DBClusterSnapshots'])} manual snapshots. Continuing collection...") tmp_snapshots = rds_client.describe_db_cluster_snapshots(SnapshotType='manual', Marker=tmp_snapshots['Marker']) all_snapshots.extend(tmp_snapshots['DBClusterSnapshots']) - - all_snapshots = [snapshot for snapshot in all_snapshots if snapshot.get('Engine') != 'docdb'] + all_snapshots = [snapshot for snapshot in all_snapshots if snapshot.get('Engine') != 'docdb'] self.logger.info(f"Collected {len(all_snapshots)} manual snapshots.") self.populate_snap_entity_resource(all_snapshots) diff --git a/shelvery/redshift_backup.py b/shelvery/redshift_backup.py index 3f0c30a..1e6476c 100644 --- a/shelvery/redshift_backup.py +++ b/shelvery/redshift_backup.py @@ -215,6 +215,9 @@ def copy_backup_to_region(self, backup_id: str, region: str) -> str: "using EnableSnapshotCopy API Call.") pass + def create_encrypted_backup(self, backup_id: str, kms_key: str, region: str) -> str: + return backup_id + def is_backup_available(self, backup_region: str, backup_id: str) -> bool: """ Determine whether backup has completed and is available to be copied diff --git a/template.yaml b/template.yaml index 9151324..25f8988 100644 --- a/template.yaml +++ b/template.yaml @@ -67,7 +67,11 @@ Parameters: Description: Duing the pull backup task, Kms key id to encrypt snapshots with Type: String Default: '' - + ShelveryReencryptKmsKeyId: + Description: During the share task, Kms key id to encrypt snapshots with + Type: String + Default: '' + Resources: SnsTopic: @@ -118,7 +122,7 @@ Resources: Tags: Name: Shelvery CreatedBy: Shelvery - ShelveryVersion: 0.9.9 + ShelveryVersion: 0.9.10 Environment: Variables: @@ -138,6 +142,7 @@ Resources: shelvery_sqs_queue_wait_period: 300 shelvery_encrypt_copy: !Ref ShelveryEncryptCopy shelvery_copy_kms_key_id: !Ref ShelveryCopyKmsKeyId + shelvery_reencrypt_kms_key_id: !Ref ShelveryReencryptKmsKeyId Events: