From 811386778523af566ba94d7129a145d9237a84c4 Mon Sep 17 00:00:00 2001 From: Tariq Yusuf Date: Thu, 18 Jul 2024 13:37:36 -0700 Subject: [PATCH] Include AWS Tags in Resource Generation (#4998) Co-authored-by: Dave Quinlan <83430497+daveqnet@users.noreply.github.com> Co-authored-by: Steve Murphy --- CHANGELOG.md | 2 ++ Dockerfile | 2 +- src/fides/api/tasks/storage.py | 5 +--- src/fides/connectors/aws.py | 41 ++++++++++++++++++-------------- src/fides/core/system.py | 4 ++-- tests/ctl/connectors/test_aws.py | 4 ++++ tests/ctl/core/test_api.py | 1 + 7 files changed, 34 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fe8b35340d..c463eda296 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,8 @@ The types of changes are: ## [2.41.0](https://github.com/ethyca/fides/compare/2.40.0...2.41.0) +- Add AWS Tags in the meta field for Fides system when using `fides generate` [#4998](https://github.com/ethyca/fides/pull/4998). + ### Added - Added erasure support for Alchemer integration [#4925](https://github.com/ethyca/fides/pull/4925) - Added new columns and action buttons to discovery monitors table [#5068](https://github.com/ethyca/fides/pull/5068) diff --git a/Dockerfile b/Dockerfile index 52d4668de2..5045f63997 100644 --- a/Dockerfile +++ b/Dockerfile @@ -171,4 +171,4 @@ RUN pip install dist/ethyca_fides-*.tar.gz # Remove this directory to prevent issues with catch all RUN rm -r /fides/src/fides/ui-build -USER fidesuser \ No newline at end of file +USER fidesuser diff --git a/src/fides/api/tasks/storage.py b/src/fides/api/tasks/storage.py index 57aff48be4..85fd3f1d4f 100644 --- a/src/fides/api/tasks/storage.py +++ b/src/fides/api/tasks/storage.py @@ -8,7 +8,6 @@ from typing import Any, Dict, Optional, Set, Union import pandas as pd -from boto3 import Session from botocore.exceptions import ClientError, ParamValidationError from loguru import logger @@ -102,9 +101,7 @@ def write_to_in_memory_buffer( raise NotImplementedError(f"No handling for response format {resp_format}.") -def create_presigned_url_for_s3( - s3_client: Session, bucket_name: str, file_key: str -) -> str: +def create_presigned_url_for_s3(s3_client: Any, bucket_name: str, file_key: str) -> str: """ "Generate a presigned URL to share an S3 object :param s3_client: s3 base client diff --git a/src/fides/connectors/aws.py b/src/fides/connectors/aws.py index 3587a10522..64fb60a5bc 100644 --- a/src/fides/connectors/aws.py +++ b/src/fides/connectors/aws.py @@ -22,7 +22,7 @@ from fides.core.utils import generate_unique_fides_key -def get_aws_client(service: str, aws_config: Optional[AWSConfig]) -> Any: # type: ignore +def get_aws_client(service: str, aws_config: Optional[AWSConfig]) -> Any: """ Creates boto3 client for a given service. A config is optional to allow for environment variable configuration. @@ -31,7 +31,7 @@ def get_aws_client(service: str, aws_config: Optional[AWSConfig]) -> Any: # typ service_client = boto3.client( service, **config_dict, - ) + ) # type: ignore return service_client @@ -156,17 +156,17 @@ def scan_dynamo_table(client: Any, table_name: str, num_samples: int = 30) -> Li @handle_common_aws_errors -def get_tagging_resources(client: Any) -> List[str]: # type: ignore +def get_tagging_resources(client: Any) -> List[Dict]: # type: ignore """ - Returns a list of resource arns given a 'resourcegroupstaggingapi' boto3 client. + Returns a list of resources given a 'resourcegroupstaggingapi' boto3 client. """ paginator = client.get_paginator("get_resources") - found_arns = [ - resource["ResourceARN"] + resources = [ + resource for page in paginator.paginate() for resource in page["ResourceTagMappingList"] ] - return found_arns + return resources def create_dynamodb_dataset( @@ -244,6 +244,7 @@ def create_redshift_systems( fides_key=cluster["ClusterIdentifier"], name=cluster["ClusterIdentifier"], description=f"Fides Generated Description for Redshift Cluster: {cluster['ClusterIdentifier']}", + meta={(pair["Key"], pair["Value"]) for pair in cluster.get("Tags", {})}, system_type="redshift_cluster", organization_fides_key=organization_key, fidesctl_meta=SystemMetadata( @@ -279,6 +280,7 @@ def create_rds_systems( fides_key=cluster["DBClusterIdentifier"], name=cluster["DBClusterIdentifier"], description=f"Fides Generated Description for RDS Cluster: {cluster['DBClusterIdentifier']}", + meta={(pair["Key"], pair["Value"]) for pair in cluster.get("TagList", {})}, system_type="rds_cluster", organization_fides_key=organization_key, fidesctl_meta=SystemMetadata( @@ -295,6 +297,7 @@ def create_rds_systems( fides_key=instance["DBInstanceIdentifier"], name=instance["DBInstanceIdentifier"], description=f"Fides Generated Description for RDS Instance: {instance['DBInstanceIdentifier']}", + meta={(pair["Key"], pair["Value"]) for pair in instance.get("TagList", {})}, system_type="rds_instance", organization_fides_key=organization_key, fidesctl_meta=SystemMetadata( @@ -317,11 +320,11 @@ def create_rds_systems( def create_resource_tagging_systems( - resource_arns: List[str], + resources: List[Dict], organization_key: str, ) -> List[System]: """ - Given a list of resource arns, build a list of systems object which represents + Given a list of resources, build a list of systems object which represents each resource. """ resource_generators = { @@ -329,25 +332,25 @@ def create_resource_tagging_systems( "s3": create_tagging_s3_system, } systems = [] - for arn in resource_arns: - arn_split = arn.split(":") + for resource in resources: + arn_split = resource["ResourceARN"].split(":") arn_resource_type = arn_split[2] resource_generator = resource_generators.get(arn_resource_type) if resource_generator: - generated_system = resource_generator(arn, organization_key) + generated_system = resource_generator(resource, organization_key) if generated_system: systems.append(generated_system) return systems def create_tagging_dynamodb_system( - arn: str, - organization_key: str, + resource: Dict[str, Any], organization_key: str ) -> Optional[System]: """ - Given an AWS arn for a dynamodb resource, returns a System representation + Given an AWS dynamodb resource, returns a System representation for dynamodb tables. """ + arn = resource["ResourceARN"] arn_split = arn.split(":") resource_name = arn_split[5] @@ -357,6 +360,7 @@ def create_tagging_dynamodb_system( fides_key=table_name, name=table_name, description=f"Fides Generated Description for DynamoDb table: {table_name}", + meta={(pair["Key"], pair["Value"]) for pair in resource.get("Tags", {})}, system_type="dynamodb_table", organization_fides_key=organization_key, fidesctl_meta=SystemMetadata( @@ -368,13 +372,13 @@ def create_tagging_dynamodb_system( def create_tagging_s3_system( - arn: str, - organization_key: str, + resource: Dict[str, Any], organization_key: str ) -> Optional[System]: """ - Given an AWS arn for a s3 resource, returns a System representation + Given an AWS s3 resource, returns a System representation for s3 buckets. """ + arn = resource["ResourceARN"] arn_split = arn.split(":") resource_name = arn_split[5] @@ -383,6 +387,7 @@ def create_tagging_s3_system( fides_key=bucket_name, name=bucket_name, description=f"Fides Generated Description for S3 bucket: {bucket_name}", + meta={(pair["Key"], pair["Value"]) for pair in resource.get("Tags", {})}, system_type="s3_bucket", organization_fides_key=organization_key, fidesctl_meta=SystemMetadata( diff --git a/src/fides/core/system.py b/src/fides/core/system.py index 59d23c5d30..4f2ac8c2f5 100644 --- a/src/fides/core/system.py +++ b/src/fides/core/system.py @@ -62,9 +62,9 @@ def generate_resource_tagging_systems( client = aws_connector.get_aws_client( service="resourcegroupstaggingapi", aws_config=aws_config ) - resource_arns = aws_connector.get_tagging_resources(client=client) + resources = aws_connector.get_tagging_resources(client=client) resource_tagging_systems = aws_connector.create_resource_tagging_systems( - resource_arns=resource_arns, organization_key=organization_key + resources=resources, organization_key=organization_key ) return resource_tagging_systems diff --git a/tests/ctl/connectors/test_aws.py b/tests/ctl/connectors/test_aws.py index 503b68d9fd..f17bbfbfeb 100644 --- a/tests/ctl/connectors/test_aws.py +++ b/tests/ctl/connectors/test_aws.py @@ -44,6 +44,7 @@ def redshift_systems() -> Generator: organization_fides_key="default_organization", name="redshift-cluster-1", description="Fides Generated Description for Redshift Cluster: redshift-cluster-1", + meta={}, fidesctl_meta=SystemMetadata( endpoint_address="redshift-cluster-1.c2angfh5kpo4.us-east-1.redshift.amazonaws.com", endpoint_port="5439", @@ -57,6 +58,7 @@ def redshift_systems() -> Generator: organization_fides_key="default_organization", name="redshift-cluster-2", description="Fides Generated Description for Redshift Cluster: redshift-cluster-2", + meta={}, fidesctl_meta=SystemMetadata( endpoint_address="redshift-cluster-2.c2angfh5kpo4.us-east-1.redshift.amazonaws.com", endpoint_port="5439", @@ -101,6 +103,7 @@ def rds_systems() -> Generator: organization_fides_key="default_organization", name="database-2", description="Fides Generated Description for RDS Cluster: database-2", + meta={}, fidesctl_meta=SystemMetadata( endpoint_address="database-2.cluster-ckrdpkkb4ukm.us-east-1.rds.amazonaws.com", endpoint_port="3306", @@ -114,6 +117,7 @@ def rds_systems() -> Generator: organization_fides_key="default_organization", name="database-1", description="Fides Generated Description for RDS Instance: database-1", + meta={}, fidesctl_meta=SystemMetadata( endpoint_address="database-1.ckrdpkkb4ukm.us-east-1.rds.amazonaws.com", endpoint_port="3306", diff --git a/tests/ctl/core/test_api.py b/tests/ctl/core/test_api.py index 17e5b3a962..2c692b31f2 100644 --- a/tests/ctl/core/test_api.py +++ b/tests/ctl/core/test_api.py @@ -546,6 +546,7 @@ def system_create_request_body(self) -> SystemSchema: cookie_refresh=True, uses_non_cookie_access=True, legitimate_interest_disclosure_url="http://www.example.com/legitimate_interest_disclosure", + meta={}, privacy_declarations=[ models.PrivacyDeclaration( name="declaration-name",