Skip to content

Commit

Permalink
DynamoDB Connector (#2998)
Browse files Browse the repository at this point in the history
Co-authored-by: Sean Preston <[email protected]>
Co-authored-by: Adrian Galvan <[email protected]>
  • Loading branch information
3 people authored May 10, 2023
1 parent af56c29 commit ad8fda3
Show file tree
Hide file tree
Showing 31 changed files with 1,084 additions and 10 deletions.
7 changes: 7 additions & 0 deletions .github/workflows/backend_checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,9 @@ jobs:
OKTA_CLIENT_TOKEN: ${{ secrets.OKTA_FIDESCTL_CLIENT_TOKEN }}
AWS_DEFAULT_REGION: us-east-1
BIGQUERY_CONFIG: ${{ secrets.BIGQUERY_CONFIG }}
DYNAMODB_REGION: ${{ secrets.DYNAMODB_REGION }}
DYNAMODB_ACCESS_KEY_ID: ${{ secrets.DYNAMODB_ACCESS_KEY_ID }}
DYNAMODB_ACCESS_KEY: ${{ secrets.DYNAMODB_ACCESS_KEY }}

External-Datastores:
needs: Build
Expand Down Expand Up @@ -259,6 +262,10 @@ jobs:
BIGQUERY_KEYFILE_CREDS: ${{ secrets.BIGQUERY_KEYFILE_CREDS }}
BIGQUERY_DATASET: fidesopstest
SNOWFLAKE_TEST_URI: ${{ secrets.SNOWFLAKE_TEST_URI }}
DYNAMODB_REGION: ${{ secrets.DYNAMODB_REGION }}
DYNAMODB_ACCESS_KEY_ID: ${{ secrets.DYNAMODB_ACCESS_KEY_ID }}
DYNAMODB_ACCESS_KEY: ${{ secrets.DYNAMODB_ACCESS_KEY }}

run: nox -s "pytest(ops-external-datastores)"

External-SaaS-Connectors:
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ The types of changes are:

### Added

- Connector for DynamoDB [#2998](https://github.com/ethyca/fides/pull/2998)
- Access and erasure support for Amplitude [#2569](https://github.com/ethyca/fides/pull/2569)
- Access and erasure support for Gorgias [#2444](https://github.com/ethyca/fides/pull/2444)
- Privacy Experience Bulk Create, Bulk Update, and Detail Endpoints [#3185](https://github.com/ethyca/fides/pull/3185)
Expand Down
10 changes: 10 additions & 0 deletions clients/admin-ui/public/images/connector-logos/dynamodb.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ export const CONNECTION_TYPE_LOGO_MAP = new Map<ConnectionType, string>([
[ConnectionType.TIMESCALE, "timescaledb.svg"],
[ConnectionType.SOVRN, "sovrn.svg"],
[ConnectionType.ATTENTIVE, "attentive.svg"],
[ConnectionType.DYNAMODB, "dynamodb.svg"],
]);

/**
Expand Down
1 change: 1 addition & 0 deletions clients/admin-ui/src/types/api/models/ConnectionType.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,5 @@ export enum ConnectionType {
MANUAL_WEBHOOK = "manual_webhook",
TIMESCALE = "timescale",
FIDES = "fides",
DYNAMODB = "dynamodb",
}
80 changes: 80 additions & 0 deletions data/dataset/dynamodb_example_test_dataset.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
dataset:
- fides_key: dynamodb_example_test_dataset
name: DynamoDB Example Test Dataset
description: Example of a DynamoDB dataset containing a single customer table
collections:
- name: customer_identifier
fields:
- name: customer_id
data_categories: [user.unique_id]
fides_meta:
references:
- dataset: dynamodb_example_test_dataset
field: customer.id
direction: to
- dataset: dynamodb_example_test_dataset
field: login.customer_id
direction: to
- name: created
data_categories: [system.operations]
- name: email
data_categories: [user.contact.email]
fides_meta:
primary_key: True
identity: email
data_type: string
- name: name
data_categories: [user.name]
- name: address
fields:
- name: city
data_categories: [user.contact.address.city]
- name: house
data_categories: [user.contact.address.street]
- name: id
data_categories: [system.operations]
fides_meta:
primary_key: True
- name: state
data_categories: [user.contact.address.state]
- name: street
data_categories: [user.contact.address.street]
- name: zip
data_categories: [user.contact.address.postal_code]
- name: customer
fields:
- name: address_id
data_categories: [system.operations]
fides_meta:
references:
- dataset: dynamodb_example_test_dataset
field: address.id
direction: to
- name: created
data_categories: [system.operations]
- name: customer_email
data_categories: [user.contact.email]
fides_meta:
identity: email
data_type: string
- name: id
data_categories: [user.unique_id]
fides_meta:
primary_key: True
- name: name
data_categories: [user.name]
- name: login
fields:
- name: customer_id
data_categories: [user.unique_id]
fides_meta:
primary_key: True
- name: login_date
data_categories: [system.operations]
- name: name
data_categories: [user.name]
- name: customer_email
data_categories: [user.contact.email]
fides_meta:
identity: email
data_type: string
5 changes: 5 additions & 0 deletions noxfiles/run_infrastructure.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@
"snowflake": ["SNOWFLAKE_TEST_URI"],
"redshift": ["REDSHIFT_TEST_URI", "REDSHIFT_TEST_DB_SCHEMA"],
"bigquery": ["BIGQUERY_KEYFILE_CREDS", "BIGQUERY_DATASET"],
"dynamodb": [
"DYNAMODB_REGION",
"DYNAMODB_ACCESS_KEY_ID",
"DYNAMODB_ACCESS_KEY",
],
}
EXTERNAL_DATASTORES = list(EXTERNAL_DATASTORE_CONFIG.keys())
ALL_DATASTORES = DOCKERFILE_DATASTORES + EXTERNAL_DATASTORES
Expand Down
12 changes: 12 additions & 0 deletions noxfiles/test_setup_nox.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,12 @@ def pytest_ctl(session: Session, mark: str, coverage_arg: str) -> None:
"OKTA_CLIENT_TOKEN",
"-e",
"BIGQUERY_CONFIG",
"-e",
"DYNAMODB_REGION",
"-e",
"DYNAMODB_ACCESS_KEY_ID",
"-e",
"DYNAMODB_ACCESS_KEY",
CI_ARGS_EXEC,
CONTAINER_NAME,
"pytest",
Expand Down Expand Up @@ -133,6 +139,12 @@ def pytest_ops(session: Session, mark: str, coverage_arg: str) -> None:
"BIGQUERY_KEYFILE_CREDS",
"-e",
"BIGQUERY_DATASET",
"-e",
"DYNAMODB_REGION",
"-e",
"DYNAMODB_ACCESS_KEY_ID",
"-e",
"DYNAMODB_ACCESS_KEY",
CI_ARGS_EXEC,
CONTAINER_NAME,
"pytest",
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ markers = [
"integration_twilio_conversations",
"integration_adobe_campaign",
"integration_firebase_auth",
"integration_dynamodb",
"unit_saas"
]
asyncio_mode = "auto"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""add dynamodb to connector list
Revision ID: fc04e3e637c0
Revises: 15a3e7483249
Create Date: 2023-04-14 10:19:50.681752
"""
from alembic import op

# revision identifiers, used by Alembic.
revision = "fc04e3e637c0"
down_revision = "15a3e7483249"
branch_labels = None
depends_on = None


def upgrade():
# Add 'dynamodb' to ConnectionType enum
op.execute("alter type connectiontype rename to connectiontype_old")
op.execute(
"create type connectiontype as enum('postgres', 'mongodb', 'mysql', 'https', "
"'snowflake', 'redshift', 'mssql', 'mariadb', 'bigquery', 'saas', 'manual', "
"'manual_webhook', 'timescale', 'fides', 'sovrn', 'attentive', 'dynamodb')"
)
op.execute(
(
"alter table connectionconfig alter column connection_type type connectiontype using "
"connection_type::text::connectiontype"
)
)
op.execute("drop type connectiontype_old")


def downgrade():
# Remove dynamodb from the connectiontype enum
op.execute("delete from connectionconfig where connection_type in ('dynamodb')")
op.execute("alter type connectiontype rename to connectiontype_old")
op.execute(
"create type connectiontype as enum('postgres', 'mongodb', 'mysql', 'https', "
"'snowflake', 'redshift', 'mssql', 'mariadb', 'bigquery', 'saas', 'manual', "
"'email', 'manual_webhook', 'timescale', 'fides', 'sovrn', 'attentive')"
)
op.execute(
(
"alter table connectionconfig alter column connection_type type connectiontype using "
"connection_type::text::connectiontype"
)
)
op.execute("drop type connectiontype_old")
43 changes: 39 additions & 4 deletions src/fides/api/ctl/routes/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,11 @@
DatabaseConfig,
OktaConfig,
)
from fides.core.dataset import generate_bigquery_datasets, generate_db_datasets
from fides.core.dataset import (
generate_bigquery_datasets,
generate_db_datasets,
generate_dynamo_db_datasets,
)
from fides.core.system import generate_aws_systems, generate_okta_systems
from fides.core.utils import validate_db_engine

Expand All @@ -39,6 +43,7 @@ class ValidTargets(str, Enum):
DB = "db"
OKTA = "okta"
BIGQUERY = "bigquery"
DYNAMODB = "dynamodb"


class GenerateTypes(str, Enum):
Expand Down Expand Up @@ -70,9 +75,10 @@ def target_matches_type(cls, values: Dict) -> Dict:
target_type = (values.get("target"), values.get("type"))
valid_target_types = [
("aws", "systems"),
("okta", "systems"),
("db", "datasets"),
("bigquery", "datasets"),
("db", "datasets"),
("dynamodb", "datasets"),
("okta", "systems"),
]
if target_type not in valid_target_types:
raise ValueError("Target and Type are not a valid match")
Expand Down Expand Up @@ -129,6 +135,7 @@ async def generate(
* Okta: Systems
* DB: Datasets
* BigQuery: Datasets
* DynamoDB: Datasets
In the future, this will include options for other Systems & Datasets,
examples include:
Expand All @@ -141,7 +148,6 @@ async def generate(
)
generate_config = generate_request_payload.generate.config
generate_target = generate_request_payload.generate.target.lower()

try:
if generate_target == "aws" and isinstance(generate_config, AWSConfig):
generate_results = generate_aws(
Expand All @@ -167,6 +173,12 @@ async def generate(
bigquery_config=generate_config,
)

elif generate_target == "dynamodb" and isinstance(generate_config, AWSConfig):
generate_results = generate_dynamodb(
aws_config=generate_config,
organization=organization,
)

except ConnectorAuthFailureException as error:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
Expand Down Expand Up @@ -199,6 +211,29 @@ def generate_aws(
return [i.dict(exclude_none=True) for i in aws_systems]


def generate_dynamodb(
aws_config: AWSConfig, organization: Organization
) -> List[Dict[str, str]]:
"""
Returns a list of DynamoDB datasets found in AWS.
"""
from fides.connectors.aws import validate_credentials

log.info("Validating AWS credentials")
try:
validate_credentials(aws_config)
except ConnectorAuthFailureException as error:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail=str(error),
)

log.info("Generating datasets from AWS DynamoDB")
aws_resources = [generate_dynamo_db_datasets(aws_config=aws_config)]

return [i.dict(exclude_none=True) for i in aws_resources]


async def generate_okta(
okta_config: OktaConfig, organization: Organization
) -> List[Dict[str, str]]:
Expand Down
2 changes: 2 additions & 0 deletions src/fides/api/ops/models/connectionconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ class ConnectionType(enum.Enum):
manual = "manual" # Run as part of the traversal
sovrn = "sovrn"
attentive = "attentive"
dynamodb = "dynamodb"
manual_webhook = "manual_webhook" # Run before the traversal
timescale = "timescale"
fides = "fides"
Expand All @@ -63,6 +64,7 @@ def human_readable(self) -> str:
readable_mapping: Dict[str, str] = {
ConnectionType.attentive.value: "Attentive",
ConnectionType.bigquery.value: "BigQuery",
ConnectionType.dynamodb.value: "DynamoDB",
ConnectionType.fides.value: "Fides Connector",
ConnectionType.https.value: "Policy Webhook",
ConnectionType.manual.value: "Manual Connector",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@
from fides.api.ops.schemas.connection_configuration.connection_secrets_bigquery import (
BigQuerySchema as BigQuerySchema,
)
from fides.api.ops.schemas.connection_configuration.connection_secrets_dynamodb import (
DynamoDBDocsSchema as DynamoDBDocsSchema,
)
from fides.api.ops.schemas.connection_configuration.connection_secrets_dynamodb import (
DynamoDBSchema as DynamoDBSchema,
)
from fides.api.ops.schemas.connection_configuration.connection_secrets_email import (
EmailDocsSchema as EmailDocsSchema,
)
Expand Down Expand Up @@ -101,6 +107,7 @@
secrets_schemas: Dict[str, Any] = {
ConnectionType.attentive.value: AttentiveSchema,
ConnectionType.bigquery.value: BigQuerySchema,
ConnectionType.dynamodb.value: DynamoDBSchema,
ConnectionType.fides.value: FidesConnectorSchema,
ConnectionType.https.value: HttpsSchema,
ConnectionType.manual_webhook.value: ManualWebhookSchema,
Expand Down Expand Up @@ -158,4 +165,5 @@ def get_connection_secrets_schema(
TimescaleDocsSchema,
FidesDocsSchema,
SovrnDocsSchema,
DynamoDBDocsSchema,
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from typing import List

from fides.api.ops.schemas.base_class import NoValidationSchema
from fides.api.ops.schemas.connection_configuration.connection_secrets import (
ConnectionConfigSecretsSchema,
)


class DynamoDBSchema(ConnectionConfigSecretsSchema):
"""Schema to validate the secrets needed to connect to an Amazon DynamoDB cluster"""

region_name: str
aws_secret_access_key: str
aws_access_key_id: str

_required_components: List[str] = [
"region_name",
"aws_secret_access_key",
"aws_access_key_id",
]


class DynamoDBDocsSchema(DynamoDBSchema, NoValidationSchema):
"""DynamoDB Secrets Schema for API Docs"""
Loading

0 comments on commit ad8fda3

Please sign in to comment.