Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PROD-2654 - MySQL on RDS as a detection/discovery source #5275

Merged
merged 20 commits into from
Sep 25, 2024
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions clients/admin-ui/src/types/api/models/ConnectionType.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ export enum ConnectionType {
MSSQL = "mssql",
MYSQL = "mysql",
POSTGRES = "postgres",
RDS_MYSQL = "rds_mysql",
REDSHIFT = "redshift",
S3 = "s3",
SAAS = "saas",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
"""add_rds_mysql_to_connector_type

Revision ID: 25fe48d56eaa
Revises: 9de4bb76307a
Create Date: 2024-09-20 17:06:31.944225

"""

import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision = "25fe48d56eaa"
down_revision = "9de4bb76307a"
branch_labels = None
depends_on = None


def upgrade():
    """Add the 'rds_mysql' value to the Postgres ``connectiontype`` enum.

    Uses the rename-old / create-new / re-cast-column / drop-old pattern
    rather than ``ALTER TYPE ... ADD VALUE`` — presumably because ADD VALUE
    could not run inside a transaction on older Postgres versions, which
    conflicts with alembic's transactional migrations (TODO confirm the
    project's minimum Postgres version).
    """
    # Add 'rds_mysql' to ConnectionType enum
    # Step 1: move the existing enum type out of the way.
    op.execute("ALTER TYPE connectiontype RENAME TO connectiontype_old")
    # Step 2: recreate the enum with the full value list plus 'rds_mysql'.
    # NOTE: this list must stay in sync with the ConnectionType Python enum
    # in src/fides/api/models/connectionconfig.py.
    op.execute(
        """
        CREATE TYPE connectiontype AS ENUM (
            'mongodb',
            'mysql',
            'https',
            'snowflake',
            'redshift',
            'mssql',
            'mariadb',
            'bigquery',
            'saas',
            'manual',
            'manual_webhook',
            'timescale',
            'fides',
            'sovrn',
            'attentive',
            'dynamodb',
            'postgres',
            'generic_consent_email',
            'generic_erasure_email',
            'scylla',
            's3',
            'google_cloud_sql_mysql',
            'google_cloud_sql_postgres',
            'dynamic_erasure_email',
            'rds_mysql'
        )
        """
    )
    # Step 3: re-point the column at the new type; the text round-trip cast
    # is required because Postgres cannot cast directly between enum types.
    op.execute(
        """
        ALTER TABLE connectionconfig ALTER COLUMN connection_type TYPE connectiontype USING
        connection_type::text::connectiontype
        """
    )
    # Step 4: the old type is no longer referenced and can be dropped.
    op.execute("DROP TYPE connectiontype_old")


def downgrade():
    """Remove the 'rds_mysql' value from the ``connectiontype`` enum.

    Mirror image of :func:`upgrade`, with one destructive extra step: any
    connection configs of the removed type must be deleted first, since the
    recreated enum cannot represent them. This data loss is irreversible —
    re-running ``upgrade`` will not restore the deleted rows.
    """
    # Remove 'rds_mysql' from ConnectionType enum
    # NOTE(review): if other tables hold foreign keys to connectionconfig,
    # this DELETE may fail or orphan rows — confirm cascade behavior.
    op.execute("DELETE FROM connectionconfig WHERE connection_type IN ('rds_mysql')")
    op.execute("ALTER TYPE connectiontype RENAME TO connectiontype_old")
    # Recreate the enum without 'rds_mysql' (same list as before this revision).
    op.execute(
        """
        CREATE TYPE connectiontype AS ENUM (
            'mongodb',
            'mysql',
            'https',
            'snowflake',
            'redshift',
            'mssql',
            'mariadb',
            'bigquery',
            'saas',
            'manual',
            'manual_webhook',
            'timescale',
            'fides',
            'sovrn',
            'attentive',
            'dynamodb',
            'postgres',
            'generic_consent_email',
            'generic_erasure_email',
            'scylla',
            's3',
            'google_cloud_sql_mysql',
            'google_cloud_sql_postgres',
            'dynamic_erasure_email'
        )
        """
    )
    # Re-cast the column via text, as enum-to-enum casts are not allowed.
    op.execute(
        """
        ALTER TABLE connectionconfig ALTER COLUMN connection_type TYPE connectiontype USING
        connection_type::text::connectiontype
        """
    )
    op.execute("DROP TYPE connectiontype_old")
2 changes: 2 additions & 0 deletions src/fides/api/models/connectionconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ class ConnectionType(enum.Enum):
mssql = "mssql"
mysql = "mysql"
postgres = "postgres"
rds_mysql = "rds_mysql"
redshift = "redshift"
s3 = "s3"
saas = "saas"
Expand Down Expand Up @@ -83,6 +84,7 @@ def human_readable(self) -> str:
ConnectionType.mssql.value: "Microsoft SQL Server",
ConnectionType.mysql.value: "MySQL",
ConnectionType.postgres.value: "PostgreSQL",
ConnectionType.rds_mysql.value: "RDS MySQL",
ConnectionType.redshift.value: "Amazon Redshift",
ConnectionType.s3.value: "Amazon S3",
ConnectionType.saas.value: "SaaS",
Expand Down
9 changes: 8 additions & 1 deletion src/fides/api/schemas/connection_configuration/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,12 @@
from fides.api.schemas.connection_configuration.connection_secrets_postgres import (
PostgreSQLSchema as PostgreSQLSchema,
)
from fides.api.schemas.connection_configuration.connection_secrets_rds_mysql import (
RDSMySQLDocsSchema as RDSMySQLDocsSchema,
)
from fides.api.schemas.connection_configuration.connection_secrets_rds_mysql import (
RDSMySQLSchema as RDSMySQLSchema,
)
from fides.api.schemas.connection_configuration.connection_secrets_redshift import (
RedshiftDocsSchema as RedshiftDocsSchema,
)
Expand Down Expand Up @@ -152,9 +158,10 @@
ConnectionType.mssql.value: MicrosoftSQLServerSchema,
ConnectionType.mysql.value: MySQLSchema,
ConnectionType.postgres.value: PostgreSQLSchema,
ConnectionType.rds_mysql.value: RDSMySQLSchema,
ConnectionType.redshift.value: RedshiftSchema,
ConnectionType.saas.value: SaaSSchema,
ConnectionType.s3.value: S3Schema,
ConnectionType.saas.value: SaaSSchema,
ConnectionType.scylla.value: ScyllaSchema,
ConnectionType.snowflake.value: SnowflakeSchema,
ConnectionType.sovrn.value: SovrnSchema,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class MySQLSchema(ConnectionConfigSecretsSchema):
description="The hostname or IP address of the server where the database is running.",
)
port: int = Field(
3306,
default=3306,
title="Port",
description="The network port number on which the server is listening for incoming connections (default: 3306).",
)
Expand All @@ -32,8 +32,8 @@ class MySQLSchema(ConnectionConfigSecretsSchema):
json_schema_extra={"sensitive": True},
)
dbname: str = Field(
description="The name of the specific database within the database server that you want to connect to.",
title="Database",
description="The name of the specific database within the database server that you want to connect to.",
)
ssh_required: bool = Field(
False,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from pydantic import Field

from fides.api.schemas.base_class import NoValidationSchema
from fides.api.schemas.connection_configuration.connection_secrets_base_aws import (
BaseAWSSchema,
)


class RDSMySQLSchema(BaseAWSSchema):
    """
    Schema to validate the secrets needed to connect to a RDS MySQL Database

    Extends the shared AWS credential schema (auth method, access keys,
    assume-role ARN) with RDS-specific settings.
    """

    # NOTE(review): consider renaming to db_username to clarify this is a
    # database login, not an AWS IAM username.
    # NOTE(review): the "fides_explorer" default is non-standard — a name
    # like "fides" or "fides_service_user" may be preferable; confirm before
    # changing, as it alters the public schema default.
    username: str = Field(
        default="fides_explorer",
        title="Username",
        description="The user account used to authenticate and access the databases.",
    )
    region: str = Field(
        title="Region",
        description="The AWS region where the RDS instances are located.",
    )
    # TODO(review): this is user-supplied and fetched/saved downstream by the
    # monitor — it should be validated as an HTTP(S) URL (e.g. pydantic
    # AnyHttpUrl or fideslang's AnyHttpUrlString) and ideally bounds-checked
    # before fetching. Default is AWS's standard global RDS CA bundle, which
    # prospective customers have confirmed they use.
    ca_cert_url: str = Field(
        default="https://truststore.pki.rds.amazonaws.com/global/global-bundle.pem",
        title="CA Certificate URL",
        description="The URL to the CA certificate used to authenticate the RDS instances.",
    )


class RDSMySQLDocsSchema(RDSMySQLSchema, NoValidationSchema):
    """RDS MySQL Secrets Schema for API Docs"""
90 changes: 87 additions & 3 deletions tests/ops/api/v1/endpoints/test_connection_template_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ def test_search_system_type(self, api_client, generate_auth_header, url):
resp = api_client.get(url + "system_type=database", headers=auth_header)
assert resp.status_code == 200
data = resp.json()["items"]
assert len(data) == 14
assert len(data) == 15

def test_search_system_type_and_connection_type(
self,
Expand Down Expand Up @@ -913,7 +913,9 @@ def test_get_connection_secret_schema_dynamodb(
"description": "Determines which type of "
"authentication method to use "
"for connecting to Amazon Web "
"Services.",
"Services. Currently accepted "
"values are: `secret_keys` or "
"`automatic`.",
"title": "Authentication Method",
},
"aws_access_key_id": {
Expand Down Expand Up @@ -1261,7 +1263,9 @@ def test_get_connection_secret_schema_s3(
"description": "Determines which type of "
"authentication method to use "
"for connecting to Amazon Web "
"Services.",
"Services. Currently accepted "
"values are: `secret_keys` or "
"`automatic`.",
"title": "Authentication Method",
},
"aws_access_key_id": {
Expand Down Expand Up @@ -1294,6 +1298,86 @@ def test_get_connection_secret_schema_s3(
"type": "object",
}

def test_get_connection_secret_schema_rds(
    self, api_client: TestClient, generate_auth_header, base_url
) -> None:
    """The rds_mysql connector exposes the expected secrets JSON schema.

    Pins the full pydantic-generated schema: shared AWS auth fields from
    BaseAWSSchema plus the RDS-specific username, region, and ca_cert_url.
    """
    auth_header = generate_auth_header(scopes=[CONNECTION_TYPE_READ])
    resp = api_client.get(
        base_url.format(connection_type="rds_mysql"), headers=auth_header
    )
    # Assert the status first so a failure reports the HTTP error rather
    # than a confusing schema mismatch (consistent with sibling tests).
    assert resp.status_code == 200
    assert resp.json() == {
        "definitions": {
            "AWSAuthMethod": {
                "enum": ["automatic", "secret_keys"],
                "title": "AWSAuthMethod",
                "type": "string",
            }
        },
        "description": "Schema to validate the secrets needed to connect to a RDS "
        "MySQL Database",
        "properties": {
            "auth_method": {
                "allOf": [{"$ref": "#/definitions/AWSAuthMethod"}],
                "description": "Determines which type of "
                "authentication method to use "
                "for connecting to Amazon Web "
                "Services. Currently accepted "
                "values are: `secret_keys` or "
                "`automatic`.",
                "title": "Authentication Method",
            },
            "aws_access_key_id": {
                "description": "Part of the credentials "
                "that provide access to "
                "your AWS account.",
                "title": "Access Key ID",
                "type": "string",
            },
            "aws_assume_role_arn": {
                "description": "If provided, the ARN "
                "of the role that "
                "should be assumed to "
                "connect to AWS.",
                "title": "Assume Role ARN",
                "type": "string",
            },
            "aws_secret_access_key": {
                "description": "Part of the "
                "credentials that "
                "provide access to "
                "your AWS account.",
                "sensitive": True,
                "title": "Secret Access Key",
                "type": "string",
            },
            "ca_cert_url": {
                "default": "https://truststore.pki.rds.amazonaws.com/global/global-bundle.pem",
                "description": "The URL to the CA certificate "
                "used to authenticate the RDS "
                "instances.",
                "title": "CA Certificate URL",
                "type": "string",
            },
            "region": {
                "description": "The AWS region where the RDS "
                "instances are located.",
                "title": "Region",
                "type": "string",
            },
            "username": {
                "default": "fides_explorer",
                "description": "The user account used to "
                "authenticate and access the "
                "databases.",
                "title": "Username",
                "type": "string",
            },
        },
        "required": ["auth_method", "region"],
        "title": "RDSMySQLSchema",
        "type": "object",
    }

def test_get_connection_secret_schema_snowflake(
self, api_client: TestClient, generate_auth_header, base_url
) -> None:
Expand Down
Loading