Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add bigquery to generate endpoint #917

Merged
merged 3 commits into from
Jul 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ The types of changes are:
* Sync CLI command now checks for untracked/unstaged files in the manifests dir [#869](https://github.com/ethyca/fides/pull/869)
* Add Okta support to the `/generate` endpoint [#842](https://github.com/ethyca/fides/pull/842)
* Add db support to `/generate` endpoint [#849](https://github.com/ethyca/fides/pull/849)
* Add BigQuery support for the `generate` cli command [#814](https://github.com/ethyca/fides/pull/814)
* Add BigQuery support for the `generate` command and `/generate` endpoint [#814](https://github.com/ethyca/fides/pull/814) & [#917](https://github.com/ethyca/fides/pull/917)
* Added OpenAPI TypeScript client generation for the UI app. See the [README](/clients/admin-ui/src/types/api/README.md) for more details.
* Standardized API error parsing under `~/types/errors`

Expand Down
23 changes: 21 additions & 2 deletions src/fidesctl/api/routes/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,19 @@
from fidesctl.api.routes.util import (
API_PREFIX,
route_requires_aws_connector,
route_requires_bigquery_connector,
route_requires_okta_connector,
)
from fidesctl.api.sql_models import sql_model_map
from fidesctl.api.utils.api_router import APIRouter
from fidesctl.connectors.models import (
AWSConfig,
BigQueryConfig,
ConnectorAuthFailureException,
DatabaseConfig,
OktaConfig,
)
from fidesctl.core.dataset import generate_db_datasets
from fidesctl.core.dataset import generate_bigquery_datasets, generate_db_datasets
from fidesctl.core.system import generate_aws_systems, generate_okta_systems


Expand All @@ -35,6 +37,7 @@ class ValidTargets(str, Enum):
AWS = "aws"
DB = "db"
OKTA = "okta"
BIGQUERY = "bigquery"


class GenerateTypes(str, Enum):
Expand All @@ -52,7 +55,7 @@ class Generate(BaseModel):
Defines attributes for generating resources included in a request.
"""

config: Union[AWSConfig, OktaConfig, DatabaseConfig]
config: Union[AWSConfig, OktaConfig, DatabaseConfig, BigQueryConfig]
target: ValidTargets
type: GenerateTypes

Expand All @@ -68,6 +71,7 @@ def target_matches_type(cls, values: Dict) -> Dict:
("aws", "systems"),
("okta", "systems"),
("db", "datasets"),
("bigquery", "datasets"),
]
if target_type not in valid_target_types:
raise ValueError("Target and Type are not a valid match")
Expand Down Expand Up @@ -119,6 +123,7 @@ async def generate(
* AWS: Systems
* Okta: Systems
* DB: Datasets
* BigQuery: Datasets

In the future, this will include options for other Systems & Datasets,
examples include:
Expand All @@ -144,6 +149,10 @@ async def generate(
okta_config=generate_request_payload.generate.config,
organization=organization,
)
elif generate_request_payload.generate.target.lower() == "bigquery":
generate_results = generate_bigquery(
bigquery_config=generate_request_payload.generate.config,
)
except ConnectorAuthFailureException as error:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
Expand Down Expand Up @@ -187,3 +196,13 @@ def generate_db(db_config: DatabaseConfig) -> List[Dict[str, str]]:
db_datasets = generate_db_datasets(connection_string=db_config.connection_string)

return [i.dict(exclude_none=True) for i in db_datasets]


@route_requires_bigquery_connector
def generate_bigquery(bigquery_config: BigQueryConfig) -> List[Dict[str, str]]:
    """
    Generate datasets from BigQuery using the supplied config.

    Each dataset produced by `generate_bigquery_datasets` is converted with
    `.dict(exclude_none=True)` and the resulting dicts are returned as a list.
    """
    log.info("Generating datasets from BigQuery")
    return [
        dataset.dict(exclude_none=True)
        for dataset in generate_bigquery_datasets(bigquery_config)
    ]
19 changes: 19 additions & 0 deletions src/fidesctl/api/routes/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,22 @@ def wrapper_func(*args, **kwargs) -> Any: # type: ignore
return func(*args, **kwargs)

return update_wrapper(wrapper_func, func)


def route_requires_bigquery_connector(func: Callable) -> Callable:
    """
    Function decorator that guards `func` behind the BigQuery connector module.

    On every call the wrapper first tries to import
    `fidesctl.connectors.bigquery`. If that import raises
    ModuleNotFoundError, a 400 Bad Request HTTPException is raised and
    `func` is not called; otherwise `func` is invoked with the original
    arguments and its result is returned unchanged.

    The returned wrapper carries the metadata of `func` (via
    `update_wrapper`).
    """

    def wrapper_func(*args, **kwargs) -> Any:  # type: ignore
        try:
            # Imported only to probe that the connector can be loaded.
            import fidesctl.connectors.bigquery  # pylint: disable=unused-import
        except ModuleNotFoundError as error:
            # Chain explicitly so the original import failure is kept as
            # the cause of the HTTP error.
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Packages not found, ensure BigQuery is included: fidesctl[bigquery]",
            ) from error
        return func(*args, **kwargs)

    return update_wrapper(wrapper_func, func)
18 changes: 17 additions & 1 deletion src/fidesctl/api/routes/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@
from fidesctl.api.routes.util import (
API_PREFIX,
route_requires_aws_connector,
route_requires_bigquery_connector,
route_requires_okta_connector,
)
from fidesctl.api.utils.api_router import APIRouter
from fidesctl.connectors.models import (
AWSConfig,
BigQueryConfig,
ConnectorAuthFailureException,
ConnectorFailureException,
OktaConfig,
Expand All @@ -28,14 +30,15 @@ class ValidationTarget(str, Enum):

AWS = "aws"
OKTA = "okta"
BIGQUERY = "bigquery"


class ValidateRequest(BaseModel):
"""
Validate endpoint request object
"""

config: Union[AWSConfig, OktaConfig]
config: Union[AWSConfig, BigQueryConfig, OktaConfig]
target: ValidationTarget


Expand Down Expand Up @@ -73,6 +76,7 @@ async def validate(
"""
validate_function_map: Dict[ValidationTarget, Callable] = {
ValidationTarget.AWS: validate_aws,
ValidationTarget.BIGQUERY: validate_bigquery,
ValidationTarget.OKTA: validate_okta,
}
validate_function = validate_function_map[validate_request_payload.target]
Expand Down Expand Up @@ -106,6 +110,18 @@ async def validate_aws(aws_config: AWSConfig) -> None:
aws_connector.validate_credentials(aws_config=aws_config)


@route_requires_bigquery_connector
async def validate_bigquery(bigquery_config: BigQueryConfig) -> None:
    """
    Check that the given GCP BigQuery credentials are usable.

    An engine is built from `bigquery_config` and handed to the connector's
    `validate_bigquery_engine`; any exception raised by the connector
    propagates to the caller.
    """
    # Imported inside the function so the connector module is only loaded
    # when this validation is actually requested.
    import fidesctl.connectors.bigquery as bigquery_connector

    bigquery_connector.validate_bigquery_engine(
        bigquery_connector.get_bigquery_engine(bigquery_config)
    )


@route_requires_okta_connector
async def validate_okta(okta_config: OktaConfig) -> None:
"""
Expand Down
4 changes: 3 additions & 1 deletion src/fidesctl/connectors/bigquery.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from json import loads

from sqlalchemy import create_engine, inspect
from sqlalchemy.engine import Engine

Expand Down Expand Up @@ -30,4 +32,4 @@ def validate_bigquery_engine(engine: Engine) -> None:
try:
inspector.get_schema_names()
except ClientError as error:
raise ConnectorFailureException(error.message)
raise ConnectorFailureException(loads(error.response.text)["error"]["message"])
16 changes: 14 additions & 2 deletions tests/api/test_generate.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# pylint: disable=missing-docstring, redefined-outer-name
from json import dumps
from base64 import b64decode
from json import dumps, loads
from os import getenv

import pytest
Expand All @@ -15,6 +16,12 @@
"aws_access_key_id": getenv("AWS_ACCESS_KEY_ID", ""),
"aws_secret_access_key": getenv("AWS_SECRET_ACCESS_KEY", ""),
},
"bigquery": {
"dataset": "fidesopstest",
"keyfile_creds": loads(
b64decode(getenv("BIGQUERY_CONFIG", "e30=").encode("utf-8")).decode("utf-8")
),
},
"db": {
"connection_string": "postgresql+psycopg2://postgres:postgres@postgres-test:5432/postgres_example?"
},
Expand All @@ -28,7 +35,12 @@
@pytest.mark.external
@pytest.mark.parametrize(
"generate_type, generate_target",
[("systems", "aws"), ("systems", "okta"), ("datasets", "db")],
[
("systems", "aws"),
("systems", "okta"),
("datasets", "db"),
("datasets", "bigquery"),
],
)
def test_generate(
test_config: FidesctlConfig,
Expand Down
24 changes: 21 additions & 3 deletions tests/api/test_validate.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# pylint: disable=missing-docstring, redefined-outer-name
from json import dumps
from base64 import b64decode
from json import dumps, loads
from os import getenv

import pytest
Expand All @@ -15,6 +16,12 @@
"aws_access_key_id": getenv("AWS_ACCESS_KEY_ID", ""),
"aws_secret_access_key": getenv("AWS_SECRET_ACCESS_KEY", ""),
},
"bigquery": {
"dataset": "fidesopstest",
"keyfile_creds": loads(
b64decode(getenv("BIGQUERY_CONFIG", "e30=").encode("utf-8")).decode("utf-8")
),
},
"okta": {
"orgUrl": "https://dev-78908748.okta.com",
"token": getenv("OKTA_CLIENT_TOKEN", ""),
Expand All @@ -23,7 +30,7 @@


@pytest.mark.external
@pytest.mark.parametrize("validate_target", ["aws", "okta"])
@pytest.mark.parametrize("validate_target", ["aws", "okta", "bigquery"])
def test_validate_success(
test_config: FidesctlConfig,
validate_target: str,
Expand Down Expand Up @@ -57,16 +64,27 @@ def test_validate_success(
"orgUrl": "https://dev-78908748.okta.com",
"token": "INVALID_TOKEN",
},
"bigquery": {
"dataset": "fidesopstest",
"keyfile_creds": loads(
b64decode(getenv("BIGQUERY_CONFIG", "e30=").encode("utf-8")).decode("utf-8")
),
},
}

# Overwrite the project id in the BigQuery failure config with an invalid
# value so that this config no longer points at a real project.
EXTERNAL_FAILURE_CONFIG_BODY["bigquery"]["keyfile_creds"][
"project_id"
] = "INVALID_PROJECT_ID"

# Failure message text expected for each validation target, keyed by target
# name. NOTE(review): presumably compared against the validate endpoint's
# response in test_validate_failure — the comparison is not visible here.
EXPECTED_FAILURE_MESSAGES = {
"aws": "Authentication failed validating config. The security token included in the request is invalid.",
"okta": "Authentication failed validating config. Invalid token provided",
"bigquery": "Unexpected failure validating config. Invalid project ID 'INVALID_PROJECT_ID'. Project IDs must contain 6-63 lowercase letters, digits, or dashes. Some project IDs also include domain name separated by a colon. IDs must start with a letter and may not end with a dash.",
}


@pytest.mark.external
@pytest.mark.parametrize("validate_target", ["aws", "okta"])
@pytest.mark.parametrize("validate_target", ["aws", "okta", "bigquery"])
def test_validate_failure(
test_config: FidesctlConfig,
validate_target: str,
Expand Down