From f469febd27eaea0db85676ac7b2fce4297113f2d Mon Sep 17 00:00:00 2001 From: treff7es Date: Wed, 22 Dec 2021 15:09:03 +0100 Subject: [PATCH 1/5] Add config option to set bigquery credential in source config --- metadata-ingestion/source_docs/bigquery.md | 109 ++++++++++++++---- .../datahub/ingestion/source/sql/bigquery.py | 52 +++++++++ .../tests/unit/test_bigquery_source.py | 65 +++++++++++ 3 files changed, 202 insertions(+), 24 deletions(-) create mode 100644 metadata-ingestion/tests/unit/test_bigquery_source.py diff --git a/metadata-ingestion/source_docs/bigquery.md b/metadata-ingestion/source_docs/bigquery.md index 567fecca490c8..7a5962f0d6db1 100644 --- a/metadata-ingestion/source_docs/bigquery.md +++ b/metadata-ingestion/source_docs/bigquery.md @@ -6,6 +6,63 @@ For context on getting started with ingestion, check out our [metadata ingestion To install this plugin, run `pip install 'acryl-datahub[bigquery]'`. +## Prerequisites +### Create a datahub profile in GCP: +1. Create a custom role for datahub (https://cloud.google.com/iam/docs/creating-custom-roles#creating_a_custom_role) +2. Grant the following permissions to this role: +``` + bigquery.datasets.get + bigquery.datasets.getIamPolicy + bigquery.jobs.create + bigquery.jobs.list + bigquery.jobs.listAll + bigquery.models.getMetadata + bigquery.models.list + bigquery.routines.get + bigquery.routines.list + bigquery.tables.create # Needed for profiling + bigquery.tables.get + bigquery.tables.getData # Needed for profiling + bigquery.tables.list + logging.logEntries.list # Needed for lineage generation + resourcemanager.projects.get +``` +### Create a service account: + +1. Set up a service account (https://cloud.google.com/iam/docs/creating-managing-service-accounts#iam-service-accounts-create-console) +and assign the previously created role to this service account +2. 
Download a service account JSON keyfile: + The credential file looks like this: +```json +{ + "type": "service_account", + "project_id": "project-id-1234567", + "private_key_id": "d0121d0000882411234e11166c6aaa23ed5d74e0", + "private_key": "-----BEGIN PRIVATE KEY-----\nMIIyourkey\n-----END PRIVATE KEY-----", + "client_email": "test@suppproject-id-1234567.iam.gserviceaccount.com", + "client_id": "113545814931671546333", + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token", + "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", + "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/test%40suppproject-id-1234567.iam.gserviceaccount.com" +} +``` +3. Here you have two options: + Either point to the keyfile using an environment variable: + $ export GOOGLE_APPLICATION_CREDENTIALS="/path/to/keyfile.json" + + *or* + + Set the `credential` config in your source, using the values from the credential JSON file, like this: +```yml + credential: + project_id: project-id-1234567 + private_key_id: "d0121d0000882411234e11166c6aaa23ed5d74e0" + private_key: "-----BEGIN PRIVATE KEY-----\nMIIyourkey\n-----END PRIVATE KEY-----\n" + client_email: "test@suppproject-id-1234567.iam.gserviceaccount.com" + client_id: "113545814931671546333" +``` + ## Capabilities This plugin extracts the following: @@ -44,30 +101,34 @@ Note that a `.` is used to denote nested fields in the YAML recipe. As a SQL-based service, the Athena integration is also supported by our SQL profiler. See [here](./sql_profiles.md) for more details on configuration. 
-| Field | Required | Default | Description | -| ----------------------------------------- | -------- | ------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `project_id` | | Autodetected | Project ID to ingest from. If not specified, will infer from environment. | -| `env` | | `"PROD"` | Environment to use in namespace when constructing URNs. | -| `options.