From 6505cf78508464200688a7e6bdf1fcc87bebe4a4 Mon Sep 17 00:00:00 2001 From: Colin Saliceti Date: Fri, 11 Oct 2024 17:55:59 +0100 Subject: [PATCH] wip --- aks/dfe_analytics/README.md | 13 ++ aks/dfe_analytics/data.tf | 11 ++ aks/dfe_analytics/files/events.json.tmpl | 145 +++++++++++++++++++++++ aks/dfe_analytics/outputs.tf | 25 ++++ aks/dfe_analytics/provider.tf | 13 ++ aks/dfe_analytics/resources.tf | 87 ++++++++++++++ aks/dfe_analytics/tfdocs.md | 62 ++++++++++ aks/dfe_analytics/variables.tf | 87 ++++++++++++++ 8 files changed, 443 insertions(+) create mode 100644 aks/dfe_analytics/README.md create mode 100644 aks/dfe_analytics/data.tf create mode 100644 aks/dfe_analytics/files/events.json.tmpl create mode 100644 aks/dfe_analytics/outputs.tf create mode 100644 aks/dfe_analytics/provider.tf create mode 100644 aks/dfe_analytics/resources.tf create mode 100644 aks/dfe_analytics/tfdocs.md create mode 100644 aks/dfe_analytics/variables.tf diff --git a/aks/dfe_analytics/README.md b/aks/dfe_analytics/README.md new file mode 100644 index 0000000..dc23bcb --- /dev/null +++ b/aks/dfe_analytics/README.md @@ -0,0 +1,13 @@ +# DfE Analytics + +Creates resources in Google Cloud BigQuery and provides the required variables to applications so they can send events. 
+ +## GCP provider - Command line + +## GCP provider - Github actions + +## Create or reuse existing resources + +## Examples + +## How to configure application diff --git a/aks/dfe_analytics/data.tf b/aks/dfe_analytics/data.tf new file mode 100644 index 0000000..79b122b --- /dev/null +++ b/aks/dfe_analytics/data.tf @@ -0,0 +1,11 @@ +module "cluster_data" { + source = "../cluster_data" + name = var.cluster +} + +data "azurerm_client_config" "current" {} + +data "azurerm_user_assigned_identity" "gcp_wif" { + name = "${var.azure_resource_prefix}-gcp-wif-${var.cluster}-${var.namespace}-id" + resource_group_name = module.cluster_data.configuration_map.resource_group_name +} diff --git a/aks/dfe_analytics/files/events.json.tmpl b/aks/dfe_analytics/files/events.json.tmpl new file mode 100644 index 0000000..13ba945 --- /dev/null +++ b/aks/dfe_analytics/files/events.json.tmpl @@ -0,0 +1,145 @@ +[ + { + "description": "The timestamp at which the event occurred in the application.", + "mode": "REQUIRED", + "name": "occurred_at", + "type": "TIMESTAMP" + }, + { + "description": "The type of the event, for example web_request. 
This determines the schema of the data which will be included in the data field.", + "mode": "REQUIRED", + "name": "event_type", + "type": "STRING" + }, + { + "description": "If a user was logged in when they sent a web request event that is this event, then this is the UID of this user.", + "name": "user_id", + "type": "STRING" + }, + { + "description": "Unique ID of the web request, if this event is a web request event", + "name": "request_uuid", + "type": "STRING" + }, + { + "description": "Whether this web request was a GET or POST request, if this event is a web request event.", + "name": "request_method", + "type": "STRING" + }, + { + "description": "The path, starting with a / and excluding any query parameters, of this web request, if this event is a web request", + "name": "request_path", + "type": "STRING" + }, + { + "description": "The user agent of this web request, if this event is a web request. Allows a user's browser and operating system to be identified", + "name": "request_user_agent", + "type": "STRING" + }, + { + "description": "The URL of any page the user was viewing when they initiated this web request, if this event is a web request. This is the full URL, including protocol (https://) and any query parameters, if the browser shared these with our application as part of the web request. It is very common for this referer to be truncated for referrals from external sites.", + "name": "request_referer", + "type": "STRING" + }, + { + "description": "ARRAY of STRUCTs, each with a key and a value. Contains any query parameters that were sent to the application as part of this web request, if this event is a web request.", + "fields": [ + { + "description": "Name of the query parameter e.g. if the URL ended ?foo=bar then this will be foo.", + "mode": "REQUIRED", + "name": "key", + "type": "STRING" + }, + { + "description": "Contents of the query parameter e.g. 
if the URL ended ?foo=bar then this will be bar.", + "mode": "REPEATED", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "request_query", + "type": "RECORD" + }, + { + "description": "Content type of any data that was returned to the browser following this web request, if this event is a web request. For example, 'text/html; charset=utf-8'. Image views, for example, may have a non-text/html content type.", + "name": "response_content_type", + "type": "STRING" + }, + { + "description": "HTTP response code returned by the application in response to this web request, if this event is a web request. See https://developer.mozilla.org/en-US/docs/Web/HTTP/Status.", + "name": "response_status", + "type": "STRING" + }, + { + "description": "ARRAY of STRUCTs, each with a key and a value. Contains a set of data points appropriate to the event_type of this event. For example, if this event was an entity create, update, delete or import event, data will contain the values of each field in the database after this event took place - according to the settings in the analytics.yml configured for this instance of dfe-analytics. Values may be anonymised as a one way hash, depending on configuration settings.", + "fields": [ + { + "description": "Name of the field in the entity_table_name table in the database after it was created or updated, or just before it was imported or destroyed.", + "mode": "REQUIRED", + "name": "key", + "type": "STRING" + }, + { + "description": "Contents of the field in the database after it was created or updated, or just before it was imported or destroyed.", + "mode": "REPEATED", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "DATA", + "type": "RECORD" + }, + { + "description": "If event_type was an entity create, update, delete or import event, the name of the table in the database that this entity is stored in. 
NULL otherwise.", + "name": "entity_table_name", + "type": "STRING" + }, + { + "description": "Currently left blank for future use.", + "mode": "REPEATED", + "name": "event_tags", + "type": "STRING" + }, + { + "description": "One way hash of a combination of the user's IP address and user agent, if this event is a web request. Can be used to identify the user anonymously, even when user_id is not set. Cannot be used to identify the user over a time period of longer than about a month, because of IP address changes and browser updates.", + "name": "anonymised_user_agent_and_ip", + "type": "STRING" + }, + { + "description": "The application environment that the event was streamed from.", + "name": "environment", + "type": "STRING" + }, + { + "description": "The namespace of the instance of dfe-analytics that streamed this event. For example this might identify the name of the service that streamed the event.", + "name": "namespace", + "type": "STRING" + }, + { + "description": "Defined in the same way as the DATA ARRAY of STRUCTs, except containing fields configured to be hidden in analytics_hidden_pii.yml", + "fields": [ + { + "description": "Name of the field in the entity_table_name table in the database after it was created or updated, or just before it was imported or destroyed.", + "mode": "REQUIRED", + "name": "KEY", + "type": "STRING" + }, + { + "description": "Contents of the field in the database after it was created or updated, or just before it was imported or destroyed.", + "mode": "REPEATED", + "name": "value", + "policyTags": { + "names": [ + "${policy_tag_name}" + ] + }, + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "hidden_DATA", + "type": "RECORD" + } +] diff --git a/aks/dfe_analytics/outputs.tf b/aks/dfe_analytics/outputs.tf new file mode 100644 index 0000000..9bc81cf --- /dev/null +++ b/aks/dfe_analytics/outputs.tf @@ -0,0 +1,25 @@ +output "bigquery_project_id" { + description = "ID of the Google cloud project e.g. 
'rugged-abacus-218110', 'apply-for-qts-in-england'..." + value = var.gcp_project_id +} +output "bigquery_table_name" { + description = "Bigquery events table name" + value = local.gcp_table_name +} +output "bigquery_dataset" { + description = "Bigquery dataset name" + value = local.gcp_dataset_name +} +output "google_cloud_credentials" { + description = "Credentials for Google workload identity federation" + value = local.gcp_credentials +} +output "dfe_analytics_variables_map" { + description = "Map of environment variables required for dfe-analytics. Merge with application configuration secrets." + value = { + BIGQUERY_PROJECT_ID = var.gcp_project_id + BIGQUERY_TABLE_NAME = local.gcp_table_name + BIGQUERY_DATASET = local.gcp_dataset_name + GOOGLE_CLOUD_CREDENTIALS = local.gcp_credentials + } +} diff --git a/aks/dfe_analytics/provider.tf b/aks/dfe_analytics/provider.tf new file mode 100644 index 0000000..251bf7c --- /dev/null +++ b/aks/dfe_analytics/provider.tf @@ -0,0 +1,13 @@ +terraform { + required_providers { + google = { + source = "hashicorp/google" + version = "6.6.0" + } + } +} + +provider "google" { + project = var.gcp_project_id + region = local.gcp_region +} diff --git a/aks/dfe_analytics/resources.tf b/aks/dfe_analytics/resources.tf new file mode 100644 index 0000000..5745938 --- /dev/null +++ b/aks/dfe_analytics/resources.tf @@ -0,0 +1,87 @@ +resource "google_service_account" "appender" { + account_id = "appender-${var.service_short}-${var.environment}" + display_name = "Service Account appender to ${var.service_short} in ${var.environment} environment" +} + +resource "google_service_account_iam_binding" "appender" { + service_account_id = google_service_account.appender.name + role = "roles/iam.workloadIdentityUser" + + members = [ + local.gcp_principal_with_subject + ] +} + +# Create key ring if it doesn't exist (i.e. var.gcp_keyring was not supplied) +resource "google_kms_key_ring" "bigquery" { + count = var.gcp_keyring == null ? 
1 : 0 + + name = local.gcp_key_ring + location = local.gcp_region +} + +# Create key if it doesn't exist, in the key ring created above or in the existing one named by var.gcp_keyring +resource "google_kms_crypto_key" "bigquery" { + count = var.gcp_key == null ? 1 : 0 + + name = local.gcp_key + key_ring = var.gcp_keyring == null ? google_kms_key_ring.bigquery[0].id : "projects/${var.gcp_project_id}/locations/${local.gcp_region}/keyRings/${var.gcp_keyring}" +} + +# Add permission if key didn't exist: the BigQuery service account must be able to use the new key +data "google_bigquery_default_service_account" "main" {} +resource "google_kms_crypto_key_iam_member" "bigquery" { + count = var.gcp_key == null ? 1 : 0 + + crypto_key_id = google_kms_crypto_key.bigquery[0].id + role = "roles/cloudkms.cryptoKeyEncrypterDecrypter" + member = "serviceAccount:${data.google_bigquery_default_service_account.main.email}" +} + +# Create dataset if it doesn't exist, encrypted with the key created above or the existing key named by var.gcp_key +resource "google_bigquery_dataset" "main" { + count = var.gcp_dataset == null ? 1 : 0 + + dataset_id = local.gcp_dataset_name + location = local.gcp_region + default_encryption_configuration { + kms_key_name = var.gcp_key == null ? google_kms_crypto_key_iam_member.bigquery[0].crypto_key_id : "projects/${var.gcp_project_id}/locations/${local.gcp_region}/keyRings/${coalesce(var.gcp_keyring, local.gcp_key_ring)}/cryptoKeys/${var.gcp_key}" + } +} + +# Add service account permission to dataset, whether we create it or it already exists. NOTE(review): iam_binding is authoritative for this role, so on a shared existing dataset it replaces other members; consider google_bigquery_dataset_iam_member +resource "google_bigquery_dataset_iam_binding" "appender" { + dataset_id = var.gcp_dataset == null ? google_bigquery_dataset.main[0].dataset_id : var.gcp_dataset + role = "projects/${var.gcp_project_id}/roles/bigquery_appender_custom" + + members = [ + "serviceAccount:${google_service_account.appender.email}", + ] +} + +# Create table if dataset doesn't exist +resource "google_bigquery_table" "events" { + count = var.gcp_dataset == null ? 
1 : 0 + + dataset_id = google_bigquery_dataset.main[0].dataset_id + table_id = local.gcp_table_name + description = "Events streamed into the BigQuery from the application" + clustering = ["event_type"] + deletion_protection = var.gcp_table_deletion_protection + require_partition_filter = false + + encryption_configuration { + kms_key_name = var.gcp_key == null ? google_kms_crypto_key_iam_member.bigquery[0].crypto_key_id : "projects/${var.gcp_project_id}/locations/${local.gcp_region}/keyRings/${coalesce(var.gcp_keyring, local.gcp_key_ring)}/cryptoKeys/${var.gcp_key}" + } + + time_partitioning { + type = "DAY" + field = "occurred_at" + } + + # Schema rendered from files/events.json.tmpl; see https://github.com/DFE-Digital/dfe-analytics/blob/main/docs/create-events-table.sql + schema = templatefile( + "${path.module}/files/events.json.tmpl", + { policy_tag_name = local.gcp_policy_tag_name } + ) +} diff --git a/aks/dfe_analytics/tfdocs.md b/aks/dfe_analytics/tfdocs.md new file mode 100644 index 0000000..c3e0f35 --- /dev/null +++ b/aks/dfe_analytics/tfdocs.md @@ -0,0 +1,62 @@ +## Requirements + +| Name | Version | +|------|---------| +| [google](#requirement\_google) | 6.6.0 | + +## Providers + +| Name | Version | +|------|---------| +| [azurerm](#provider\_azurerm) | n/a | +| [google](#provider\_google) | 6.6.0 | + +## Modules + +| Name | Source | Version | +|------|--------|---------| +| [cluster\_data](#module\_cluster\_data) | ../cluster_data | n/a | + +## Resources + +| Name | Type | +|------|------| +| [google_bigquery_dataset.main](https://registry.terraform.io/providers/hashicorp/google/6.6.0/docs/resources/bigquery_dataset) | resource | +| [google_bigquery_dataset_iam_binding.appender](https://registry.terraform.io/providers/hashicorp/google/6.6.0/docs/resources/bigquery_dataset_iam_binding) | resource | +| [google_bigquery_table.events](https://registry.terraform.io/providers/hashicorp/google/6.6.0/docs/resources/bigquery_table) | resource | +| [google_kms_crypto_key.bigquery](https://registry.terraform.io/providers/hashicorp/google/6.6.0/docs/resources/kms_crypto_key) | resource | +| 
[google_kms_crypto_key_iam_member.bigquery](https://registry.terraform.io/providers/hashicorp/google/6.6.0/docs/resources/kms_crypto_key_iam_member) | resource | +| [google_kms_key_ring.bigquery](https://registry.terraform.io/providers/hashicorp/google/6.6.0/docs/resources/kms_key_ring) | resource | +| [google_service_account.appender](https://registry.terraform.io/providers/hashicorp/google/6.6.0/docs/resources/service_account) | resource | +| [google_service_account_iam_binding.appender](https://registry.terraform.io/providers/hashicorp/google/6.6.0/docs/resources/service_account_iam_binding) | resource | +| [azurerm_client_config.current](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/data-sources/client_config) | data source | +| [azurerm_user_assigned_identity.gcp_wif](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/data-sources/user_assigned_identity) | data source | +| [google_bigquery_default_service_account.main](https://registry.terraform.io/providers/hashicorp/google/6.6.0/docs/data-sources/bigquery_default_service_account) | data source | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [azure\_resource\_prefix](#input\_azure\_resource\_prefix) | Prefix of Azure resources for the service | `string` | n/a | yes | +| [cluster](#input\_cluster) | AKS cluster name e.g. test, production... | `string` | n/a | yes | +| [environment](#input\_environment) | Service environment name e.g. production, test, pr-1234... | `string` | n/a | yes | +| [gcp\_dataset](#input\_gcp\_dataset) | Name of an existing dataset. Optional: if not provided, create a new dataset | `string` | `null` | no | +| [gcp\_key](#input\_gcp\_key) | Name of an existing customer-managed encryption key (CMEK). Optional: if not provided, create a new key | `string` | `null` | no | +| [gcp\_keyring](#input\_gcp\_keyring) | Name of an existing keyring. 
Optional: if not provided, create a new keyring | `string` | `null` | no | +| [gcp\_policy\_tag\_id](#input\_gcp\_policy\_tag\_id) | Policy tag ID | `number` | n/a | yes | +| [gcp\_project\_id](#input\_gcp\_project\_id) | ID of the Google cloud project e.g. 'rugged-abacus-218110', 'apply-for-qts-in-england'... | `string` | n/a | yes | +| [gcp\_project\_number](#input\_gcp\_project\_number) | Google cloud project number | `number` | n/a | yes | +| [gcp\_table\_deletion\_protection](#input\_gcp\_table\_deletion\_protection) | Prevents deletion of the event table. Default: true | `bool` | `true` | no | +| [gcp\_taxonomy\_id](#input\_gcp\_taxonomy\_id) | Policy tags taxonomy ID | `number` | n/a | yes | +| [namespace](#input\_namespace) | AKS Namespace where the service is deployed to | `string` | n/a | yes | +| [service\_short](#input\_service\_short) | Short name for the service e.g. att, aytq... | `string` | n/a | yes | + +## Outputs + +| Name | Description | +|------|-------------| +| [bigquery\_dataset](#output\_bigquery\_dataset) | Bigquery dataset name | +| [bigquery\_project\_id](#output\_bigquery\_project\_id) | ID of the Google cloud project e.g. 'rugged-abacus-218110', 'apply-for-qts-in-england'... | +| [bigquery\_table\_name](#output\_bigquery\_table\_name) | Bigquery events table name | +| [dfe\_analytics\_variables\_map](#output\_dfe\_analytics\_variables\_map) | Map of environment variables required for dfe-analytics. Merge with application configuration secrets. 
| +| [google\_cloud\_credentials](#output\_google\_cloud\_credentials) | Credentials for Google workload identity federation | diff --git a/aks/dfe_analytics/variables.tf b/aks/dfe_analytics/variables.tf new file mode 100644 index 0000000..155ae02 --- /dev/null +++ b/aks/dfe_analytics/variables.tf @@ -0,0 +1,87 @@ +variable "azure_resource_prefix" { + type = string + description = "Prefix of Azure resources for the service" +} +variable "cluster" { + type = string + description = "AKS cluster name e.g. test, production..." +} +variable "namespace" { + type = string + description = "AKS Namespace where the service is deployed to" +} +variable "service_short" { + type = string + description = "Short name for the service e.g. att, aytq..." +} +variable "environment" { + type = string + description = "Service environment name e.g. production, test, pr-1234..." +} + +variable "gcp_dataset" { + type = string + description = "Name of an existing dataset. Optional: if not provided, create a new dataset" + default = null +} +variable "gcp_keyring" { + type = string + description = "Name of an existing keyring. Optional: if not provided, create a new keyring" + default = null +} +variable "gcp_key" { + type = string + description = "Name of an existing customer-managed encryption key (CMEK). Optional: if not provided, create a new key" + default = null +} +variable "gcp_project_id" { + type = string + description = "ID of the Google cloud project e.g. 'rugged-abacus-218110', 'apply-for-qts-in-england'..." +} +variable "gcp_project_number" { + type = number + description = "Google cloud project number" +} +variable "gcp_taxonomy_id" { + type = number + description = "Policy tags taxonomy ID" +} +variable "gcp_policy_tag_id" { + type = number + description = "Policy tag ID" +} +variable "gcp_table_deletion_protection" { + type = bool + description = "Prevents deletion of the event table. 
Default: true" + default = true +} + +locals { + # Global constants shared by every service using this module (region, table name, workload identity pool) + gcp_region = "europe-west2" + gcp_table_name = "events" + gcp_workload_id_pool = "azure-cip-identity-pool" + + gcp_key_ring = "bigquery-${var.service_short}-${var.environment}" + gcp_key = "bigquery-${var.service_short}-${var.environment}" + gcp_dataset_name = var.gcp_dataset == null ? replace("${var.service_short}_events_${var.environment}_spike", "-", "_") : var.gcp_dataset + gcp_principal = "principal://iam.googleapis.com/projects/${var.gcp_project_number}/locations/global/workloadIdentityPools/${local.gcp_workload_id_pool}" + gcp_principal_with_subject = "${local.gcp_principal}/subject/${data.azurerm_user_assigned_identity.gcp_wif.principal_id}" + + gcp_credentials_map = { + universe_domain = "googleapis.com" + type = "external_account" + audience = "//iam.googleapis.com/projects/${var.gcp_project_number}/locations/global/workloadIdentityPools/${local.gcp_workload_id_pool}/providers/azure-cip-oidc-provider" + subject_token_type = "urn:ietf:params:oauth:token-type:jwt" + token_url = "https://sts.googleapis.com/v1/token" + credential_source = { + url = "https://login.microsoftonline.com/${data.azurerm_client_config.current.tenant_id}/oauth2/v2.0/token" + } + service_account_impersonation_url = "https://iamcredentials.googleapis.com/v1/projects/-/serviceAccounts/${google_service_account.appender.email}:generateAccessToken" + service_account_impersonation = { + token_lifetime_seconds = 3600 + } + } + gcp_credentials = jsonencode(local.gcp_credentials_map) + gcp_policy_tag_name = "projects/${var.gcp_project_id}/locations/${local.gcp_region}/taxonomies/${var.gcp_taxonomy_id}/policyTags/${var.gcp_policy_tag_id}" +}