diff --git a/databricks-s3-volume-existing-catalog/grants.tf b/databricks-s3-volume-existing-catalog/grants.tf
new file mode 100644
index 00000000..f3d23fed
--- /dev/null
+++ b/databricks-s3-volume-existing-catalog/grants.tf
@@ -0,0 +1,41 @@
+locals {
+  volume_r_grants = flatten([
+    for bucket in var.volume_buckets : [
+      for principal in bucket.volume_r_grant_principals : {
+        volume_name = bucket.volume_name
+        principal   = principal
+      }
+    ]
+  ])
+
+  volume_rw_grants = flatten([
+    for bucket in var.volume_buckets : [
+      for principal in bucket.volume_rw_grant_principals : {
+        volume_name = bucket.volume_name
+        principal   = principal
+      }
+    ]
+  ])
+}
+
+# Read-only access grants (one databricks_grant per volume/principal pair)
+resource "databricks_grant" "volume_r" {
+  for_each = { for grant in local.volume_r_grants : "${grant.volume_name}/${grant.principal}" => grant }
+
+  volume     = databricks_volume.volume[each.value.volume_name].id
+  principal  = each.value.principal
+  privileges = ["READ_VOLUME"]
+
+  depends_on = [databricks_volume.volume]
+}
+
+# Read/write access grants (one databricks_grant per volume/principal pair)
+resource "databricks_grant" "volume_rw" {
+  for_each = { for grant in local.volume_rw_grants : "${grant.volume_name}/${grant.principal}" => grant }
+
+  volume     = databricks_volume.volume[each.value.volume_name].id
+  principal  = each.value.principal
+  privileges = ["READ_VOLUME", "WRITE_VOLUME"]
+
+  depends_on = [databricks_volume.volume]
+}
diff --git a/databricks-s3-volume-existing-catalog/iam.tf b/databricks-s3-volume-existing-catalog/iam.tf
new file mode 100644
index 00000000..255f8b5d
--- /dev/null
+++ b/databricks-s3-volume-existing-catalog/iam.tf
@@ -0,0 +1,90 @@
+## Databricks external location and IAM
+
+data "aws_caller_identity" "current" {
+  provider = aws
+}
+
+data "aws_iam_policy_document" "volume_dbx_unity_aws_role_assume_role" {
+  statement {
+    principals {
+      type        = "AWS"
+      identifiers = ["arn:aws:iam::${local.databricks_aws_account}:role/unity-catalog-prod-UCMasterRole-14S5ZJVKOTYTL"]
+    }
+
+    actions = ["sts:AssumeRole"]
+    condition {
+      test     = "StringEquals"
+      variable = "sts:ExternalId"
+
+      values = ["4a2f419c-ae7a-49f1-b774-8f3113d9834d"]
+    }
+  }
+  statement {
+    principals {
+      type        = "AWS"
+      identifiers = ["arn:aws:iam::${data.aws_caller_identity.current.account_id}:root"]
+    }
+
+    actions = ["sts:AssumeRole"]
+    condition {
+      test     = "ArnEquals"
+      variable = "aws:PrincipalArn"
+      values   = ["arn:aws:iam::${data.aws_caller_identity.current.account_id}:role${local.path}${local.dbx_volume_aws_role_name}"]
+    }
+  }
+}
+
+resource "aws_iam_role" "volume_dbx_unity_aws_role" {
+  name               = local.dbx_volume_aws_role_name
+  path               = local.path
+  assume_role_policy = data.aws_iam_policy_document.volume_dbx_unity_aws_role_assume_role.json
+}
+
+
+### Policy document to access the volume buckets and assume the role
+data "aws_iam_policy_document" "volume_bucket_dbx_unity_access" {
+  statement {
+    sid    = "dbxSCBucketAccess"
+    effect = "Allow"
+    actions = [
+      "s3:ListBucket",
+      "s3:GetBucketLocation",
+      "s3:GetLifecycleConfiguration",
+      "s3:PutLifecycleConfiguration"
+    ]
+    resources = [
+      for bucket in var.volume_buckets : "arn:aws:s3:::${bucket.bucket_name}"
+    ]
+  }
+  statement {
+    sid    = "dbxSCObjAccess"
+    effect = "Allow"
+    actions = [
+      "s3:GetObject",
+      "s3:PutObject",
+      "s3:DeleteObject",
+    ]
+    resources = [
+      for bucket in var.volume_buckets : "arn:aws:s3:::${bucket.bucket_name}/*"
+    ]
+  }
+  statement {
+    sid    = "databricksAssumeRole"
+    effect = "Allow"
+    actions = [
+      "sts:AssumeRole"
+    ]
+    resources = [
+      "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role${local.path}${local.dbx_volume_aws_role_name}"
+    ]
+  }
+}
+
+resource "aws_iam_policy" "volume_dbx_unity_access_policy" {
+  policy = data.aws_iam_policy_document.volume_bucket_dbx_unity_access.json
+}
+
+resource "aws_iam_role_policy_attachment" "volume_dbx_unity_aws_access" {
+  policy_arn = aws_iam_policy.volume_dbx_unity_access_policy.arn
+  role       = aws_iam_role.volume_dbx_unity_aws_role.name
+}
diff --git a/databricks-s3-volume-existing-catalog/main.tf b/databricks-s3-volume-existing-catalog/main.tf
new file mode 100644
index 00000000..00f5588e
--- /dev/null
+++ b/databricks-s3-volume-existing-catalog/main.tf
@@ -0,0 +1,54 @@
+# Volume bucket (UC supported)
+
+// https://docs.databricks.com/administration-guide/multiworkspace/iam-role.html#language-Your%C2%A0VPC,%C2%A0custom
+locals {
+  dbx_volume_aws_role_name = "${var.catalog_name}-volumes-role"
+  path                     = "/databricks/"
+  databricks_aws_account   = "414351767826" # Databricks' own AWS account, not CZI's. See https://docs.databricks.com/en/administration-guide/account-settings-e2/credentials.html#step-1-create-a-cross-account-iam-role
+}
+
+### Databricks storage credential - allows the workspace to access an external location.
+### NOTE: names need to be unique across an account, not just a workspace
+
+resource "databricks_storage_credential" "volume" {
+  depends_on = [
+    resource.aws_iam_role.volume_dbx_unity_aws_role,
+    resource.aws_iam_role_policy_attachment.volume_dbx_unity_aws_access
+  ]
+
+  name = "${var.catalog_name}-volumes-storage-credential"
+  aws_iam_role {
+    role_arn = aws_iam_role.volume_dbx_unity_aws_role.arn
+  }
+  comment = "Managed by Terraform - access for the volumes in ${var.catalog_name}"
+}
+
+# the storage credential sometimes takes a moment to register upstream
+resource "time_sleep" "wait_30_seconds" {
+  depends_on = [databricks_storage_credential.volume]
+
+  create_duration = "30s"
+}
+
+resource "databricks_external_location" "volume" {
+  for_each   = { for bucket in var.volume_buckets : bucket.volume_name => bucket }
+  depends_on = [time_sleep.wait_30_seconds]
+
+  name            = "${each.value.volume_name}-external-location"
+  url             = "s3://${each.value.bucket_name}"
+  credential_name = databricks_storage_credential.volume.name
+  comment         = "Managed by Terraform - external location for the volume named ${each.value.volume_name} in ${var.catalog_name}"
+}
+
+# New volume
+resource "databricks_volume" "volume" {
+  for_each         = { for bucket in var.volume_buckets : bucket.volume_name => bucket }
+  depends_on       = [databricks_external_location.volume]
+  name             = each.value.volume_name
+  catalog_name     = var.catalog_name
+  schema_name      = var.schema_name
+  volume_type      = "EXTERNAL"
+  storage_location = "s3://${each.value.bucket_name}${each.value.bucket_prefix != "" ? "/${each.value.bucket_prefix}" : ""}"
+  owner            = var.catalog_owner
+  comment          = "Managed by Terraform - access for the volume named ${each.value.volume_name} in ${var.catalog_name}"
+}
diff --git a/databricks-s3-volume-existing-catalog/variables.tf b/databricks-s3-volume-existing-catalog/variables.tf
new file mode 100644
index 00000000..5d266f55
--- /dev/null
+++ b/databricks-s3-volume-existing-catalog/variables.tf
@@ -0,0 +1,36 @@
+variable "catalog_name" {
+  description = "Name of the existing Databricks catalog to add the volume to"
+  type        = string
+}
+
+variable "catalog_owner" {
+  description = "User or group name of the catalog owner"
+  type        = string
+}
+
+variable "schema_name" {
+  description = "Name of the Databricks schema to add the volume to"
+  type        = string
+}
+
+variable "volume_buckets" {
+  description = "List of external buckets and the principals that should be granted read or read/write access to them"
+  type = list(object({
+    volume_name : string
+    bucket_name : string
+    bucket_prefix : optional(string, "")
+    volume_r_grant_principals : optional(list(string), [])
+    volume_rw_grant_principals : optional(list(string), [])
+  }))
+}
+
+variable "tags" {
+  description = "REQUIRED: Tags to include for this environment."
+  type = object({
+    project : string
+    env : string
+    service : string
+    owner : string
+    managedBy : string
+  })
+}
\ No newline at end of file
diff --git a/databricks-s3-volume-existing-catalog/versions.tf b/databricks-s3-volume-existing-catalog/versions.tf
new file mode 100644
index 00000000..159e8002
--- /dev/null
+++ b/databricks-s3-volume-existing-catalog/versions.tf
@@ -0,0 +1,15 @@
+terraform {
+  required_providers {
+    aws = {
+      source = "hashicorp/aws"
+    }
+    databricks = {
+      source = "databricks/databricks"
+    }
+    # declared explicitly because main.tf uses the time_sleep resource
+    time = {
+      source = "hashicorp/time"
+    }
+  }
+  required_version = ">= 1.3.0"
+}