feat: New module volumes on existing catalogs (#645)
alldoami authored Oct 1, 2024
1 parent 1f935be commit ae60a9c
Showing 5 changed files with 232 additions and 0 deletions.
41 changes: 41 additions & 0 deletions databricks-s3-volume-existing-catalog/grants.tf
@@ -0,0 +1,41 @@
locals {
volume_r_grants = flatten([
for bucket in var.volume_buckets : [
for principal in bucket.volume_r_grant_principals : {
        volume_name = bucket.volume_name
principal = principal
}
]
])

volume_rw_grants = flatten([
for bucket in var.volume_buckets : [
for principal in bucket.volume_rw_grant_principals : {
        volume_name = bucket.volume_name
principal = principal
}
]
])
}

# Read-only access grants
resource "databricks_grant" "volume_r" {
  # Key on "volume/principal" so multiple principals on one volume don't collide
  for_each = { for grant in local.volume_r_grants : "${grant.volume_name}/${grant.principal}" => grant }

volume = databricks_volume.volume[each.value.volume_name].id
principal = each.value.principal
privileges = ["READ_VOLUME"]

depends_on = [databricks_volume.volume]
}

# Read/write access grants
resource "databricks_grant" "volume_rw" {
  for_each = { for grant in local.volume_rw_grants : "${grant.volume_name}/${grant.principal}" => grant }

volume = databricks_volume.volume[each.value.volume_name].id
principal = each.value.principal
privileges = ["READ_VOLUME", "WRITE_VOLUME"]

depends_on = [databricks_volume.volume]
}
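
For illustration, here is a hypothetical input (not part of this commit) and the grant objects the flatten above produces — one per (volume, principal) pair, so each for_each key stays unique:

# volume_buckets = [
#   {
#     volume_name                = "raw-data"
#     bucket_name                = "my-team-raw-data"
#     volume_r_grant_principals  = ["analysts"]
#     volume_rw_grant_principals = ["engineers", "pipelines"]
#   },
# ]
#
# local.volume_rw_grants then evaluates to:
#   [
#     { volume_name = "raw-data", principal = "engineers" },
#     { volume_name = "raw-data", principal = "pipelines" },
#   ]
# giving databricks_grant.volume_rw the keys "raw-data/engineers" and "raw-data/pipelines".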
90 changes: 90 additions & 0 deletions databricks-s3-volume-existing-catalog/iam.tf
@@ -0,0 +1,90 @@
## Databricks external location and IAM

data "aws_caller_identity" "current" {
provider = aws
}

data "aws_iam_policy_document" "volume_dbx_unity_aws_role_assume_role" {
statement {
principals {
type = "AWS"
      identifiers = ["arn:aws:iam::${local.databricks_aws_account}:role/unity-catalog-prod-UCMasterRole-14S5ZJVKOTYTL"]
}

actions = ["sts:AssumeRole"]
condition {
test = "StringEquals"
variable = "sts:ExternalId"

values = ["4a2f419c-ae7a-49f1-b774-8f3113d9834d"]
}
}
statement {
principals {
type = "AWS"
identifiers = ["arn:aws:iam::${data.aws_caller_identity.current.account_id}:root"]
}

actions = ["sts:AssumeRole"]
condition {
test = "ArnEquals"
variable = "aws:PrincipalArn"
values = ["arn:aws:iam::${data.aws_caller_identity.current.account_id}:role${local.path}${local.dbx_volume_aws_role_name}"]
}
}
}
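
# NOTE: the second trust statement above makes the role self-assuming (the
# account-root principal, constrained to this role's own ARN). Unity Catalog
# storage credentials require self-assuming IAM roles, which is also why the
# access policy below grants sts:AssumeRole on the role's own ARN.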

resource "aws_iam_role" "volume_dbx_unity_aws_role" {
name = local.dbx_volume_aws_role_name
path = local.path
assume_role_policy = data.aws_iam_policy_document.volume_dbx_unity_aws_role_assume_role.json
}


### Policy document to access default volume bucket and assume role
data "aws_iam_policy_document" "volume_bucket_dbx_unity_access" {
statement {
sid = "dbxSCBucketAccess"
effect = "Allow"
actions = [
"s3:ListBucket",
"s3:GetBucketLocation",
"s3:GetLifecycleConfiguration",
"s3:PutLifecycleConfiguration"
]
resources = [
for bucket in var.volume_buckets : "arn:aws:s3:::${bucket.bucket_name}"
]
}
statement {
sid = "dbxSCObjAccess"
effect = "Allow"
actions = [
"s3:GetObject",
"s3:PutObject",
"s3:DeleteObject",
]
resources = [
for bucket in var.volume_buckets : "arn:aws:s3:::${bucket.bucket_name}/*"
]
}
statement {
sid = "databricksAssumeRole"
effect = "Allow"
actions = [
"sts:AssumeRole"
]
resources = [
"arn:aws:iam::${data.aws_caller_identity.current.account_id}:role${local.path}${local.dbx_volume_aws_role_name}"
]
}
}

resource "aws_iam_policy" "volume_dbx_unity_access_policy" {
policy = data.aws_iam_policy_document.volume_bucket_dbx_unity_access.json
}

resource "aws_iam_role_policy_attachment" "volume_dbx_unity_aws_access" {
policy_arn = aws_iam_policy.volume_dbx_unity_access_policy.arn
role = aws_iam_role.volume_dbx_unity_aws_role.name
}
54 changes: 54 additions & 0 deletions databricks-s3-volume-existing-catalog/main.tf
@@ -0,0 +1,54 @@
# Volume bucket (UC supported)

// https://docs.databricks.com/administration-guide/multiworkspace/iam-role.html#language-Your%C2%A0VPC,%C2%A0custom
locals {
dbx_volume_aws_role_name = "${var.catalog_name}-volumes-role"
path = "/databricks/"
databricks_aws_account = "414351767826" # Databricks' own AWS account, not CZI's. See https://docs.databricks.com/en/administration-guide/account-settings-e2/credentials.html#step-1-create-a-cross-account-iam-role
}

### Databricks storage credential - allows workspace to access an external location.
### NOTE: names need to be unique across an account, not just a workspace

resource "databricks_storage_credential" "volume" {
depends_on = [
    aws_iam_role.volume_dbx_unity_aws_role,
    aws_iam_role_policy_attachment.volume_dbx_unity_aws_access
]

name = "${var.catalog_name}-volumes-storage-credential"
aws_iam_role {
role_arn = aws_iam_role.volume_dbx_unity_aws_role.arn
}
comment = "Managed by Terraform - access for the volumes in ${var.catalog_name}"
}

# upstream external location sometimes takes a moment to register
resource "time_sleep" "wait_30_seconds" {
depends_on = [databricks_storage_credential.volume]

create_duration = "30s"
}

resource "databricks_external_location" "volume" {
for_each = { for bucket in var.volume_buckets : bucket.volume_name => bucket }
depends_on = [time_sleep.wait_30_seconds]

name = "${each.value.volume_name}-external-location"
url = "s3://${each.value.bucket_name}"
credential_name = databricks_storage_credential.volume.name
  comment         = "Managed by Terraform - external location for bucket ${each.value.bucket_name} in ${var.catalog_name}"
}

# New volume
resource "databricks_volume" "volume" {
for_each = { for bucket in var.volume_buckets : bucket.volume_name => bucket }
depends_on = [databricks_external_location.volume]
name = each.value.volume_name
catalog_name = var.catalog_name
schema_name = var.schema_name
volume_type = "EXTERNAL"
storage_location = "s3://${each.value.bucket_name}${each.value.bucket_prefix != "" ? "/${each.value.bucket_prefix}" : ""}"
owner = var.catalog_owner
  comment          = "Managed by Terraform - access for the volume named ${each.value.volume_name} in ${var.catalog_name}"
}
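
A quick sketch of the storage_location interpolation with hypothetical names:

# bucket_name = "my-team-raw-data", bucket_prefix = "landing"  ->  "s3://my-team-raw-data/landing"
# bucket_name = "my-team-raw-data", bucket_prefix = ""         ->  "s3://my-team-raw-data"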
36 changes: 36 additions & 0 deletions databricks-s3-volume-existing-catalog/variables.tf
@@ -0,0 +1,36 @@
variable "catalog_name" {
description = "Name of the Databricks existing catalog to add the volume to"
type = string
}

variable "catalog_owner" {
description = "User or group name of the catalog owner"
type = string
}

variable "schema_name" {
description = "Name of the Databricks schema to add the volume to"
type = string
}

variable "volume_buckets" {
  description = "List of external buckets to expose as volumes, with the principals that should be granted read (r) or read/write (rw) access to each"
  type = list(object({
    volume_name                : string
    bucket_name                : string
    bucket_prefix              : optional(string, "")
    volume_r_grant_principals  : optional(list(string), [])
    volume_rw_grant_principals : optional(list(string), [])
  }))
}

variable "tags" {
description = "REQUIRED: Tags to include for this environment."
type = object({
project : string
env : string
service : string
owner : string
managedBy : string
})
}
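
A minimal sketch of calling the module (the source path, names, and principals are assumptions for illustration, not part of this commit):

module "analytics_volumes" {
  source = "../databricks-s3-volume-existing-catalog" # hypothetical path

  catalog_name  = "analytics"
  catalog_owner = "data-platform-admins"
  schema_name   = "default"

  volume_buckets = [
    {
      volume_name                = "raw-data"
      bucket_name                = "my-team-raw-data"
      bucket_prefix              = "landing"
      volume_r_grant_principals  = ["analysts"]
      volume_rw_grant_principals = ["engineers"]
    },
  ]

  tags = {
    project   = "analytics"
    env       = "dev"
    service   = "databricks"
    owner     = "data-platform"
    managedBy = "terraform"
  }
}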
11 changes: 11 additions & 0 deletions databricks-s3-volume-existing-catalog/versions.tf
@@ -0,0 +1,11 @@
terraform {
required_providers {
aws = {
source = "hashicorp/aws"
}
databricks = {
source = "databricks/databricks"
}
}
required_version = ">= 1.3.0"
}
