Skip to content

Commit

Permalink
Access for find-moj-data github actions
Browse files Browse the repository at this point in the history
We want to schedule DataHub DBT ingestions using GitHub Actions.
(ministryofjustice/data-catalogue#123)

To do this, GitHub Actions needs to be able to assume a role via OIDC
and use it to access the S3 bucket containing the outputs from DBT.
See https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_create_for-idp_oidc.html

We already had IRSAs (IAM roles for service accounts), which can be assumed by DataHub itself,
but these presuppose that the application is running in a Kubernetes pod on
AWS, whereas in this case we are going to run the ingestion from GitHub
Actions.
  • Loading branch information
MatMoore committed May 28, 2024
1 parent 4d49ae8 commit 4eb5906
Showing 1 changed file with 34 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,40 @@ resource "aws_iam_policy" "datahub_read_cadet_bucket" {
policy = data.aws_iam_policy_document.datahub_read_cadet_bucket.json
}

# Trust policy letting GitHub Actions assume a role via OIDC, so that
# scheduled jobs in the find-moj-data repository can access the CaDeT bucket.
data "aws_iam_policy_document" "datahub_ingestion_github_actions" {
  statement {
    actions = ["sts:AssumeRoleWithWebIdentity"]
    effect  = "Allow"

    # The caller must be federated through the GitHub Actions OIDC provider
    # registered in the analytical-platform-data-production account.
    principals {
      identifiers = ["arn:aws:iam::${var.account_ids["analytical-platform-data-production"]}:oidc-provider/token.actions.githubusercontent.com"]
      type        = "Federated"
    }

    # The token's subject must begin with the find-moj-data repository;
    # the trailing wildcard accepts any suffix after the repository name.
    condition {
      variable = "token.actions.githubusercontent.com:sub"
      test     = "StringLike"
      values   = ["repo:ministryofjustice/find-moj-data:*"]
    }

    # The token's audience must be exactly AWS STS.
    condition {
      variable = "token.actions.githubusercontent.com:aud"
      test     = "StringEquals"
      values   = ["sts.amazonaws.com"]
    }
  }
}

# IAM role that GitHub Actions assumes; who may assume it is governed by the
# datahub_ingestion_github_actions policy document.
resource "aws_iam_role" "datahub_ingestion_github_actions" {
  assume_role_policy = data.aws_iam_policy_document.datahub_ingestion_github_actions.json
  name               = "datahub-ingestion-github-actions"
}

# Attach the datahub_read_cadet_bucket managed policy to the GitHub Actions
# ingestion role.
resource "aws_iam_role_policy_attachment" "datahub_ingestion_github_actions" {
  role = aws_iam_role.datahub_ingestion_github_actions.name

  # The ARN comes from the managed policy resource (aws_iam_policy).
  # aws_iam_policy_document is a data source that only renders JSON and has
  # no ARN to attach.
  policy_arn = aws_iam_policy.datahub_read_cadet_bucket.arn
}

#trivy:ignore:avd-aws-0057:sensitive action 'glue:GetDatabases' on wildcarded resource
data "aws_iam_policy_document" "datahub_ingest_glue_datasets" {
statement {
Expand Down

0 comments on commit 4eb5906

Please sign in to comment.