Skip to content

Commit

Permalink
Grant/revoke LF access when objects are saved/deleted
Browse files Browse the repository at this point in the history
  • Loading branch information
michaeljcollinsuk committed Aug 27, 2024
1 parent 3b17093 commit fe49773
Show file tree
Hide file tree
Showing 2 changed files with 141 additions and 2 deletions.
55 changes: 53 additions & 2 deletions ap/aws/lakeformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ def grant_table_permissions(
catalog_id: str = "",
resource_catalog_id: str = "",
region_name: str = "",
permissions: list | None = None,
):
client = self.get_client(region_name)
return client.grant_permissions(
Expand All @@ -28,7 +29,7 @@ def grant_table_permissions(
"CatalogId": resource_catalog_id or self.catalog_id,
},
},
Permissions=["SELECT"],
Permissions=permissions or ["SELECT"],
CatalogId=catalog_id or self.catalog_id,
)

Expand All @@ -47,6 +48,7 @@ def grant_database_permissions(
region_name: str = "",
catalog_id: str = "",
resource_catalog_id: str = "",
permissions: list | None = None,
):
"""
Grant the principal permissions to the database.
Expand All @@ -60,6 +62,55 @@ def grant_database_permissions(
"CatalogId": resource_catalog_id or self.catalog_id,
},
},
Permissions=["DESCRIBE"],
Permissions=permissions or ["DESCRIBE"],
CatalogId=catalog_id or self.catalog_id,
)

def revoke_table_permissions(
self,
database: str,
table: str,
principal: str,
catalog_id: str = "",
resource_catalog_id: str = "",
region_name: str = "",
permissions: list | None = None,
):
client = self.get_client(region_name)
return client.revoke_permissions(
Principal={"DataLakePrincipalIdentifier": principal},
Resource={
"Table": {
"DatabaseName": database,
"Name": table,
"CatalogId": resource_catalog_id or self.catalog_id,
},
},
Permissions=permissions or ["SELECT"],
CatalogId=catalog_id or self.catalog_id,
)

def revoke_database_permissions(
self,
database: str,
principal: str,
region_name: str = "",
catalog_id: str = "",
resource_catalog_id: str = "",
permissions: list | None = None,
):
"""
Grant the principal permissions to the database.
"""
client = self.get_client(region_name)
return client.revoke_permissions(
Principal={"DataLakePrincipalIdentifier": principal},
Resource={
"Database": {
"Name": database,
"CatalogId": resource_catalog_id or self.catalog_id,
},
},
Permissions=permissions or ["DESCRIBE"],
CatalogId=catalog_id or self.catalog_id,
)
88 changes: 88 additions & 0 deletions ap/database_access/models/access.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
from functools import cached_property

from django.db import models
from django.urls import reverse

from django_extensions.db.models import TimeStampedModel

from ap import aws


class AccessLevel(models.Model):
class Entity(models.TextChoices):
Expand Down Expand Up @@ -36,6 +40,16 @@ class Meta:

def save(self, *args, **kwargs):
create = self.pk is None

# grant access
lake_formation = aws.LakeFormationService()
quicksight_user = lake_formation.arn(
resource=f"user/default/{self.user.email}",
service="quicksight",
)
lake_formation.grant_database_permissions(
database=self.name, principal=quicksight_user, permissions=["DESCRIBE"]
)
super().save(*args, **kwargs)
if create:
self.access_levels.add(
Expand All @@ -44,6 +58,16 @@ def save(self, *args, **kwargs):
)[0]
)

def delete(self, *args, **kwargs):
    """
    Revoke the user's Lake Formation database permissions, then delete this record.

    Returns the result of the parent ``delete()`` call so callers receive the same value they
    would get from an un-overridden model.
    """
    # revoke access before removing the row: if the revoke call raises, the record is kept
    lake_formation = aws.LakeFormationService()
    quicksight_user = lake_formation.arn(
        resource=f"user/default/{self.user.email}",
        service="quicksight",
    )
    lake_formation.revoke_database_permissions(database=self.name, principal=quicksight_user)
    return super().delete(*args, **kwargs)


class TableAccess(TimeStampedModel):
database_access = models.ForeignKey(
Expand All @@ -61,6 +85,18 @@ class TableAccess(TimeStampedModel):
class Meta:
unique_together = ("database_access", "name")

@cached_property
def table_details(self):
    """
    Fetch this table's details from the Glue service, cached per instance.

    The lookup uses the name of the related database access and this table's own name. Other
    methods on this model read the ``DatabaseName`` and ``CatalogId`` keys of the result to
    address the source (shared) table. This information could be stored on the model to save
    API calls, since those details do not change, but it would be duplicated data, so it may
    not be worth it.
    """
    glue = aws.GlueService()
    database_name = self.database_access.name
    table_name = self.name
    return glue.get_table_detail(database_name=database_name, table_name=table_name)

def get_absolute_url(self, viewname: str = "database_access:manage_table_access"):
return reverse(
viewname=viewname,
Expand All @@ -73,3 +109,55 @@ def get_absolute_url(self, viewname: str = "database_access:manage_table_access"

def get_absolute_revoke_url(self):
    """Return the URL for the revoke-table-access view of this object."""
    revoke_view = "database_access:revoke_table_access"
    return self.get_absolute_url(viewname=revoke_view)

def save(self, *args, **kwargs):
    """
    Grant the user Lake Formation permissions on the table, then save this record.

    Positional and keyword arguments are passed through to the parent ``save()`` unchanged.
    Returns whatever the parent ``save()`` returns.
    """
    # update LF access
    lake_formation = aws.LakeFormationService()
    quicksight_user = lake_formation.arn(
        resource=f"user/default/{self.database_access.user.email}",
        service="quicksight",
    )

    # there is no good way to check the region of the source (shared) table, so for now assume
    # that it will always be eu-west-1, as this is where data lives in the data-prod account,
    # and tables have to be shared to the same region. If data starts to be shared from other
    # accounts/regions we will need to think of a more dynamic solution eg. check the catalog ID
    region_name = "eu-west-1"

    # call glue API to get the name of the shared db. Alternatively we could infer it based
    # on what is implemented on the module that shares databases e.g. strip _rl if present
    details = self.table_details
    lake_formation.grant_table_permissions(
        database=details["DatabaseName"],
        table=self.name,
        principal=quicksight_user,
        resource_catalog_id=details["CatalogId"],
        region_name=region_name,
    )

    return super().save(*args, **kwargs)

def delete(self, *args, **kwargs):
    """
    Revoke the user's Lake Formation permissions on the table, then delete this record.

    If no table access remains for the parent database access afterwards, the parent is
    deleted as well. Positional and keyword arguments are passed through to the parent
    ``delete()``, and its result for this record is returned.
    """
    # update LF access
    lake_formation = aws.LakeFormationService()
    quicksight_user = lake_formation.arn(
        resource=f"user/default/{self.database_access.user.email}",
        service="quicksight",
    )

    # there is no good way to check the region of the source (shared) table, so for now assume
    # that it will always be eu-west-1, as this is where data lives in the data-prod account,
    # and tables have to be shared to the same region. If data starts to be shared from other
    # accounts/regions we will need to think of a more dynamic solution eg. check the catalog ID
    region_name = "eu-west-1"
    details = self.table_details
    lake_formation.revoke_table_permissions(
        database=details["DatabaseName"],
        table=self.name,
        principal=quicksight_user,
        resource_catalog_id=details["CatalogId"],
        region_name=region_name,
    )

    deleted = super().delete(*args, **kwargs)

    if not self.database_access.table_access.exists():
        # if this was the last table access for the database, revoke database access
        self.database_access.delete()

    return deleted

0 comments on commit fe49773

Please sign in to comment.