Skip to content

Commit

Permalink
Create bulk-upload endpoint
Browse files Browse the repository at this point in the history
  • Loading branch information
jusdino committed Jun 4, 2024
1 parent c5b7d35 commit 71d9ba6
Show file tree
Hide file tree
Showing 43 changed files with 2,925 additions and 218 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/check-python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ on:
pull_request:
branches:
- main
- dev
- development

env:
AWS_REGION : "us-east-1"
Expand Down
2 changes: 2 additions & 0 deletions backend/bin/compile_requirements.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,6 @@ pip-compile --no-emit-index-url --upgrade requirements.in
pip-compile --no-emit-index-url --upgrade requirements-dev.in
pip-compile --no-emit-index-url --upgrade lambdas/requirements.in
pip-compile --no-emit-index-url --upgrade lambdas/requirements-dev.in
pip-compile --no-emit-index-url --upgrade delete-objects-lambda/requirements.in
pip-compile --no-emit-index-url --upgrade delete-objects-lambda/requirements-dev.in
bin/sync_deps.sh
3 changes: 2 additions & 1 deletion backend/bin/run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@ REPORT="$1"

# Run CDK tests, tracking code coverage in a new data file
pytest --cov=. --cov-config=.coveragerc tests || exit "$?"
for dir in lambdas delete-objects-lambda
(
cd lambdas
cd "$dir"
# Run lambda tests, appending data to the same data file
pytest --cov=. --cov-config=.coveragerc --cov-append tests
) || exit "$?"
Expand Down
4 changes: 3 additions & 1 deletion backend/bin/sync_deps.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
pip-sync requirements.txt \
requirements-dev.txt \
lambdas/requirements.txt \
lambdas/requirements-dev.txt
lambdas/requirements-dev.txt \
delete-objects-lambda/requirements.txt \
delete-objects-lambda/requirements-dev.txt
4 changes: 2 additions & 2 deletions backend/cdk.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
"pathology"
],
"jurisdictions": [
"colorado",
"kentucky"
"co",
"ky"
]
}
},
Expand Down
90 changes: 90 additions & 0 deletions backend/common_constructs/access_logs_bucket.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
from aws_cdk import Duration, Stack, CustomResourceProvider, RemovalPolicy
from aws_cdk.aws_iam import PolicyStatement, Effect, StarPrincipal
from aws_cdk.aws_s3 import Bucket as CdkBucket, BlockPublicAccess, BucketEncryption, ObjectOwnership, \
BucketAccessControl, IntelligentTieringConfiguration, LifecycleRule, Transition, StorageClass
from cdk_nag import NagSuppressions
from constructs import Construct


class AccessLogsBucket(CdkBucket):
    """
    S3 bucket configured as a destination for access logs.

    The bucket blocks all public access, uses S3-managed encryption, enforces SSL, is versioned,
    uses the LOG_DELIVERY_WRITE access control and moves objects to Intelligent-Tiering via a
    lifecycle rule. Its resource policy denies ``s3:DeleteObject`` on every object to every
    principal. The only exception is the stack's S3 auto-delete custom resource provider role,
    and only when the bucket is created with ``removal_policy=RemovalPolicy.DESTROY`` and a
    truthy ``auto_delete_objects``.

    :param scope: Construct this bucket is created under
    :param construct_id: Construct id for the bucket
    :param kwargs: Additional keyword arguments forwarded to the CDK ``Bucket`` constructor
    """
    def __init__(
            self, scope: Construct, construct_id: str, **kwargs
    ):
        archive_tiering = IntelligentTieringConfiguration(
            name='ArchiveAfter6Mo',
            archive_access_tier_time=Duration.days(180)
        )
        # Transition objects to Intelligent-Tiering with a 0 day delay
        tiering_rule = LifecycleRule(
            transitions=[
                Transition(
                    storage_class=StorageClass.INTELLIGENT_TIERING,
                    transition_after=Duration.days(0)
                )
            ]
        )

        super().__init__(
            scope, construct_id,
            block_public_access=BlockPublicAccess.BLOCK_ALL,
            encryption=BucketEncryption.S3_MANAGED,
            enforce_ssl=True,
            object_ownership=ObjectOwnership.BUCKET_OWNER_PREFERRED,
            access_control=BucketAccessControl.LOG_DELIVERY_WRITE,
            versioned=True,
            intelligent_tiering_configurations=[archive_tiering],
            lifecycle_rules=[tiering_rule],
            **kwargs
        )

        # Looked up only after the bucket exists, so a provider created during bucket
        # construction can be found
        auto_delete_provider: CustomResourceProvider = Stack.of(scope).node.try_find_child(
            'Custom::S3AutoDeleteObjectsCustomResourceProvider'
        )
        exempt_auto_delete_role = (
            auto_delete_provider is not None
            and kwargs.get('removal_policy') == RemovalPolicy.DESTROY
            and kwargs.get('auto_delete_objects', False)
        )

        # No deleting objects for anybody...
        deny_delete_props = {
            'effect': Effect.DENY,
            'resources': [self.arn_for_objects('*')],
            'actions': ['s3:DeleteObject'],
            'principals': [StarPrincipal()]
        }
        if exempt_auto_delete_role:
            # ...except for the auto delete provider role
            deny_delete_props['conditions'] = {
                'ArnNotEquals': {
                    'aws:PrincipalArn': auto_delete_provider.role_arn
                }
            }
        self.add_to_resource_policy(PolicyStatement(**deny_delete_props))

        nag_suppressions = [
            {
                'id': 'AwsSolutions-S1',
                'reason': 'This is the access logging bucket'
            },
            {
                'id': 'HIPAA.Security-S3BucketReplicationEnabled',
                'reason': 'Bucket replication to a logs archive account may be added as a future enhancement'
            },
            {
                'id': 'HIPAA.Security-S3DefaultEncryptionKMS',
                'reason': 'This bucket is managed with S3 encryption, so that decryption capability can be readily'
                          ' scoped to any operational support personnel at the account level. Adding KMS encryption'
                          ' to this bucket specifically adds no security value'
            }
        ]
        NagSuppressions.add_resource_suppressions(self, suppressions=nag_suppressions)
28 changes: 28 additions & 0 deletions backend/common_constructs/bucket.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from aws_cdk import Stack
from aws_cdk.aws_s3 import Bucket as CdkBucket, BlockPublicAccess, BucketEncryption, ObjectOwnership
from constructs import Construct

from common_constructs.access_logs_bucket import AccessLogsBucket


class Bucket(CdkBucket):
    """
    S3 bucket with project-standard settings applied.

    Every bucket blocks all public access, enforces SSL, uses bucket-owner-enforced object
    ownership and writes server access logs to the supplied access logs bucket under a prefix
    built from the stack account, stack region, scope path and construct id. Encryption
    defaults to S3-managed but can be overridden through ``kwargs``.

    :param scope: Construct this bucket is created under
    :param construct_id: Construct id for the bucket
    :param server_access_logs_bucket: Bucket that receives this bucket's server access logs
    :param kwargs: Additional keyword arguments forwarded to the CDK ``Bucket`` constructor
    """
    def __init__(
            self, scope: Construct, construct_id: str, *,
            server_access_logs_bucket: AccessLogsBucket,
            **kwargs
    ):
        # Caller-supplied values take precedence over the encryption default
        bucket_props = {'encryption': BucketEncryption.S3_MANAGED, **kwargs}

        stack = Stack.of(scope)
        logs_prefix = f'_logs/{stack.account}/{stack.region}/{scope.node.path}/{construct_id}'

        super().__init__(
            scope, construct_id,
            block_public_access=BlockPublicAccess.BLOCK_ALL,
            enforce_ssl=True,
            object_ownership=ObjectOwnership.BUCKET_OWNER_ENFORCED,
            server_access_logs_bucket=server_access_logs_bucket,
            server_access_logs_prefix=logs_prefix,
            **bucket_props
        )
80 changes: 80 additions & 0 deletions backend/common_constructs/python_function.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from typing import List

import jsii
from aws_cdk import Stack, Duration
from aws_cdk.aws_lambda import Runtime
from aws_cdk.aws_lambda_python_alpha import PythonFunction as CdkPythonFunction, ICommandHooks, BundlingOptions
from cdk_nag import NagSuppressions
from constructs import Construct


class PythonFunction(CdkPythonFunction):
    """
    Standard Python lambda function that assumes unittest-compatible tests are written in the 'tests' directory.

    Bundling is configured with ``TestingHooks`` as its command hooks and the runtime is fixed to
    Python 3.12. The timeout defaults to 28 seconds unless overridden through ``kwargs``. cdk-nag
    suppressions are registered for the function and for its service role.

    :param scope: Construct this function is created under
    :param construct_id: Construct id for the function
    :param kwargs: Additional keyword arguments forwarded to the CDK ``PythonFunction`` constructor
    """
    def __init__(
            self, scope: Construct, construct_id: str, **kwargs
    ):
        # Caller-supplied values take precedence over the default timeout
        function_props = {'timeout': Duration.seconds(28), **kwargs}

        super().__init__(
            scope, construct_id,
            bundling=BundlingOptions(command_hooks=TestingHooks()),
            runtime=Runtime.PYTHON_3_12,
            **function_props
        )

        function_suppressions = [
            {
                'id': 'HIPAA.Security-LambdaDLQ',
                'reason': "These lambdas are synchronous and so don't require any DLQ configuration"
            },
            {
                'id': 'HIPAA.Security-LambdaInsideVPC',
                'reason': 'We may choose to move our lambdas into private VPC subnets in a future enhancement'
            }
        ]
        NagSuppressions.add_resource_suppressions(self, suppressions=function_suppressions)

        # The service role is a child resource, so its suppression is addressed by path
        service_role_suppressions = [
            {
                'id': 'AwsSolutions-IAM4',
                'applies_to': [
                    'Policy::arn:<AWS::Partition>:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole'
                ],
                'reason': 'The BasicExecutionRole policy is appropriate for these lambdas'
            }
        ]
        NagSuppressions.add_resource_suppressions_by_path(
            Stack.of(self),
            path=f'{self.node.path}/ServiceRole/Resource',
            suppressions=service_role_suppressions
        )


@jsii.implements(ICommandHooks)
class TestingHooks:
    """
    Bundling command hooks that validate a lambda by running its tests.

    No commands are contributed before bundling. After bundling, the returned commands install
    the dev dependencies from `requirements-dev.txt` into a temporary directory, run unittest
    discovery against the `tests` directory with that directory on PYTHONPATH, then remove both
    the temporary directory and the `tests` directory.
    """
    def before_bundling(self, input_dir: str, output_dir: str) -> List[str]:  # pylint: disable=unused-argument
        """Return the commands to run before bundling: none."""
        return []

    def after_bundling(self, input_dir: str, output_dir: str) -> List[str]:  # pylint: disable=unused-argument
        """Return the commands that install dev dependencies, run the tests and clean up."""
        test_commands = [
            # Dev dependencies go to a scratch directory
            'mkdir _tmp_dev_deps',
            'python -m pip install -r requirements-dev.txt -t _tmp_dev_deps',
            # Run the tests with the scratch directory importable
            'PYTHONPATH="$(pwd)/_tmp_dev_deps" python -m unittest discover -s tests',
            # Remove the scratch directory and the tests themselves
            'rm -rf _tmp_dev_deps',
            'rm -rf tests'
        ]
        return test_commands
56 changes: 31 additions & 25 deletions backend/common_constructs/stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,29 +34,35 @@ def __init__(self, *args, standard_tags: StandardTags, **kwargs):

NagSuppressions.add_stack_suppressions(
self,
suppressions=[{
'id': 'HIPAA.Security-IAMNoInlinePolicy',
'reason': dedent('''
Prohibitions on inline policies are raised in favor of managed policies in order to support a
few goals:
- policy versioning
- reusability across resources that perform similar tasks
- rolling back on failures
- delegating permissions management
These goals are met differently in a CDK app. CDK itself allows for granular permissions crafting
that is attached to policies directly to each resource, by virtue of its Resource.grant_* methods.
This approach actually results in an improvement in the principle of least privilege, because each
resource in the app has permissions that are specifically crafted for that particular resource
and only allow exactly what it needs to do, rather than sharing, generally more coarse, managed
policies that approximate the access it needs to perform particular tasks. Those highly targeted
policies are appropriately attached to principals as inline policies. This approach leads to a
more maintainable and more secure implementation than the reusability and permissions delegation
that managed policies accomplish. Versioning of policies is accomplished through git itself as the
version control system that manages all of the infrastructure, runtime code, and policies for the
app, right here in this repository. Rolling back on failures is accomplished both through
CloudFormation as well as git again, as both have capabilities to perform much more cohesive
roll-backs than managed policies alone.
''')
}]
suppressions=[
{
'id': 'HIPAA.Security-IAMNoInlinePolicy',
'reason': dedent('''
Prohibitions on inline policies are raised in favor of managed policies in order to support a
few goals:
- policy versioning
- reusability across resources that perform similar tasks
- rolling back on failures
- delegating permissions management
These goals are met differently in a CDK app. CDK itself allows for granular permissions crafting
that is attached to policies directly to each resource, by virtue of its Resource.grant_* methods.
This approach actually results in an improvement in the principle of least privilege, because each
resource in the app has permissions that are specifically crafted for that particular resource
and only allow exactly what it needs to do, rather than sharing, generally more coarse, managed
policies that approximate the access it needs to perform particular tasks. Those highly targeted
policies are appropriately attached to principals as inline policies. This approach leads to a
more maintainable and more secure implementation than the reusability and permissions delegation
that managed policies accomplish. Versioning of policies is accomplished through git itself as the
version control system that manages all of the infrastructure, runtime code, and policies for the
app, right here in this repository. Rolling back on failures is accomplished both through
CloudFormation as well as git again, as both have capabilities to perform much more cohesive
roll-backs than managed policies alone.
''')
},
{
'id': 'HIPAA.Security-LambdaConcurrency',
'reason': 'The lambdas in this app will share account-wide concurrency limits'
}
]
)
26 changes: 26 additions & 0 deletions backend/delete-objects-lambda/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@

import logging
import os

import boto3
from aws_lambda_powertools import Logger


# Module-level logger; level is DEBUG only when the DEBUG environment variable is 'true'
# (case-insensitive), otherwise INFO
logger = Logger()
logger.setLevel(logging.DEBUG if os.environ.get('DEBUG', 'false').lower() == 'true' else logging.INFO)

# S3 client created once at module import
s3_client = boto3.client('s3')


@logger.inject_lambda_context()
def delete_objects(event, context):  # pylint: disable=unused-argument
    """
    Delete every S3 object referenced by the records of an S3 event notification.

    For each entry in ``event['Records']`` the bucket name, object key and object size are read
    from the record's ``s3`` section, logged, and the object is deleted with the module-level
    S3 client.

    :param event: S3 event notification containing a ``Records`` list
    :param context: Lambda context; not used by this function body
    :raises KeyError: If a record is missing the expected ``s3`` bucket/object fields
    """
    # Imported locally so this handler does not depend on a separate module-level import change
    from urllib.parse import unquote_plus  # pylint: disable=import-outside-toplevel

    logger.info('Received event', event=event)
    for record in event['Records']:
        bucket_name = record['s3']['bucket']['name']
        s3_object = record['s3']['object']
        # S3 URL-encodes object keys in event notifications (a space arrives as '+', special
        # characters as %XX). Decode first, otherwise the delete targets a key that does not
        # match the real object.
        key = unquote_plus(s3_object['key'])
        size = s3_object['size']
        logger.info('Object', s3_url=f's3://{bucket_name}/{key}', size=size)
        s3_client.delete_object(
            Bucket=bucket_name,
            Key=key
        )
1 change: 1 addition & 0 deletions backend/delete-objects-lambda/requirements-dev.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
moto[all]
Loading

0 comments on commit 71d9ba6

Please sign in to comment.