Skip to content

Commit

Permalink
test unhealthy shares (#1649)
Browse files Browse the repository at this point in the history
### Feature or Bugfix
- Feature

### Detail

New tests
* deliberately break shares (by removing permissions)
* run the verifier to assert that they are unhealthy
* run the reapplier to assert that they can be fixed

Improvements
* Drop updated_persistent env and always update the persistent_env on
get_or_create_env()
* Use contextmanagers to create environments and handle their lifecycle
in one place
* Make redshift tests optional based on configuration

### Relates
Solves parts of #1376 

### Security
Please answer the questions below briefly where applicable, or write
`N/A`. Based on
[OWASP 10](https://owasp.org/Top10/en/).

- Does this PR introduce or modify any input fields or queries - this
includes
fetching data from storage outside the application (e.g. a database, an
S3 bucket)?
  - Is the input sanitized?
- What precautions are you taking before deserializing the data you
consume?
  - Is injection prevented by parametrizing queries?
  - Have you ensured no `eval` or similar functions are used?
- Does this PR introduce any functionality or component that requires
authorization?
- How have you ensured it respects the existing AuthN/AuthZ mechanisms?
  - Are you logging failed auth attempts?
- Are you using or adding any cryptographic features?
  - Do you use standard, proven implementations?
  - Are the used keys controlled by the customer? Where are they stored?
- Are you introducing any new policies/roles/users?
  - Have you used the least-privilege principle? How?


By submitting this pull request, I confirm that my contribution is made
under the terms of the Apache 2.0 license.
  • Loading branch information
petrkalos authored Oct 30, 2024
1 parent 0949893 commit 4457130
Show file tree
Hide file tree
Showing 8 changed files with 189 additions and 99 deletions.
10 changes: 9 additions & 1 deletion backend/dataall/core/environment/cdk/environment_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,8 @@ def create_integration_tests_role(self):
'arn:aws:s3:::dataalltesting*/*',
'arn:aws:s3:::dataall-session*',
'arn:aws:s3:::dataall-session*/*',
'arn:aws:s3:::dataall-test-session*',
'arn:aws:s3:::dataall-test-session*/*',
'arn:aws:s3:::dataall-temp*',
'arn:aws:s3:::dataall-temp*/*',
],
Expand All @@ -613,6 +615,7 @@ def create_integration_tests_role(self):
iam.PolicyStatement(
actions=[
'lakeformation:GrantPermissions',
'lakeformation:RevokePermissions',
'lakeformation:PutDataLakeSettings',
'lakeformation:GetDataLakeSettings',
'glue:GetDatabase',
Expand Down Expand Up @@ -674,9 +677,14 @@ def create_integration_tests_role(self):
'iam:DeleteRole',
'iam:PutRolePolicy',
'iam:DeleteRolePolicy',
'iam:DetachRolePolicy',
'iam:ListAttachedRolePolicies',
],
effect=iam.Effect.ALLOW,
resources=[f'arn:aws:iam::{self.account}:role/dataall-test-*'],
resources=[
f'arn:aws:iam::{self.account}:role/dataall-test-*',
f'arn:aws:iam::{self.account}:role/dataall-session*',
],
),
)

Expand Down
13 changes: 0 additions & 13 deletions tests_new/clean_up_s3.sh

This file was deleted.

139 changes: 67 additions & 72 deletions tests_new/integration_tests/core/environment/global_conftest.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import logging
from contextlib import contextmanager

import pytest
import boto3
from assertpy import assert_that

from integration_tests.aws_clients.sts import STSClient
from integration_tests.client import GqlError
Expand All @@ -14,18 +15,41 @@
)
from integration_tests.core.organizations.queries import create_organization
from integration_tests.core.stack.utils import check_stack_ready
from tests_new.integration_tests.core.environment.utils import update_env_stack
from tests_new.integration_tests.aws_clients.s3 import S3Client
from tests_new.integration_tests.core.environment.utils import update_env_stack

log = logging.getLogger(__name__)


def create_env(client, env_name, group, org_uri, account_id, region, tags=[]):
env = create_environment(
client, name=env_name, group=group, organizationUri=org_uri, awsAccountId=account_id, region=region, tags=tags
)
check_stack_ready(client, env.environmentUri, env.stack.stackUri)
return get_environment(client, env.environmentUri)
@contextmanager
def create_env(client, env_name, group, org_uri, account_id, region, tags=None, retain=False):
    """Create a data.all environment, yield it, and clean it up on exit.

    Creates the environment, waits for its CloudFormation stack, asserts the
    stack reached a healthy terminal state, then yields the refreshed
    environment object. On exit the environment (and its default S3 bucket)
    is deleted unless ``retain`` is True — but a retained environment is
    still deleted if an error occurred, so failed creations never leak.

    :param client: GraphQL API client used for all environment operations
    :param env_name: name for the new environment
    :param group: team/group that owns the environment
    :param org_uri: URI of the organization to create the environment in
    :param account_id: AWS account ID for the environment
    :param region: AWS region for the environment
    :param tags: optional list of tags (defaults to no tags)
    :param retain: keep the environment after the context exits (unless errors)
    """
    env = None
    errors = False
    try:
        env = create_environment(
            client,
            name=env_name,
            group=group,
            organizationUri=org_uri,
            awsAccountId=account_id,
            region=region,
            tags=tags or [],  # avoid a shared mutable default argument
        )
        check_stack_ready(client, env.environmentUri, env.stack.stackUri)
        env = get_environment(client, env.environmentUri)
        assert_that(env.stack.status).is_in('CREATE_COMPLETE', 'UPDATE_COMPLETE')
        yield env
    except Exception:
        errors = True
        raise  # bare raise preserves the original traceback
    finally:
        # Clean up unless the caller asked to retain a successfully created env.
        if env and (not retain or errors):
            role = f'arn:aws:iam::{env.AwsAccountId}:role/dataall-integration-tests-role-{env.region}'
            session = STSClient(role_arn=role, region=env.region, session_name='Session_1').get_refreshable_session()
            # The default bucket must be emptied/deleted first or env deletion fails.
            S3Client(session=session, account=env.AwsAccountId, region=env.region).delete_bucket(
                env.EnvironmentDefaultBucketName
            )
            delete_env(client, env)


def delete_env(client, env):
Expand All @@ -46,21 +70,11 @@ def delete_env(client, env):
@pytest.fixture(scope='session')
def session_env1(client1, group1, group5, org1, session_id, testdata):
envdata = testdata.envs['session_env1']
env = None
try:
env = create_env(
client1, 'session_env1', group1, org1.organizationUri, envdata.accountId, envdata.region, tags=[session_id]
)
with create_env(
client1, 'session_env1', group1, org1.organizationUri, envdata.accountId, envdata.region, tags=[session_id]
) as env:
invite_group_on_env(client1, env.environmentUri, group5, ['CREATE_DATASET', 'CREATE_SHARE_OBJECT'])
yield env
finally:
if env:
role = f'arn:aws:iam::{env.AwsAccountId}:role/dataall-integration-tests-role-{env.region}'
session = STSClient(role_arn=role, region=env.region, session_name='Session_1').get_refreshable_session()
S3Client(session=session, account=env.AwsAccountId, region=env.region).delete_bucket(
env.EnvironmentDefaultBucketName
)
delete_env(client1, env)


@pytest.fixture(scope='session')
Expand All @@ -78,21 +92,16 @@ def session_env1_aws_client(session_env1, session_env1_integration_role_arn):
@pytest.fixture(scope='session')
def session_cross_acc_env_1(client5, group5, testdata, org1, session_id):
envdata = testdata.envs['session_cross_acc_env_1']
env = None
try:
env = create_env(
client5,
'session_cross_acc_env_1',
group5,
org1.organizationUri,
envdata.accountId,
envdata.region,
tags=[session_id],
)
with create_env(
client5,
'session_cross_acc_env_1',
group5,
org1.organizationUri,
envdata.accountId,
envdata.region,
tags=[session_id],
) as env:
yield env
finally:
if env:
delete_env(client5, env)


@pytest.fixture(scope='session')
Expand Down Expand Up @@ -124,16 +133,11 @@ def persistent_env1_aws_client(persistent_env1, persistent_env1_integration_role
@pytest.fixture(scope='session')
def session_env2(client1, group1, group2, org2, session_id, testdata):
envdata = testdata.envs['session_env2']
env = None
try:
env = create_env(
client1, 'session_env2', group1, org2.organizationUri, envdata.accountId, envdata.region, tags=[session_id]
)
with create_env(
client1, 'session_env2', group1, org2.organizationUri, envdata.accountId, envdata.region, tags=[session_id]
) as env:
invite_group_on_env(client1, env.environmentUri, group2, ['CREATE_DATASET'])
yield env
finally:
if env:
delete_env(client1, env)


"""
Expand All @@ -145,13 +149,8 @@ def session_env2(client1, group1, group2, org2, session_id, testdata):
@pytest.fixture(scope='function')
def temp_env1(client1, group1, org1, testdata):
envdata = testdata.envs['temp_env1']
env = None
try:
env = create_env(client1, 'temp_env1', group1, org1.organizationUri, envdata.accountId, envdata.region)
with create_env(client1, 'temp_env1', group1, org1.organizationUri, envdata.accountId, envdata.region) as env:
yield env
finally:
if env:
delete_env(client1, env)


"""
Expand All @@ -160,43 +159,39 @@ def temp_env1(client1, group1, org1, testdata):
"""


@contextmanager
def get_or_create_persistent_env(env_name, client, group, testdata):
envs = list_environments(client, term=env_name).nodes
if envs:
return envs[0]
env = envs[0]
update_env_stack(client, env)
yield get_environment(client, env.environmentUri)
else:
envdata = testdata.envs[env_name]
org = create_organization(client, f'org_{env_name}', group)
env = create_env(
client, env_name, group, org.organizationUri, envdata.accountId, envdata.region, tags=[env_name]
)
if env.stack.status in ['CREATE_COMPLETE', 'UPDATE_COMPLETE']:
return env
else:
delete_env(client, env)
raise RuntimeError(f'failed to create {env_name=} {env=}')
with create_env(
client,
env_name,
group,
org.organizationUri,
envdata.accountId,
envdata.region,
tags=[env_name],
retain=True,
) as env:
yield env


@pytest.fixture(scope='session')
def persistent_env1(client1, group1, testdata):
return get_or_create_persistent_env('persistent_env1', client1, group1, testdata)


@pytest.fixture(scope='session')
def updated_persistent_env1(client1, group1, persistent_env1):
update_env_stack(client1, persistent_env1)
return get_environment(client1, persistent_env1.environmentUri)
with get_or_create_persistent_env('persistent_env1', client1, group1, testdata) as env:
yield env


@pytest.fixture(scope='session')
def persistent_cross_acc_env_1(client5, group5, testdata):
return get_or_create_persistent_env('persistent_cross_acc_env_1', client5, group5, testdata)


@pytest.fixture(scope='session')
def updated_persistent_cross_acc_env_1(client5, group5, persistent_cross_acc_env_1):
update_env_stack(client5, persistent_cross_acc_env_1)
return get_environment(client5, persistent_cross_acc_env_1.environmentUri)
with get_or_create_persistent_env('persistent_cross_acc_env_1', client5, group5, testdata) as env:
yield env


@pytest.fixture(scope='session')
Expand Down
11 changes: 11 additions & 0 deletions tests_new/integration_tests/modules/s3_datasets/aws_clients.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import json
import re
import os
from typing import List

from botocore.exceptions import ClientError

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -312,3 +314,12 @@ def grant_create_database(self, role_arn):
return True
except ClientError as e:
log.exception('Error granting permissions to create database')

def revoke_db_perms(self, principal_arn: str, db_name: str, permissions: List[str]):
    """Revoke the given Lake Formation permissions on a Glue database from a principal."""
    request = {
        'Principal': {'DataLakePrincipalIdentifier': principal_arn},
        'Resource': {'Database': {'Name': db_name}},
        'Permissions': permissions,
    }
    self._client.revoke_permissions(**request)
31 changes: 31 additions & 0 deletions tests_new/integration_tests/modules/shares/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,3 +272,34 @@ def get_s3_consumption_data(client, shareUri: str):

response = client.query(query=query)
return response.data.getS3ConsumptionData


def reapply_share_object_items(client, dataset_uri: str):
    """Trigger re-application of all share object items on a dataset.

    :param client: GraphQL API client
    :param dataset_uri: URI of the dataset whose share items should be reapplied
    :return: the boolean result of the reApplyShareObjectItemsOnDataset mutation
    """
    query = {
        'operationName': 'reApplyShareObjectItemsOnDataset',
        # The operation declares $datasetUri, so the variables key must match it;
        # a key of 'input' would leave $datasetUri unbound under standard GraphQL.
        'variables': {'datasetUri': dataset_uri},
        'query': """
                    mutation reApplyShareObjectItemsOnDataset($datasetUri: String!) {
                      reApplyShareObjectItemsOnDataset(datasetUri: $datasetUri)
                    }
                """,
    }
    response = client.query(query=query)
    return response.data.reApplyShareObjectItemsOnDataset


def reapply_items_share_object(client, share_uri: str, item_uris: List[str]):
    """Trigger re-application of specific items on a share object.

    :param client: GraphQL API client
    :param share_uri: URI of the share object
    :param item_uris: URIs of the share items to reapply
    :return: the reApplyItemsShareObject mutation payload (shareUri, status)
    """
    query = {
        'operationName': 'reApplyItemsShareObject',
        'variables': {'input': {'shareUri': share_uri, 'itemUris': item_uris}},
        # Plain string: nothing is interpolated, so an f-string (with escaped
        # {{ }} braces) is unnecessary here.
        'query': """
                    mutation reApplyItemsShareObject($input: ShareItemSelectorInput) {
                      reApplyItemsShareObject(input: $input) {
                        shareUri
                        status
                      }
                    }
                """,
    }
    response = client.query(query=query)
    return response.data.reApplyItemsShareObject
Original file line number Diff line number Diff line change
Expand Up @@ -253,15 +253,15 @@ def persistent_consumption_role_1(client5, group5, persistent_cross_acc_env_1, p
def persistent_group_share_1(
client5,
client1,
updated_persistent_env1,
updated_persistent_cross_acc_env_1,
persistent_env1,
persistent_cross_acc_env_1,
updated_persistent_s3_dataset1,
group5,
):
share1 = create_share_object(
client=client5,
dataset_or_item_params={'datasetUri': updated_persistent_s3_dataset1.datasetUri},
environmentUri=updated_persistent_cross_acc_env_1.environmentUri,
environmentUri=persistent_cross_acc_env_1.environmentUri,
groupUri=group5,
principalId=group5,
principalType='Group',
Expand All @@ -285,16 +285,16 @@ def persistent_group_share_1(
def persistent_role_share_1(
client5,
client1,
updated_persistent_env1,
updated_persistent_cross_acc_env_1,
persistent_env1,
persistent_cross_acc_env_1,
updated_persistent_s3_dataset1,
group5,
persistent_consumption_role_1,
):
share1 = create_share_object(
client=client5,
dataset_or_item_params={'datasetUri': updated_persistent_s3_dataset1.datasetUri},
environmentUri=updated_persistent_cross_acc_env_1.environmentUri,
environmentUri=persistent_cross_acc_env_1.environmentUri,
groupUri=group5,
principalId=persistent_consumption_role_1.consumptionRoleUri,
principalType='ConsumptionRole',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def check_share_succeeded(client, shareUri, check_contains_all_item_types=False)
assert_that(items).extracting('itemType').contains(*ALL_S3_SHARABLE_TYPES_NAMES)


def check_verify_share_items(client, shareUri):
def check_verify_share_items(client, shareUri, expected_health_status=['Healthy'], expected_health_msg=[]):
share = get_share_object(client, shareUri, {'isShared': True})
items = share['items'].nodes
times = [item.lastVerificationTime for item in items]
Expand All @@ -93,8 +93,11 @@ def check_verify_share_items(client, shareUri):
updated_share = get_share_object(client, shareUri, {'isShared': True})
items = updated_share['items'].nodes
assert_that(items).extracting('status').contains_only('Share_Succeeded')
assert_that(items).extracting('healthStatus').contains_only('Healthy')
assert_that(items).extracting('healthStatus').contains_only(*expected_health_status)
assert_that(items).extracting('lastVerificationTime').does_not_contain(*times)
if expected_health_msg:
health_msgs = ' '.join([i.healthMessage for i in items])
assert_that(health_msgs).contains(*expected_health_msg)


def check_table_access(
Expand Down
Loading

0 comments on commit 4457130

Please sign in to comment.