Skip to content

Commit

Permalink
Ignore validation errors for reachability checks (#5805)
Browse files Browse the repository at this point in the history
  • Loading branch information
galvana authored Feb 24, 2025
1 parent f10c2c7 commit f2921c2
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,21 @@ interface EditorSectionProps {
connectionKey: string;
}

const getReachabilityMessage = (details: any) => {
if (Array.isArray(details)) {
const firstDetail = details[0];

if (!firstDetail) {
return "";
}

const message = firstDetail.msg || "";
const location = firstDetail.loc ? ` (${firstDetail.loc})` : "";
return `${message}${location}`;
}
return details;
};

const EditorSection = ({ connectionKey }: EditorSectionProps) => {
const toast = useToast();
const dispatch = useAppDispatch();
Expand Down Expand Up @@ -269,7 +284,7 @@ const EditorSection = ({ connectionKey }: EditorSectionProps) => {
<Text fontSize="sm" whiteSpace="pre-wrap">
{reachability?.reachable
? "Dataset is reachable"
: `Dataset is not reachable. ${reachability?.details}`}
: `Dataset is not reachable. ${getReachabilityMessage(reachability?.details)}`}
</Text>
</HStack>
</HStack>
Expand Down
4 changes: 2 additions & 2 deletions src/fides/api/schemas/dataset.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, List, Optional
from typing import Any, Dict, List, Optional, Union

from fideslang.models import Dataset
from fideslang.validation import FidesKey
Expand Down Expand Up @@ -71,4 +71,4 @@ class DatasetReachability(FidesSchema):
"""

reachable: bool
details: Optional[str]
details: Optional[Union[str, List[Dict[str, Any]]]]
35 changes: 24 additions & 11 deletions src/fides/service/dataset/dataset_config_service.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from copy import deepcopy
from typing import Any, Dict, List, Optional, Tuple, Union

from fastapi.encoders import jsonable_encoder
from fideslang.models import Dataset as FideslangDataset
from loguru import logger
from pydantic import ValidationError as PydanticValidationError
Expand Down Expand Up @@ -146,28 +147,40 @@ def validate_dataset_config(

def get_dataset_reachability(
self, dataset_config: DatasetConfig, policy: Optional[Policy] = None
) -> Tuple[bool, Optional[str]]:
) -> Tuple[bool, Optional[Union[str, List[Dict[str, Any]]]]]:
"""
Determines if the given dataset is reachable along with an error message
"""
# First check if the target dataset is valid
try:
dataset_config.get_graph()
except PydanticValidationError as exc:
return False, jsonable_encoder(
exc.errors(
include_url=False, include_input=False, include_context=False
)
)

# Get all the dataset configs that are not associated with a disabled connection
# Get graphs for all enabled datasets
dataset_graphs = []
datasets = DatasetConfig.all(db=self.db)
dataset_graphs = [
dataset_config.get_graph()
for dataset_config in datasets
if not dataset_config.connection_config.disabled
]
for dataset in datasets:
if not dataset.connection_config.disabled:
try:
dataset_graphs.append(dataset.get_graph())
except PydanticValidationError:
continue

# We still want to check reachability even if our dataset config's connection is disabled.
# We also consider the siblings, because if the connection is enabled, then all the
# datasets will be enabled with it.
sibling_dataset_graphs = []
if dataset_config.connection_config.disabled:
sibling_datasets = dataset_config.connection_config.datasets
sibling_dataset_graphs = [
dataset_config.get_graph() for dataset_config in sibling_datasets
]
for sibling in dataset_config.connection_config.datasets:
try:
sibling_dataset_graphs.append(sibling.get_graph())
except PydanticValidationError:
continue

try:
dataset_graph = DatasetGraph(*dataset_graphs, *sibling_dataset_graphs)
Expand Down
74 changes: 74 additions & 0 deletions tests/ops/service/dataset/test_dataset_config_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pytest
from pytest import FixtureRequest
from sqlalchemy.orm import Session
from sqlalchemy.orm.attributes import flag_modified

from fides.api.models.connectionconfig import ConnectionConfig
from fides.api.models.datasetconfig import DatasetConfig
Expand All @@ -13,6 +14,20 @@
from tests.conftest import wait_for_tasks_to_complete


def make_dataset_invalid(db: Session, dataset_config: DatasetConfig) -> None:
"""Helper function to make a dataset invalid by setting an invalid data type in its collections"""
ctl_dataset = dataset_config.ctl_dataset

# Set an invalid data type on the first field
ctl_dataset.collections[0]["fields"][0]["fides_meta"] = {
"data_type": "invalid_type"
}
flag_modified(ctl_dataset, "collections")

db.add(ctl_dataset)
db.commit()


@pytest.fixture
def dataset_config_service(db: Session) -> DatasetConfigService:
return DatasetConfigService(db)
Expand Down Expand Up @@ -97,6 +112,65 @@ def test_get_dataset_reachability_ignores_sibling_datasets(
)
assert reachable == (True, None)

def test_get_dataset_reachability_invalid_dataset_config(
self,
dataset_config_service: DatasetConfigService,
single_identity_dataset_config: DatasetConfig,
db: Session,
):
"""Test that dataset with invalid datatype fails validation by directly modifying the database"""
make_dataset_invalid(db, single_identity_dataset_config)

reachable = dataset_config_service.get_dataset_reachability(
single_identity_dataset_config,
)
assert reachable == (
False,
[
{
"type": "value_error",
"loc": ["collections", 0, "fields", 0, "fides_meta", "data_type"],
"msg": "Value error, The data type invalid_type is not supported.",
}
],
)

def test_get_dataset_reachability_ignores_validation_error_on_other_connection(
self,
dataset_config_service: DatasetConfigService,
single_identity_dataset_config: DatasetConfig,
unreachable_dataset_on_different_connection: DatasetConfig,
db: Session,
):
"""Test that validation errors on other connections are ignored"""
make_dataset_invalid(db, unreachable_dataset_on_different_connection)

reachable = dataset_config_service.get_dataset_reachability(
single_identity_dataset_config,
)
assert reachable == (True, None)

def test_get_dataset_reachability_ignores_sibling_validation_error(
self,
db: Session,
dataset_config_service: DatasetConfigService,
single_identity_dataset_config: DatasetConfig,
no_identities_dataset_config: DatasetConfig,
connection_config: ConnectionConfig,
):
"""Test that validation errors on sibling datasets are ignored"""
# Disable the connection
connection_config.disabled = True
db.commit()

# Make the sibling dataset invalid
make_dataset_invalid(db, no_identities_dataset_config)

reachable = dataset_config_service.get_dataset_reachability(
single_identity_dataset_config,
)
assert reachable == (True, None)


class TestGetIdentitiesAndReferences:

Expand Down

0 comments on commit f2921c2

Please sign in to comment.