diff --git a/RULES.md b/RULES.md index a65e41d..ace499e 100644 --- a/RULES.md +++ b/RULES.md @@ -71,4 +71,13 @@ There are mart model(s) referencing a source. ![Alt text](./images/no_references_to_source_from_marts.png) Data should be flowing from source centric to business centric areas like so: -![Alt text](./images/data_flow_diagram.png) \ No newline at end of file +![Alt text](./images/data_flow_diagram.png) + +# No owner on physical models +There are physical models without a designated owner. Physical models consist of the following: +- sources +- table materialization +- view materialization +- incremental materialization + +To ensure ownership and proper cataloguing of data is preserved, fill in all your physical models with the owner tag as described in the [Dbt reference](https://docs.getdbt.com/reference/resource-properties/meta/#designate-a-model-owner) \ No newline at end of file diff --git a/olivertwist/manifest.py b/olivertwist/manifest.py index 946125c..046af2a 100644 --- a/olivertwist/manifest.py +++ b/olivertwist/manifest.py @@ -6,7 +6,7 @@ """ from copy import deepcopy -from typing import Dict +from typing import Dict, Optional import networkx as nx @@ -78,6 +78,17 @@ def is_staging(self) -> bool: self.__fqn_contains("staging") or "stg_" in self.id ) + @property + def is_db_relation(self) -> bool: + return self.is_source or ( + self.data['resource_type'] == 'model' + and self.data.get('config', {}).get('materialized', {}) in ['incremental', 'table', 'view'] + ) + + @property + def owner(self) -> Optional[str]: + return self.data.get("meta", {}).get("owner", None) + @property def area(self) -> str: return self.data["fqn"][2] diff --git a/olivertwist/rules/no_owner_on_physical_models.py b/olivertwist/rules/no_owner_on_physical_models.py new file mode 100644 index 0000000..e7a2781 --- /dev/null +++ b/olivertwist/rules/no_owner_on_physical_models.py @@ -0,0 +1,25 @@ +# -*- coding: utf-8 -*- +"""Models should all be enabled. + +Copyright (C) 2020, Auto Trader UK +Created 15. Dec 2020 15:51 + +""" +from typing import List, Tuple, Optional + +from olivertwist.manifest import Manifest, Node +from olivertwist.ruleengine.rule import rule +from olivertwist.rules.utils import partition + + +@rule(id="no-owner-on-physical-models", name="No owner on physical models") +def no_owner_on_physical_models(manifest: Manifest) -> Tuple[List[Node], List[Node]]: + passes, failures = partition( + lambda x: x.is_db_relation and __is_none_or_blank(x.owner), + manifest.nodes() + ) + return list(passes), list(failures) + + +def __is_none_or_blank(owner: Optional[str]): + return owner is None or owner.strip(" ") == "" diff --git a/tests/rules/test_no_orphan_models.py b/tests/rules/test_no_orphan_models.py index 9518897..265c7c4 100644 --- a/tests/rules/test_no_orphan_models.py +++ b/tests/rules/test_no_orphan_models.py @@ -16,16 +16,18 @@ def manifest() -> Manifest: return Manifest( { "nodes": { - "a": {"unique_id": "a", "resource_type": "source"}, - "mart_b": {"unique_id": "mart_b", "resource_type": "model"}, + "stg_a": {"unique_id": "stg_a", "resource_type": "model"}, "stg_x": {"unique_id": "stg_x", "resource_type": "model"}, }, "child_map": { - "a": ["mart_b"], - "mart_b": [], + "source_a": ["stg_a"], + "stg_a": [], "stg_x": [], }, "disabled": [], + "sources": { + "source_a": {"unique_id": "source_a", "resource_type": "source"}, + }, } ) @@ -37,4 +39,4 @@ def test_no_orphaned_models_generates_correct_split(manifest): failure_ids = [f.id for f in failures] assert failure_ids == ["stg_x"] - assert pass_ids == ["a", "mart_b"] + assert pass_ids == ["source_a", "stg_a"] diff --git a/tests/rules/test_no_owner_on_physical_models.py b/tests/rules/test_no_owner_on_physical_models.py new file mode 100644 index 0000000..2c50cd5 --- /dev/null +++ b/tests/rules/test_no_owner_on_physical_models.py @@ -0,0 +1,125 @@ +import pytest + +from olivertwist.manifest import Manifest +from olivertwist.rules.no_owner_on_physical_models import no_owner_on_physical_models + + +@pytest.fixture +def manifest() -> Manifest: + return Manifest( + { + "nodes": { + "physical_node_1": { + "unique_id": "physical_node_1", + "resource_type": "model", + "config": {"materialized": "view"}, + "meta": {"owner": "Joe"} + }, + "physical_node_2": { + "unique_id": "physical_node_2", + "resource_type": "model", + "config": {"materialized": "table"}, + "meta": {"owner": "Joe"} + }, + "physical_node_3": { + "unique_id": "physical_node_3", + "resource_type": "model", + "config": {"materialized": "incremental"}, + "meta": {"owner": "Joe"} + }, + "ephemeral_node_1": { + "unique_id": "ephemeral_node_1", + "resource_type": "model", + "config": {"materialized": "ephemeral"}, + "meta": {"owner": "Joe"} + }, + "ephemeral_node_2": { + "unique_id": "ephemeral_node_2", + "resource_type": "model", + "config": {"materialized": "ephemeral"}, + "meta": {} + }, + "no_owner_physical_node_1": { + "unique_id": "no_owner_physical_node_1", + "resource_type": "model", + "config": {"materialized": "incremental"}, + "meta": {} + }, + "no_owner_physical_node_2": { + "unique_id": "no_owner_physical_node_2", + "resource_type": "model", + "config": {"materialized": "incremental"}, + "meta": {"owner": ""} + }, + "no_owner_physical_node_3": { + "unique_id": "no_owner_physical_node_3", + "resource_type": "model", + "config": {"materialized": "incremental"}, + "meta": {"owner": " "} + }, + }, + "child_map": { + "physical_node_1": [], + "physical_node_2": [], + "physical_node_3": [], + "ephemeral_node_1": [], + "ephemeral_node_2": [], + "no_owner_physical_node_1": [], + "no_owner_physical_node_2": [], + "no_owner_physical_node_3": [], + "source_1": [], + "no_owner_source_1": [], + "no_owner_source_2": [], + "no_owner_source_3": [], + }, + "disabled": [], + "sources": { + "source_1": { + "unique_id": "source_1", + "resource_type": "source", + "meta": {"owner": "Joe"} + }, + "no_owner_source_1": { + "unique_id": "no_owner_source_1", + "resource_type": "source", + "meta": {"owner": " "} + }, + "no_owner_source_2": { + "unique_id": "no_owner_source_2", + "resource_type": "source", + "meta": {"owner": ""} + }, + "no_owner_source_3": { + "unique_id": "no_owner_source_3", + "resource_type": "source", + "meta": {} + }, + + }, + + } + ) + + +def test_no_owner_on_physical_models(manifest): + passes, failures = no_owner_on_physical_models(manifest) + + pass_ids = [p.id for p in passes] + failure_ids = [f.id for f in failures] + + assert pass_ids == [ + "physical_node_1", + "physical_node_2", + "physical_node_3", + "ephemeral_node_1", + "ephemeral_node_2", + "source_1" + ] + assert failure_ids == [ + "no_owner_physical_node_1", + "no_owner_physical_node_2", + "no_owner_physical_node_3", + "no_owner_source_1", + "no_owner_source_2", + "no_owner_source_3", + ] diff --git a/tests/rules/test_no_references_to_source_from_marts.py b/tests/rules/test_no_references_to_source_from_marts.py index d97ef46..5317423 100644 --- a/tests/rules/test_no_references_to_source_from_marts.py +++ b/tests/rules/test_no_references_to_source_from_marts.py @@ -11,11 +11,6 @@ def manifest() -> Manifest: return Manifest( { "nodes": { - "source_1": { - "unique_id": "source_1", - "resource_type": "source", - "fqn": ["foo", "bar"], - }, "staging_1": { "unique_id": "staging_1", "resource_type": "model", @@ -34,7 +29,11 @@ def manifest() -> Manifest: }, "disabled": [], "sources": { - "source_1": {} + "source_1": { + "unique_id": "source_1", + "resource_type": "source", + "fqn": ["foo", "bar"], + }, }, } diff --git a/tests/rules/test_staging_has_single_source.py b/tests/rules/test_staging_has_single_source.py index 92f66a7..5dcea14 100644 --- a/tests/rules/test_staging_has_single_source.py +++ b/tests/rules/test_staging_has_single_source.py @@ -11,26 +11,11 @@ def manifest() -> Manifest: return Manifest( { "nodes": { - "a": { - "unique_id": "a", - "fqn": ["a"], - "resource_type": "source", - }, "staging.b": { "unique_id": "staging.b", "fqn": ["staging", "b"], "resource_type": "model", }, - "x": { - "unique_id": "x", - "fqn": ["x"], - "resource_type": "source", - }, - "y": { - "unique_id": "y", - "fqn": ["y"], - "resource_type": "source", - }, "staging.z": { "unique_id": "staging.z", "fqn": ["staging", "z"], @@ -46,9 +31,21 @@ def manifest() -> Manifest: }, "disabled": [], "sources": { - "a": {}, - "x": {}, - "y": {}, + "a": { + "unique_id": "a", + "fqn": ["a"], + "resource_type": "source", + }, + "x": { + "unique_id": "x", + "fqn": ["x"], + "resource_type": "source", + }, + "y": { + "unique_id": "y", + "fqn": ["y"], + "resource_type": "source", + }, }, } )