From f79a968a0908add9de1b6acfdb75a6b205134ebd Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Thu, 21 Oct 2021 11:42:23 -0500 Subject: [PATCH] add generic tests to test-paths (#4052) * removed overlooked breakpoint * first pass * save progress - singualr tests broken * fixed to work with both generic and singular tests * fixed formatting * added a comment * change to use /generic subfolder * fix formatting issues * fixed bug on code consolidation * fixed typo * added test for generic tests * added changelog entry * added logic to treat generic tests like macro tests * add generic test to macro_edges * fixed generic tests to match unique_ids * fixed test --- CHANGELOG.md | 1 + core/dbt/contracts/files.py | 6 +- core/dbt/contracts/graph/manifest.py | 2 +- core/dbt/contracts/graph/unparsed.py | 5 + core/dbt/parser/__init__.py | 3 +- core/dbt/parser/generic_test.py | 106 ++++++++++++++++++ core/dbt/parser/manifest.py | 30 +++-- core/dbt/parser/partial.py | 15 ++- core/dbt/parser/read_files.py | 13 ++- .../test_schema_v2_tests.py | 1 - .../test-files/generic_schema.yml | 9 ++ .../test-files/generic_test.sql | 26 +++++ .../test-files/generic_test_edited.sql | 26 +++++ .../test-files/generic_test_schema.yml | 10 ++ .../test_partial_parsing.py | 34 ++++++ test/unit/test_parser.py | 42 ++++++- 16 files changed, 304 insertions(+), 25 deletions(-) create mode 100644 core/dbt/parser/generic_test.py create mode 100644 test/integration/068_partial_parsing_tests/test-files/generic_schema.yml create mode 100644 test/integration/068_partial_parsing_tests/test-files/generic_test.sql create mode 100644 test/integration/068_partial_parsing_tests/test-files/generic_test_edited.sql create mode 100644 test/integration/068_partial_parsing_tests/test-files/generic_test_schema.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b23135c2af..d061a3f653a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,7 @@ Contributors: - Turns on the static parser by default and adds the flag `--no-static-parser` to disable it. ([#3377](https://github.com/dbt-labs/dbt-core/issues/3377), [#3939](https://github.com/dbt-labs/dbt-core/pull/3939)) - Generic test FQNs have changed to include the relative path, resource, and column (if applicable) where they are defined. This makes it easier to configure them from the `tests` block in `dbt_project.yml` ([#3259](https://github.com/dbt-labs/dbt-core/pull/3259), [#3880](https://github.com/dbt-labs/dbt-core/pull/3880) - Turn on partial parsing by default ([#3867](https://github.com/dbt-labs/dbt-core/issues/3867), [#3989](https://github.com/dbt-labs/dbt-core/issues/3989)) +- Generic test can now be added under a `generic` subfolder in the `test-paths` directory. ([#4052](https://github.com/dbt-labs/dbt-core/pull/4052)) ### Fixes - Add generic tests defined on sources to the manifest once, not twice ([#3347](https://github.com/dbt-labs/dbt/issues/3347), [#3880](https://github.com/dbt-labs/dbt/pull/3880)) diff --git a/core/dbt/contracts/files.py b/core/dbt/contracts/files.py index 1293fd9a54c..125485bba28 100644 --- a/core/dbt/contracts/files.py +++ b/core/dbt/contracts/files.py @@ -18,7 +18,8 @@ class ParseFileType(StrEnum): Model = 'model' Snapshot = 'snapshot' Analysis = 'analysis' - Test = 'test' + SingularTest = 'singular_test' + GenericTest = 'generic_test' Seed = 'seed' Documentation = 'docs' Schema = 'schema' @@ -30,7 +31,8 @@ class ParseFileType(StrEnum): ParseFileType.Model: 'ModelParser', ParseFileType.Snapshot: 'SnapshotParser', ParseFileType.Analysis: 'AnalysisParser', - ParseFileType.Test: 'SingularTestParser', + ParseFileType.SingularTest: 'SingularTestParser', + ParseFileType.GenericTest: 'GenericTestParser', ParseFileType.Seed: 'SeedParser', ParseFileType.Documentation: 'DocumentationParser', ParseFileType.Schema: 'SchemaParser', diff --git a/core/dbt/contracts/graph/manifest.py b/core/dbt/contracts/graph/manifest.py index 5796c36d889..04c7876b543 100644 --- a/core/dbt/contracts/graph/manifest.py +++ b/core/dbt/contracts/graph/manifest.py @@ -291,7 +291,7 @@ def build_node_edges(nodes: List[ManifestNode]): return _sort_values(forward_edges), _sort_values(backward_edges) -# Build a map of children of macros +# Build a map of children of macros and generic tests def build_macro_edges(nodes: List[Any]): forward_edges: Dict[str, List[str]] = { n.unique_id: [] for n in nodes if n.unique_id.startswith('macro') or n.depends_on.macros diff --git a/core/dbt/contracts/graph/unparsed.py b/core/dbt/contracts/graph/unparsed.py index 673ca1476c5..a68a58a6810 100644 --- a/core/dbt/contracts/graph/unparsed.py +++ b/core/dbt/contracts/graph/unparsed.py @@ -44,6 +44,11 @@ class UnparsedMacro(UnparsedBaseNode, HasSQL): resource_type: NodeType = field(metadata={'restrict': [NodeType.Macro]}) +@dataclass +class UnparsedGenericTest(UnparsedBaseNode, HasSQL): + resource_type: NodeType = field(metadata={'restrict': [NodeType.Macro]}) + + @dataclass class UnparsedNode(UnparsedBaseNode, HasSQL): name: str diff --git a/core/dbt/parser/__init__.py b/core/dbt/parser/__init__.py index ee6098d6fdf..b816211c141 100644 --- a/core/dbt/parser/__init__.py +++ b/core/dbt/parser/__init__.py @@ -1,6 +1,7 @@ from .analysis import AnalysisParser # noqa from .base import Parser, ConfiguredParser # noqa from .singular_test import SingularTestParser # noqa +from .generic_test import GenericTestParser # noqa from .docs import DocumentationParser # noqa from .hooks import HookParser # noqa from .macros import MacroParser # noqa @@ -10,6 +11,6 @@ from .snapshots import SnapshotParser # noqa from . import ( # noqa - analysis, base, singular_test, docs, hooks, macros, models, schemas, + analysis, base, generic_test, singular_test, docs, hooks, macros, models, schemas, snapshots ) diff --git a/core/dbt/parser/generic_test.py b/core/dbt/parser/generic_test.py new file mode 100644 index 00000000000..c3621d7568d --- /dev/null +++ b/core/dbt/parser/generic_test.py @@ -0,0 +1,106 @@ +from typing import Iterable, List + +import jinja2 + +from dbt.exceptions import CompilationException +from dbt.clients import jinja +from dbt.contracts.graph.parsed import ParsedGenericTestNode +from dbt.contracts.graph.unparsed import UnparsedMacro +from dbt.contracts.graph.parsed import ParsedMacro +from dbt.contracts.files import SourceFile +from dbt.logger import GLOBAL_LOGGER as logger +from dbt.node_types import NodeType +from dbt.parser.base import BaseParser +from dbt.parser.search import FileBlock +from dbt.utils import MACRO_PREFIX + + +class GenericTestParser(BaseParser[ParsedGenericTestNode]): + + @property + def resource_type(self) -> NodeType: + return NodeType.Macro + + @classmethod + def get_compiled_path(cls, block: FileBlock): + return block.path.relative_path + + def parse_generic_test( + self, block: jinja.BlockTag, base_node: UnparsedMacro, name: str + ) -> ParsedMacro: + unique_id = self.generate_unique_id(name) + + return ParsedMacro( + path=base_node.path, + macro_sql=block.full_block, + original_file_path=base_node.original_file_path, + package_name=base_node.package_name, + root_path=base_node.root_path, + resource_type=base_node.resource_type, + name=name, + unique_id=unique_id, + ) + + def parse_unparsed_generic_test( + self, base_node: UnparsedMacro + ) -> Iterable[ParsedMacro]: + try: + blocks: List[jinja.BlockTag] = [ + t for t in + jinja.extract_toplevel_blocks( + base_node.raw_sql, + allowed_blocks={'test'}, + collect_raw_data=False, + ) + if isinstance(t, jinja.BlockTag) + ] + except CompilationException as exc: + exc.add_node(base_node) + raise + + for block in blocks: + try: + ast = jinja.parse(block.full_block) + except CompilationException as e: + e.add_node(base_node) + raise + + # generic tests are structured as macros so we want to count the number of macro blocks + generic_test_nodes = list(ast.find_all(jinja2.nodes.Macro)) + + if len(generic_test_nodes) != 1: + # things have gone disastrously wrong, we thought we only + # parsed one block! + raise CompilationException( + f'Found multiple generic tests in {block.full_block}, expected 1', + node=base_node + ) + + generic_test_name = generic_test_nodes[0].name + + if not generic_test_name.startswith(MACRO_PREFIX): + continue + + name: str = generic_test_name.replace(MACRO_PREFIX, '') + node = self.parse_generic_test(block, base_node, name) + yield node + + def parse_file(self, block: FileBlock): + assert isinstance(block.file, SourceFile) + source_file = block.file + assert isinstance(source_file.contents, str) + original_file_path = source_file.path.original_file_path + logger.debug("Parsing {}".format(original_file_path)) + + # this is really only used for error messages + base_node = UnparsedMacro( + path=original_file_path, + original_file_path=original_file_path, + package_name=self.project.project_name, + raw_sql=source_file.contents, + root_path=self.project.project_root, + resource_type=NodeType.Macro, + ) + + for node in self.parse_unparsed_generic_test(base_node): + self.manifest.add_macro(block.file, node) diff --git a/core/dbt/parser/manifest.py b/core/dbt/parser/manifest.py index d88e4c50255..729771c263c 100644 --- a/core/dbt/parser/manifest.py +++ b/core/dbt/parser/manifest.py @@ -49,6 +49,7 @@ ) from dbt.parser.base import Parser from dbt.parser.analysis import AnalysisParser +from dbt.parser.generic_test import GenericTestParser from dbt.parser.singular_test import SingularTestParser from dbt.parser.docs import DocumentationParser from dbt.parser.hooks import HookParser @@ -277,9 +278,10 @@ def load(self): if skip_parsing: logger.debug("Partial parsing enabled, no changes found, skipping parsing") else: - # Load Macros + # Load Macros and tests # We need to parse the macros first, so they're resolvable when - # the other files are loaded + # the other files are loaded. Also need to parse tests, specifically + # generic tests start_load_macros = time.perf_counter() self.load_and_parse_macros(project_parser_files) @@ -379,14 +381,22 @@ def load_and_parse_macros(self, project_parser_files): if project.project_name not in project_parser_files: continue parser_files = project_parser_files[project.project_name] - if 'MacroParser' not in parser_files: - continue - parser = MacroParser(project, self.manifest) - for file_id in parser_files['MacroParser']: - block = FileBlock(self.manifest.files[file_id]) - parser.parse_file(block) - # increment parsed path count for performance tracking - self._perf_info.parsed_path_count = self._perf_info.parsed_path_count + 1 + if 'MacroParser' in parser_files: + parser = MacroParser(project, self.manifest) + for file_id in parser_files['MacroParser']: + block = FileBlock(self.manifest.files[file_id]) + parser.parse_file(block) + # increment parsed path count for performance tracking + self._perf_info.parsed_path_count = self._perf_info.parsed_path_count + 1 + # generic tests hisotrically lived in the macros directoy but can now be nested + # in a /generic directory under /tests so we want to process them here as well + if 'GenericTestParser' in parser_files: + parser = GenericTestParser(project, self.manifest) + for file_id in parser_files['GenericTestParser']: + block = FileBlock(self.manifest.files[file_id]) + parser.parse_file(block) + # increment parsed path count for performance tracking + self._perf_info.parsed_path_count = self._perf_info.parsed_path_count + 1 self.build_macro_resolver() # Look at changed macros and update the macro.depends_on.macros diff --git a/core/dbt/parser/partial.py b/core/dbt/parser/partial.py index 454446aa267..4b04ced4f3f 100644 --- a/core/dbt/parser/partial.py +++ b/core/dbt/parser/partial.py @@ -13,7 +13,12 @@ ParseFileType.Seed, ParseFileType.Snapshot, ParseFileType.Analysis, - ParseFileType.Test, + ParseFileType.SingularTest, +) + +mg_files = ( + ParseFileType.Macro, + ParseFileType.GenericTest, ) @@ -88,7 +93,7 @@ def build_file_diff(self): if self.saved_files[file_id].parse_file_type == ParseFileType.Schema: deleted_schema_files.append(file_id) else: - if self.saved_files[file_id].parse_file_type == ParseFileType.Macro: + if self.saved_files[file_id].parse_file_type in mg_files: changed_or_deleted_macro_file = True deleted.append(file_id) @@ -106,7 +111,7 @@ def build_file_diff(self): raise Exception(f"Serialization failure for {file_id}") changed_schema_files.append(file_id) else: - if self.saved_files[file_id].parse_file_type == ParseFileType.Macro: + if self.saved_files[file_id].parse_file_type in mg_files: changed_or_deleted_macro_file = True changed.append(file_id) file_diff = { @@ -213,7 +218,7 @@ def delete_from_saved(self, file_id): self.deleted_manifest.files[file_id] = self.saved_manifest.files.pop(file_id) # macros - if saved_source_file.parse_file_type == ParseFileType.Macro: + if saved_source_file.parse_file_type in mg_files: self.delete_macro_file(saved_source_file, follow_references=True) # docs @@ -229,7 +234,7 @@ def update_in_saved(self, file_id): if new_source_file.parse_file_type in mssat_files: self.update_mssat_in_saved(new_source_file, old_source_file) - elif new_source_file.parse_file_type == ParseFileType.Macro: + elif new_source_file.parse_file_type in mg_files: self.update_macro_in_saved(new_source_file, old_source_file) elif new_source_file.parse_file_type == ParseFileType.Documentation: self.update_doc_in_saved(new_source_file, old_source_file) diff --git a/core/dbt/parser/read_files.py b/core/dbt/parser/read_files.py index 1cf094805d4..44c2af7bfc6 100644 --- a/core/dbt/parser/read_files.py +++ b/core/dbt/parser/read_files.py @@ -93,6 +93,11 @@ def get_source_files(project, paths, extension, parse_file_type, saved_files): for fp in fp_list: if parse_file_type == ParseFileType.Seed: fb_list.append(load_seed_source_file(fp, project.project_name)) + # singular tests live in /tests but only generic tests live + # in /tests/generic so we want to skip those + elif (parse_file_type == ParseFileType.SingularTest and + 'generic/' in fp.relative_path): + continue else: file = load_source_file(fp, parse_file_type, project.project_name, saved_files) # only append the list if it has contents. added to fix #3568 @@ -137,7 +142,13 @@ def read_files(project, files, parser_files, saved_files): ) project_files['SingularTestParser'] = read_files_for_parser( - project, files, project.test_paths, '.sql', ParseFileType.Test, saved_files + project, files, project.test_paths, '.sql', ParseFileType.SingularTest, saved_files + ) + + # all generic tests within /tests must be nested under a /generic subfolder + project_files['GenericTestParser'] = read_files_for_parser( + project, files, ["{}{}".format(test_path, '/generic') for test_path in project.test_paths], + '.sql', ParseFileType.GenericTest, saved_files ) project_files['SeedParser'] = read_files_for_parser( diff --git a/test/integration/008_schema_tests_test/test_schema_v2_tests.py b/test/integration/008_schema_tests_test/test_schema_v2_tests.py index d120b0ea004..886dac675ca 100644 --- a/test/integration/008_schema_tests_test/test_schema_v2_tests.py +++ b/test/integration/008_schema_tests_test/test_schema_v2_tests.py @@ -544,7 +544,6 @@ def test_postgres_test_context_with_macro_namespace(self): run_result = self.run_dbt(['test'], expect_pass=False) results = run_result.results results = sorted(results, key=lambda r: r.node.name) - # breakpoint() self.assertEqual(len(results), 4) # call_pkg_macro_model_c_ self.assertEqual(results[0].status, TestStatus.Fail) diff --git a/test/integration/068_partial_parsing_tests/test-files/generic_schema.yml b/test/integration/068_partial_parsing_tests/test-files/generic_schema.yml new file mode 100644 index 00000000000..9a44074728a --- /dev/null +++ b/test/integration/068_partial_parsing_tests/test-files/generic_schema.yml @@ -0,0 +1,9 @@ +version: 2 + +models: + - name: orders + description: "Some order data" + columns: + - name: id + tests: + - unique diff --git a/test/integration/068_partial_parsing_tests/test-files/generic_test.sql b/test/integration/068_partial_parsing_tests/test-files/generic_test.sql new file mode 100644 index 00000000000..ca09beaadb7 --- /dev/null +++ b/test/integration/068_partial_parsing_tests/test-files/generic_test.sql @@ -0,0 +1,26 @@ +{% test is_odd(model, column_name) %} + +with validation as ( + + select + {{ column_name }} as odd_field + + from {{ model }} + +), + +validation_errors as ( + + select + odd_field + + from validation + -- if this is true, then odd_field is actually even! + where (odd_field % 2) = 0 + +) + +select * +from validation_errors + +{% endtest %} \ No newline at end of file diff --git a/test/integration/068_partial_parsing_tests/test-files/generic_test_edited.sql b/test/integration/068_partial_parsing_tests/test-files/generic_test_edited.sql new file mode 100644 index 00000000000..5a3b611ff7a --- /dev/null +++ b/test/integration/068_partial_parsing_tests/test-files/generic_test_edited.sql @@ -0,0 +1,26 @@ +{% test is_odd(model, column_name) %} + +with validation as ( + + select + {{ column_name }} as odd_field2 + + from {{ model }} + +), + +validation_errors as ( + + select + odd_field2 + + from validation + -- if this is true, then odd_field is actually even! + where (odd_field2 % 2) = 0 + +) + +select * +from validation_errors + +{% endtest %} \ No newline at end of file diff --git a/test/integration/068_partial_parsing_tests/test-files/generic_test_schema.yml b/test/integration/068_partial_parsing_tests/test-files/generic_test_schema.yml new file mode 100644 index 00000000000..c8307bc1021 --- /dev/null +++ b/test/integration/068_partial_parsing_tests/test-files/generic_test_schema.yml @@ -0,0 +1,10 @@ +version: 2 + +models: + - name: orders + description: "Some order data" + columns: + - name: id + tests: + - unique + - is_odd diff --git a/test/integration/068_partial_parsing_tests/test_partial_parsing.py b/test/integration/068_partial_parsing_tests/test_partial_parsing.py index bc15789d8a9..de882f24437 100644 --- a/test/integration/068_partial_parsing_tests/test_partial_parsing.py +++ b/test/integration/068_partial_parsing_tests/test_partial_parsing.py @@ -41,6 +41,7 @@ def setup_directories(self): # delete files in this directory without tests interfering with each other. os.mkdir(os.path.join(self.test_root_dir, 'models')) os.mkdir(os.path.join(self.test_root_dir, 'tests')) + os.mkdir(os.path.join(self.test_root_dir, 'tests/generic')) os.mkdir(os.path.join(self.test_root_dir, 'seeds')) os.mkdir(os.path.join(self.test_root_dir, 'macros')) os.mkdir(os.path.join(self.test_root_dir, 'analyses')) @@ -476,6 +477,7 @@ def test_postgres_skip_macros(self): results, log_output = self.run_dbt_and_capture(['--partial-parse', 'run']) self.assertTrue('Starting full parse.' in log_output) + class TestSnapshots(BasePPTest): @use_profile('postgres') @@ -508,3 +510,35 @@ def test_postgres_pp_snapshots(self): self.rm_file(normalize('snapshots/snapshot.sql')) results = self.run_dbt(["--partial-parse", "run"]) self.assertEqual(len(results), 1) + + +class TestTests(BasePPTest): + + @use_profile('postgres') + def test_postgres_pp_generic_tests(self): + + # initial run + self.setup_directories() + self.copy_file('test-files/orders.sql', 'models/orders.sql') + self.copy_file('test-files/generic_schema.yml', 'models/schema.yml') + results = self.run_dbt() + self.assertEqual(len(results), 1) + + # add generic test in test-path + self.copy_file('test-files/generic_test.sql', 'tests/generic/generic_test.sql') + self.copy_file('test-files/generic_test_schema.yml', 'models/schema.yml') + results = self.run_dbt(["--partial-parse", "run"]) + self.assertEqual(len(results), 1) + manifest = get_manifest() + test_id = 'test.test.is_odd_orders_id.82834fdc5b' + self.assertIn(test_id, manifest.nodes) + self.assertEqual(len(manifest.nodes), 3) + + # edit generic test in test-path + self.copy_file('test-files/generic_test_edited.sql', 'tests/generic/generic_test.sql') + results = self.run_dbt(["--partial-parse", "run"]) + self.assertEqual(len(results), 1) + manifest = get_manifest() + test_id = 'test.test.is_odd_orders_id.82834fdc5b' + self.assertIn(test_id, manifest.nodes) + self.assertEqual(len(manifest.nodes), 3) diff --git a/test/unit/test_parser.py b/test/unit/test_parser.py index e2043b7a92f..27d118a95d6 100644 --- a/test/unit/test_parser.py +++ b/test/unit/test_parser.py @@ -10,8 +10,8 @@ from dbt import tracking from dbt.exceptions import CompilationException from dbt.parser import ( - ModelParser, MacroParser, SingularTestParser, SchemaParser, - SnapshotParser, AnalysisParser + ModelParser, MacroParser, SingularTestParser, GenericTestParser, + SchemaParser, SnapshotParser, AnalysisParser ) from dbt.parser.schemas import ( TestablePatchParser, SourceParser, AnalysisPatchParser, MacroPatchParser @@ -28,8 +28,8 @@ ) from dbt.contracts.graph.parsed import ( ParsedModelNode, ParsedMacro, ParsedNodePatch, DependsOn, ColumnInfo, - ParsedSingularTestNode, ParsedSnapshotNode, ParsedAnalysisNode, - UnpatchedSourceDefinition + ParsedSingularTestNode, ParsedGenericTestNode, ParsedSnapshotNode, + ParsedAnalysisNode, UnpatchedSourceDefinition ) from dbt.contracts.graph.unparsed import Docs import itertools @@ -849,6 +849,40 @@ def test_basic(self): self.assertEqual(self.parser.manifest.files[file_id].nodes, ['test.snowplow.test_1']) +class GenericTestParserTest(BaseParserTest): +# generic tests in the test-paths directory currently leverage the macro parser + def setUp(self): + super().setUp() + self.parser = GenericTestParser( + project=self.snowplow_project_config, + manifest=Manifest() + ) + + def file_block_for(self, data, filename): + return super().file_block_for(data, filename, 'tests/generic') + + def test_basic(self): + raw_sql = '{% test not_null(model, column_name) %}select * from {{ model }} where {{ column_name }} is null {% endtest %}' + block = self.file_block_for(raw_sql, 'test_1.sql') + self.parser.manifest.files[block.file.file_id] = block.file + self.parser.parse_file(block) + node = list(self.parser.manifest.macros.values())[0] + expected = ParsedMacro( + name='test_not_null', + resource_type=NodeType.Macro, + unique_id='macro.snowplow.test_not_null', + package_name='snowplow', + original_file_path=normalize('tests/generic/test_1.sql'), + root_path=get_abs_os_path('./dbt_packages/snowplow'), + path=normalize('tests/generic/test_1.sql'), + macro_sql=raw_sql, + ) + assertEqualNodes(node, expected) + file_id = 'snowplow://' + normalize('tests/generic/test_1.sql') + self.assertIn(file_id, self.parser.manifest.files) + self.assertEqual(self.parser.manifest.files[file_id].macros, ['macro.snowplow.test_not_null']) + + class AnalysisParserTest(BaseParserTest): def setUp(self): super().setUp()