From 285479c0bc73901a33e8d155e170373cf4f5d04a Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Wed, 5 Aug 2020 09:04:40 -0600 Subject: [PATCH 1/2] include project macros in the manifest the adapter stores locally --- CHANGELOG.md | 1 + core/dbt/adapters/base/impl.py | 30 ++++---- core/dbt/parser/manifest.py | 73 +++++++------------ core/dbt/perf_utils.py | 11 +-- .../override-get-columns-macros/macros.sql | 3 + .../override-get-columns-models/model.sql | 5 ++ .../016_macro_tests/test_macros.py | 24 ++++++ test/unit/test_graph.py | 6 +- test/unit/test_postgres_adapter.py | 13 ++-- test/unit/test_snowflake_adapter.py | 6 +- test/unit/utils.py | 9 +++ 11 files changed, 103 insertions(+), 78 deletions(-) create mode 100644 test/integration/016_macro_tests/override-get-columns-macros/macros.sql create mode 100644 test/integration/016_macro_tests/override-get-columns-models/model.sql diff --git a/CHANGELOG.md b/CHANGELOG.md index 5827b201602..897deb3d93f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ### Features - Add support for impersonating a service account using `impersonate_service_account` in the BigQuery profile configuration ([#2677](https://github.com/fishtown-analytics/dbt/issues/2677)) ([docs](https://docs.getdbt.com/reference/warehouse-profiles/bigquery-profile#service-account-impersonation)) +- Macros in the current project can override internal dbt macros that are called through `execute_macros`. ([#2301](https://github.com/fishtown-analytics/dbt/issues/2301), [#2686](https://github.com/fishtown-analytics/dbt/pull/2686)) ### Breaking changes diff --git a/core/dbt/adapters/base/impl.py b/core/dbt/adapters/base/impl.py index bc07a797e10..e575a9d82e0 100644 --- a/core/dbt/adapters/base/impl.py +++ b/core/dbt/adapters/base/impl.py @@ -158,7 +158,7 @@ def __init__(self, config): self.config = config self.cache = RelationsCache() self.connections = self.ConnectionManager(config) - self._internal_manifest_lazy: Optional[Manifest] = None + self._macro_manifest_lazy: Optional[Manifest] = None ### # Methods that pass through to the connection manager @@ -239,24 +239,26 @@ def type(cls) -> str: return cls.ConnectionManager.TYPE @property - def _internal_manifest(self) -> Manifest: - if self._internal_manifest_lazy is None: - return self.load_internal_manifest() - return self._internal_manifest_lazy + def _macro_manifest(self) -> Manifest: + if self._macro_manifest_lazy is None: + return self.load_macro_manifest() + return self._macro_manifest_lazy - def check_internal_manifest(self) -> Optional[Manifest]: + def check_macro_manifest(self) -> Optional[Manifest]: """Return the internal manifest (used for executing macros) if it's been initialized, otherwise return None. """ - return self._internal_manifest_lazy + return self._macro_manifest_lazy - def load_internal_manifest(self) -> Manifest: - if self._internal_manifest_lazy is None: + def load_macro_manifest(self) -> Manifest: + if self._macro_manifest_lazy is None: # avoid a circular import - from dbt.parser.manifest import load_internal_manifest - manifest = load_internal_manifest(self.config) - self._internal_manifest_lazy = manifest - return self._internal_manifest_lazy + from dbt.parser.manifest import load_macro_manifest + manifest = load_macro_manifest( + self.config, self.connections.set_query_header + ) + self._macro_manifest_lazy = manifest + return self._macro_manifest_lazy ### # Caching methods @@ -941,7 +943,7 @@ def execute_macro( context_override = {} if manifest is None: - manifest = self._internal_manifest + manifest = self._macro_manifest macro = manifest.find_macro_by_name( macro_name, self.config.project_name, project diff --git a/core/dbt/parser/manifest.py b/core/dbt/parser/manifest.py index 7726c8a2491..a580f6dd3d0 100644 --- a/core/dbt/parser/manifest.py +++ b/core/dbt/parser/manifest.py @@ -11,8 +11,6 @@ from dbt import deprecations from dbt.adapters.factory import ( get_relation_class_by_name, - get_adapter_package_names, - get_include_paths, ) from dbt.helper_types import PathSet from dbt.logger import GLOBAL_LOGGER as logger, DbtProcessState @@ -123,28 +121,6 @@ def __init__( ) self._loaded_file_cache: Dict[str, FileBlock] = {} - def _load_macros( - self, - old_results: Optional[ParseResult], - internal_manifest: Optional[Manifest] = None, - ) -> None: - projects = self.all_projects - if internal_manifest is not None: - # skip internal packages - packages = get_adapter_package_names( - self.root_project.credentials.type - ) - projects = { - k: v for k, v in self.all_projects.items() if k not in packages - } - self.results.macros.update(internal_manifest.macros) - self.results.files.update(internal_manifest.files) - - for project in projects.values(): - parser = MacroParser(self.results, project) - for path in parser.search(): - self.parse_with_cache(path, parser, old_results) - def parse_with_cache( self, path: FilePath, @@ -201,25 +177,26 @@ def parse_project( def load_only_macros(self) -> Manifest: old_results = self.read_parse_results() - self._load_macros(old_results, internal_manifest=None) + + for project in self.all_projects.values(): + parser = MacroParser(self.results, project) + for path in parser.search(): + self.parse_with_cache(path, parser, old_results) + # make a manifest with just the macros to get the context macro_manifest = Manifest.from_macros( macros=self.results.macros, files=self.results.files ) + self.macro_hook(macro_manifest) return macro_manifest - def load(self, internal_manifest: Optional[Manifest] = None): + def load(self, macro_manifest: Manifest): old_results = self.read_parse_results() if old_results is not None: logger.debug('Got an acceptable cached parse result') - self._load_macros(old_results, internal_manifest=internal_manifest) - # make a manifest with just the macros to get the context - macro_manifest = Manifest.from_macros( - macros=self.results.macros, - files=self.results.files - ) - self.macro_hook(macro_manifest) + self.results.macros.update(macro_manifest.macros) + self.results.files.update(macro_manifest.files) for project in self.all_projects.values(): # parse a single project @@ -352,7 +329,7 @@ def create_manifest(self) -> Manifest: def load_all( cls, root_config: RuntimeConfig, - internal_manifest: Optional[Manifest], + macro_manifest: Manifest, macro_hook: Callable[[Manifest], Any], ) -> Manifest: with PARSING_STATE: @@ -367,7 +344,7 @@ def load_all( project_names=''.join(v1_configs) ) loader = cls(root_config, projects, macro_hook) - loader.load(internal_manifest=internal_manifest) + loader.load(macro_manifest=macro_manifest) loader.write_parse_results() manifest = loader.create_manifest() _check_manifest(manifest, root_config) @@ -375,10 +352,14 @@ def load_all( return manifest @classmethod - def load_internal(cls, root_config: RuntimeConfig) -> Manifest: + def load_macros( + cls, + root_config: RuntimeConfig, + macro_hook: Callable[[Manifest], Any], + ) -> Manifest: with PARSING_STATE: - projects = load_internal_projects(root_config) - loader = cls(root_config, projects) + projects = root_config.load_dependencies() + loader = cls(root_config, projects, macro_hook) return loader.load_only_macros() @@ -681,18 +662,16 @@ def process_node( _process_docs_for_node(ctx, node) -def load_internal_projects(config): - project_paths = get_include_paths(config.credentials.type) - return dict(_load_projects(config, project_paths)) - - -def load_internal_manifest(config: RuntimeConfig) -> Manifest: - return ManifestLoader.load_internal(config) +def load_macro_manifest( + config: RuntimeConfig, + macro_hook: Callable[[Manifest], Any], +) -> Manifest: + return ManifestLoader.load_macros(config, macro_hook) def load_manifest( config: RuntimeConfig, - internal_manifest: Optional[Manifest], + macro_manifest: Manifest, macro_hook: Callable[[Manifest], Any], ) -> Manifest: - return ManifestLoader.load_all(config, internal_manifest, macro_hook) + return ManifestLoader.load_all(config, macro_manifest, macro_hook) diff --git a/core/dbt/perf_utils.py b/core/dbt/perf_utils.py index 21f9b7045b3..4e8f74033f4 100644 --- a/core/dbt/perf_utils.py +++ b/core/dbt/perf_utils.py @@ -15,9 +15,10 @@ def get_full_manifest(config: RuntimeConfig) -> Manifest: attached to the adapter for any methods that need it. """ adapter = get_adapter(config) # type: ignore - internal: Manifest = adapter.load_internal_manifest() + internal: Manifest = adapter.load_macro_manifest() - def set_header(manifest: Manifest) -> None: - adapter.connections.set_query_header(manifest) - - return load_manifest(config, internal, set_header) + return load_manifest( + config, + internal, + adapter.connections.set_query_header, + ) diff --git a/test/integration/016_macro_tests/override-get-columns-macros/macros.sql b/test/integration/016_macro_tests/override-get-columns-macros/macros.sql new file mode 100644 index 00000000000..73fe0ccfb10 --- /dev/null +++ b/test/integration/016_macro_tests/override-get-columns-macros/macros.sql @@ -0,0 +1,3 @@ +{% macro get_columns_in_relation(relation) %} + {{ return('a string') }} +{% endmacro %} diff --git a/test/integration/016_macro_tests/override-get-columns-models/model.sql b/test/integration/016_macro_tests/override-get-columns-models/model.sql new file mode 100644 index 00000000000..7be007e2444 --- /dev/null +++ b/test/integration/016_macro_tests/override-get-columns-models/model.sql @@ -0,0 +1,5 @@ +{% set result = adapter.get_columns_in_relation(this) %} +{% if execute and result != 'a string' %} + {% do exceptions.raise_compiler_error('overriding get_columns_in_relation failed') %} +{% endif %} +select 1 as id diff --git a/test/integration/016_macro_tests/test_macros.py b/test/integration/016_macro_tests/test_macros.py index 0b856f70f23..4a554e313cc 100644 --- a/test/integration/016_macro_tests/test_macros.py +++ b/test/integration/016_macro_tests/test_macros.py @@ -93,3 +93,27 @@ def test_postgres_invalid_macro(self): self.run_dbt(['run']) assert "In dispatch: No macro named 'dispatch_to_nowhere' found" in str(exc.value) + + +class TestMacroOverrideBuiltin(DBTIntegrationTest): + @property + def schema(self): + return "test_macros_016" + + @property + def models(self): + return 'override-get-columns-models' + + @property + def project_config(self): + return { + 'config-version': 2, + 'macro-paths': ['override-get-columns-macros'], + } + + + @use_profile('postgres') + def test_postgres_overrides(self): + # the first time, the model doesn't exist + self.run_dbt() + self.run_dbt() diff --git a/test/unit/test_graph.py b/test/unit/test_graph.py index 05888fba3fb..6c444768df2 100644 --- a/test/unit/test_graph.py +++ b/test/unit/test_graph.py @@ -104,7 +104,7 @@ def _mock_parse_result(config, all_projects): self.mock_source_file = self.load_source_file_patcher.start() self.mock_source_file.side_effect = lambda path: [n for n in self.mock_models if n.path == path][0] - self.internal_manifest = Manifest.from_macros(macros={ + self.macro_manifest = Manifest.from_macros(macros={ n.unique_id: n for n in generate_name_macros('test_models_compile') }) @@ -161,7 +161,7 @@ def use_models(self, models): def load_manifest(self, config): loader = dbt.parser.manifest.ManifestLoader(config, {config.project_name: config}) - loader.load(internal_manifest=self.internal_manifest) + loader.load(macro_manifest=self.macro_manifest) return loader.create_manifest() def test__single_model(self): @@ -319,7 +319,7 @@ def test__partial_parse(self): config = self.get_config() loader = dbt.parser.manifest.ManifestLoader(config, {config.project_name: config}) - loader.load(internal_manifest=self.internal_manifest) + loader.load(macro_manifest=self.macro_manifest) loader.create_manifest() results = loader.results diff --git a/test/unit/test_postgres_adapter.py b/test/unit/test_postgres_adapter.py index 6c5b3b0eb12..ebeb5b9b306 100644 --- a/test/unit/test_postgres_adapter.py +++ b/test/unit/test_postgres_adapter.py @@ -16,7 +16,7 @@ from psycopg2 import extensions as psycopg2_extensions from psycopg2 import DatabaseError -from .utils import config_from_parts_or_dicts, inject_adapter, mock_connection, TestAdapterConversions +from .utils import config_from_parts_or_dicts, inject_adapter, mock_connection, TestAdapterConversions, load_internal_manifest_macros class TestPostgresAdapter(unittest.TestCase): @@ -276,9 +276,14 @@ def setUp(self): self.patcher = mock.patch('dbt.adapters.postgres.connections.psycopg2') self.psycopg2 = self.patcher.start() + self.load_patch = mock.patch('dbt.parser.manifest.make_parse_result') + self.mock_parse_result = self.load_patch.start() + self.mock_parse_result.return_value = ParseResult.rpc() + self.psycopg2.connect.return_value = self.handle self.adapter = PostgresAdapter(self.config) - self.adapter.connections.query_header = MacroQueryStringSetter(self.config, mock.MagicMock(macros={})) + self.adapter._macro_manifest_lazy = load_internal_manifest_macros(self.config) + self.adapter.connections.query_header = MacroQueryStringSetter(self.config, self.adapter._macro_manifest_lazy) self.qh_patch = mock.patch.object(self.adapter.connections.query_header, 'add') self.mock_query_header_add = self.qh_patch.start() @@ -286,10 +291,6 @@ def setUp(self): self.adapter.acquire_connection() inject_adapter(self.adapter, PostgresPlugin) - self.load_patch = mock.patch('dbt.parser.manifest.make_parse_result') - self.mock_parse_result = self.load_patch.start() - self.mock_parse_result.return_value = ParseResult.rpc() - def tearDown(self): # we want a unique self.handle every time. self.adapter.cleanup_connections() diff --git a/test/unit/test_snowflake_adapter.py b/test/unit/test_snowflake_adapter.py index 9d0c60705f3..131e005f40c 100644 --- a/test/unit/test_snowflake_adapter.py +++ b/test/unit/test_snowflake_adapter.py @@ -14,7 +14,7 @@ from dbt.parser.results import ParseResult from snowflake import connector as snowflake_connector -from .utils import config_from_parts_or_dicts, inject_adapter, mock_connection, TestAdapterConversions +from .utils import config_from_parts_or_dicts, inject_adapter, mock_connection, TestAdapterConversions, load_internal_manifest_macros class TestSnowflakeAdapter(unittest.TestCase): @@ -65,8 +65,8 @@ def setUp(self): self.snowflake.return_value = self.handle self.adapter = SnowflakeAdapter(self.config) - - self.adapter.connections.query_header = MacroQueryStringSetter(self.config, mock.MagicMock(macros={})) + self.adapter._macro_manifest_lazy = load_internal_manifest_macros(self.config) + self.adapter.connections.query_header = MacroQueryStringSetter(self.config, self.adapter._macro_manifest_lazy) self.qh_patch = mock.patch.object(self.adapter.connections.query_header, 'add') self.mock_query_header_add = self.qh_patch.start() diff --git a/test/unit/utils.py b/test/unit/utils.py index 64208bee0f2..4900a77b9e0 100644 --- a/test/unit/utils.py +++ b/test/unit/utils.py @@ -289,3 +289,12 @@ def MockDocumentation(package, name, **kwargs): ) doc.name = name return doc + + +def load_internal_manifest_macros(config, macro_hook = lambda m: None): + from dbt.adapters.factory import get_include_paths + from dbt.parser.manifest import ManifestLoader + paths = get_include_paths(config.credentials.type) + projects = {k: v for k, v in config.load_dependencies().items() if k.startswith('dbt')} + loader = ManifestLoader(config, projects, macro_hook) + return loader.load_only_macros() From 4274139210272a2246af1d330458d552f725ae11 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Wed, 5 Aug 2020 14:37:56 -0600 Subject: [PATCH 2/2] Fix the test Make sure "dbt deps" reloads the full manifest Make sure commands that reload the dbt_project.yml properly reset the config (including adapters) --- core/dbt/adapters/base/impl.py | 4 ++++ core/dbt/config/runtime.py | 3 +++ core/dbt/perf_utils.py | 10 +++++++++- core/dbt/rpc/task_manager.py | 6 +++++- core/dbt/task/rpc/cli.py | 4 +++- core/dbt/utils.py | 2 +- 6 files changed, 25 insertions(+), 4 deletions(-) diff --git a/core/dbt/adapters/base/impl.py b/core/dbt/adapters/base/impl.py index e575a9d82e0..7c1cc592530 100644 --- a/core/dbt/adapters/base/impl.py +++ b/core/dbt/adapters/base/impl.py @@ -260,6 +260,10 @@ def load_macro_manifest(self) -> Manifest: self._macro_manifest_lazy = manifest return self._macro_manifest_lazy + def clear_macro_manifest(self): + if self._macro_manifest_lazy is not None: + self._macro_manifest_lazy = None + ### # Caching methods ### diff --git a/core/dbt/config/runtime.py b/core/dbt/config/runtime.py index e11471c9830..184e2732762 100644 --- a/core/dbt/config/runtime.py +++ b/core/dbt/config/runtime.py @@ -355,6 +355,9 @@ def load_dependencies(self) -> Mapping[str, 'RuntimeConfig']: self.dependencies = all_projects return self.dependencies + def clear_dependencies(self): + self.dependencies = None + def load_projects( self, paths: Iterable[Path] ) -> Iterator[Tuple[str, 'RuntimeConfig']]: diff --git a/core/dbt/perf_utils.py b/core/dbt/perf_utils.py index 4e8f74033f4..782859a88a3 100644 --- a/core/dbt/perf_utils.py +++ b/core/dbt/perf_utils.py @@ -7,7 +7,11 @@ from dbt.config import RuntimeConfig -def get_full_manifest(config: RuntimeConfig) -> Manifest: +def get_full_manifest( + config: RuntimeConfig, + *, + reset: bool = False, +) -> Manifest: """Load the full manifest, using the adapter's internal manifest if it exists to skip parsing internal (dbt + plugins) macros a second time. @@ -15,6 +19,10 @@ def get_full_manifest(config: RuntimeConfig) -> Manifest: attached to the adapter for any methods that need it. """ adapter = get_adapter(config) # type: ignore + if reset: + config.clear_dependencies() + adapter.clear_macro_manifest() + internal: Manifest = adapter.load_macro_manifest() return load_manifest( diff --git a/core/dbt/rpc/task_manager.py b/core/dbt/rpc/task_manager.py index 83ab7c0eabc..91466fb9b59 100644 --- a/core/dbt/rpc/task_manager.py +++ b/core/dbt/rpc/task_manager.py @@ -8,6 +8,7 @@ import dbt.exceptions import dbt.flags as flags +from dbt.adapters.factory import reset_adapters, register_adapter from dbt.contracts.graph.manifest import Manifest from dbt.contracts.rpc import ( LastParse, @@ -126,6 +127,8 @@ def reload_manifest(self) -> bool: def reload_config(self): config = self.config.from_args(self.args) self.config = config + reset_adapters() + register_adapter(config) return config def add_request(self, request_handler: TaskHandlerProtocol): @@ -184,7 +187,7 @@ def set_parsing(self) -> bool: return True def parse_manifest(self) -> None: - self.manifest = get_full_manifest(self.config) + self.manifest = get_full_manifest(self.config, reset=True) def set_compile_exception(self, exc, logs=List[LogMessage]) -> None: assert self.last_parse.state == ManifestStatus.Compiling, \ @@ -227,6 +230,7 @@ def get_handler( return None task = self.rpc_task(method) + return task def task_table(self) -> List[TaskRow]: diff --git a/core/dbt/task/rpc/cli.py b/core/dbt/task/rpc/cli.py index e39d82ead92..20855f1dac6 100644 --- a/core/dbt/task/rpc/cli.py +++ b/core/dbt/task/rpc/cli.py @@ -104,7 +104,9 @@ def handle_request(self) -> Result: if dumped != self.args.vars: self.real_task.args.vars = dumped if isinstance(self.real_task, RemoteManifestMethod): - self.real_task.manifest = get_full_manifest(self.config) + self.real_task.manifest = get_full_manifest( + self.config, reset=True + ) # we parsed args from the cli, so we're set on that front return self.real_task.handle_request() diff --git a/core/dbt/utils.py b/core/dbt/utils.py index 8f446cab665..aac730ebc51 100644 --- a/core/dbt/utils.py +++ b/core/dbt/utils.py @@ -320,7 +320,7 @@ def default(self, obj): if hasattr(obj, 'to_dict'): # if we have a to_dict we should try to serialize the result of # that! - obj = obj.to_dict() + return obj.to_dict() return super().default(obj)