Partial parsing
Refactor parsing
Store files in the manifest
some speed improvements
test fixes
Remove some old python 2 compatibility stuff
Jacob Beck committed Aug 6, 2019
1 parent d08d291 commit 2388330
Showing 68 changed files with 3,458 additions and 4,000 deletions.
1 change: 0 additions & 1 deletion core/dbt/__init__.py

This file was deleted.

1 change: 0 additions & 1 deletion core/dbt/adapters/__init__.py

This file was deleted.

26 changes: 15 additions & 11 deletions core/dbt/clients/jinja.py
@@ -2,6 +2,7 @@
 import linecache
 import os
 import tempfile
+from typing import List, Union, Set, Optional

 import jinja2
 import jinja2._compat

@@ -13,7 +14,7 @@
 import dbt.exceptions
 import dbt.utils

-from dbt.clients._jinja_blocks import BlockIterator
+from dbt.clients._jinja_blocks import BlockIterator, BlockData, BlockTag

 from dbt.logger import GLOBAL_LOGGER as logger  # noqa

@@ -305,21 +306,24 @@ def undefined_error(msg):
     raise jinja2.exceptions.UndefinedError(msg)


-def extract_toplevel_blocks(data, allowed_blocks=None, collect_raw_data=True):
+def extract_toplevel_blocks(
+    data: str,
+    allowed_blocks: Optional[Set[str]] = None,
+    collect_raw_data: bool = True,
+) -> List[Union[BlockData, BlockTag]]:
     """Extract the top level blocks with matching block types from a jinja
     file, with some special handling for block nesting.
-    :param str data: The data to extract blocks from.
-    :param Optional[Set[str]] allowed_blocks: The names of the blocks to
-        extract from the file. They may not be nested within if/for blocks.
-        If None, use the default values.
-    :param bool collect_raw_data: If set, raw data between matched blocks will
-        also be part of the results, as `BlockData` objects. They have a
+    :param data: The data to extract blocks from.
+    :param allowed_blocks: The names of the blocks to extract from the file.
+        They may not be nested within if/for blocks. If None, use the default
+        values.
+    :param collect_raw_data: If set, raw data between matched blocks will also
+        be part of the results, as `BlockData` objects. They have a
         `block_type_name` field of `'__dbt_data'` and will never have a
         `block_name`.
-    :return List[Union[BlockData, BlockTag]]: A list of `BlockTag`s matching
-        the allowed block types and (if `collect_raw_data` is `True`)
-        `BlockData` objects.
+    :return: A list of `BlockTag`s matching the allowed block types and (if
+        `collect_raw_data` is `True`) `BlockData` objects.
     """
     return BlockIterator(data).lex_for_blocks(
         allowed_blocks=allowed_blocks,
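
For orientation, a minimal usage sketch of the reworked `extract_toplevel_blocks` signature (assuming `'snapshot'` is a block name a caller may request, and that `BlockTag`/`BlockData` expose the `block_type_name` and `block_name` attributes described in the docstring):

from dbt.clients.jinja import extract_toplevel_blocks

raw = (
    "{% snapshot orders_snapshot %}\n"
    "    select * from raw.orders\n"
    "{% endsnapshot %}\n"
)

# Ask only for snapshot blocks, and keep the raw text between them too.
blocks = extract_toplevel_blocks(
    raw,
    allowed_blocks={'snapshot'},
    collect_raw_data=True,
)

for block in blocks:
    # BlockData entries report the sentinel block_type_name '__dbt_data'
    # and carry no block_name; BlockTag entries carry both.
    print(block.block_type_name, getattr(block, 'block_name', None))

With `collect_raw_data=False`, only the `BlockTag` entries would come back.
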
6 changes: 3 additions & 3 deletions core/dbt/clients/system.py
@@ -24,13 +24,13 @@ def find_matching(root_path,
     absolute root path (`relative_paths_to_search`), and a `file_pattern`
     like '*.sql', returns information about the files. For example:
-    > find_matching('/root/path', 'models', '*.sql')
+    > find_matching('/root/path', ['models'], '*.sql')
     [ { 'absolute_path': '/root/path/models/model_one.sql',
-        'relative_path': 'models/model_one.sql',
+        'relative_path': 'model_one.sql',
         'searched_path': 'models' },
       { 'absolute_path': '/root/path/models/subdirectory/model_two.sql',
-        'relative_path': 'models/subdirectory/model_two.sql',
+        'relative_path': 'subdirectory/model_two.sql',
         'searched_path': 'models' } ]
     """
     matching = []
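
As a rough illustration of how a caller consumes `find_matching` after this docstring fix (a sketch based only on the documented dict keys, not on any specific dbt call site):

from dbt.clients.system import find_matching

# Mirrors the docstring example: search <root>/models for .sql files.
matches = find_matching('/root/path', ['models'], '*.sql')

for match in matches:
    # relative_path is now relative to the searched path ('models'),
    # not to the project root.
    print(match['searched_path'], match['relative_path'])
    print('  ->', match['absolute_path'])
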
5 changes: 3 additions & 2 deletions core/dbt/compilation.py
@@ -94,7 +94,7 @@ def recursively_prepend_ctes(model, manifest):

     model.prepend_ctes(prepended_ctes)

-    manifest.nodes[model.unique_id] = model
+    manifest.update_node(model)

     return (model, prepended_ctes, manifest)

@@ -167,7 +167,8 @@ def compile_node(self, node, manifest, extra_context=None):
     def write_graph_file(self, linker, manifest):
         filename = graph_file_name
         graph_path = os.path.join(self.config.target_path, filename)
-        linker.write_graph(graph_path, manifest)
+        if dbt.flags.WRITE_JSON:
+            linker.write_graph(graph_path, manifest)

     def link_node(self, linker, node, manifest):
         linker.add_node(node.unique_id)
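
The `write_graph_file` change above gates the graph artifact behind the global `dbt.flags.WRITE_JSON` switch. A hedged sketch of the pattern (setting the flag directly is illustrative only; in normal runs it is populated from the CLI/config layer):

import dbt.flags

# With WRITE_JSON disabled, compilation still runs end to end, but
# write_graph_file silently skips writing the graph file to target/.
dbt.flags.WRITE_JSON = False
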
2 changes: 1 addition & 1 deletion core/dbt/config/project.py
@@ -18,7 +18,7 @@
 from dbt.ui import printer
 from dbt.utils import deep_map
 from dbt.utils import parse_cli_vars
-from dbt.parser.source_config import SourceConfig
+from dbt.source_config import SourceConfig

 from dbt.contracts.project import Project as ProjectContract
 from dbt.contracts.project import PackageConfig
2 changes: 1 addition & 1 deletion core/dbt/context/runtime.py
@@ -3,7 +3,7 @@
 import dbt.clients.jinja
 import dbt.context.common
 import dbt.flags
-from dbt.parser import ParserUtils
+from dbt.parser.util import ParserUtils

 from dbt.logger import GLOBAL_LOGGER as logger  # noqa
12 changes: 12 additions & 0 deletions core/dbt/contracts/graph/compiled.py
@@ -11,6 +11,7 @@
     ParsedSourceDefinition,
     ParsedTestNode,
     TestConfig,
+    PARSED_TYPES,
 )
 from dbt.node_types import (
     NodeType,

@@ -187,6 +188,17 @@ def compiled_type_for(parsed: ParsedNode):
     return type(parsed)


+def parsed_instance_for(compiled: CompiledNode) -> ParsedNode:
+    cls = PARSED_TYPES.get(compiled.resource_type)
+    if cls is None:
+        # how???
+        raise ValueError('invalid resource_type: {}'
+                         .format(compiled.resource_type))
+
+    # validate=False to allow extra keys from compiling
+    return cls.from_dict(compiled.to_dict(), validate=False)
+
+
 # We allow either parsed or compiled nodes, or parsed sources, as some
 # 'compile()' calls in the runner actually just return the original parsed
 # node they were given.
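
The new `parsed_instance_for` helper is the inverse of `compiled_type_for`: it looks up the parsed class in `PARSED_TYPES` by `resource_type` and rebuilds the node with `validate=False` so compiled-only keys are tolerated. A hypothetical wrapper showing how a caller might normalize a node (assuming `CompiledNode` is importable from this module, as the type annotation above suggests):

from dbt.contracts.graph.compiled import CompiledNode, parsed_instance_for

def parsed_view(node):
    """Return the parsed representation of a node, compiled or not."""
    # Some compile() calls hand back the original parsed node unchanged,
    # so only convert when we actually hold a compiled instance.
    if isinstance(node, CompiledNode):
        return parsed_instance_for(node)
    return node
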