diff --git a/CHANGELOG.md b/CHANGELOG.md index 45b98820875..8f4b97f8ddd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -43,15 +43,56 @@ Contributors: - [@azhard](https://github.com/azhard) ([#2517](https://github.com/fishtown-analytics/dbt/pull/2517), ([#2521](https://github.com/fishtown-analytics/dbt/pull/2521)), [#2547](https://github.com/fishtown-analytics/dbt/pull/2547)) - [@alepuccetti](https://github.com/alepuccetti) ([#2526](https://github.com/fishtown-analytics/dbt/issues/2526)) + ## dbt 0.17.1 (Release TBD) + +## dbt 0.17.1rc4 (July 08, 2020) + + +### Fixes +- dbt native rendering now requires an opt-in with the `as_native` filter. Added `as_bool` and `as_number` filters, which are like `as_native` but also type-check. ([#2612](https://github.com/fishtown-analytics/dbt/issues/2612), [#2618](https://github.com/fishtown-analytics/dbt/pull/2618)) + + +## dbt 0.17.1rc3 (July 01, 2020) + + +### Fixes +- dbt native rendering now avoids turning quoted strings into unquoted strings ([#2597](https://github.com/fishtown-analytics/dbt/issues/2597), [#2599](https://github.com/fishtown-analytics/dbt/pull/2599)) +- Hash name of local packages ([#2600](https://github.com/fishtown-analytics/dbt/pull/2600)) +- On bigquery, also persist docs for seeds ([#2598](https://github.com/fishtown-analytics/dbt/issues/2598), [#2601](https://github.com/fishtown-analytics/dbt/pull/2601)) +- Swallow all file-writing related errors on Windows, regardless of path length or exception type. ([#2603](https://github.com/fishtown-analytics/dbt/pull/2603)) + + +## dbt 0.17.1rc2 (June 25, 2020) + +### Fixes +- dbt config-version: 2 now properly defers rendering `+pre-hook` and `+post-hook` fields. 
([#2583](https://github.com/fishtown-analytics/dbt/issues/2583), [#2854](https://github.com/fishtown-analytics/dbt/pull/2854)) +- dbt handles too-long paths on windows that do not report that the path is too long ([#2591](https://github.com/fishtown-analytics/dbt/pull/2591)) + + +## dbt 0.17.1rc1 (June 19, 2020) + + ### Fixes - dbt compile and ls no longer create schemas if they don't already exist ([#2525](https://github.com/fishtown-analytics/dbt/issues/2525), [#2528](https://github.com/fishtown-analytics/dbt/pull/2528)) - `dbt deps` now respects the `--project-dir` flag, so using `dbt deps --project-dir=/some/path` and then `dbt run --project-dir=/some/path` will properly find dependencies ([#2519](https://github.com/fishtown-analytics/dbt/issues/2519), [#2534](https://github.com/fishtown-analytics/dbt/pull/2534)) - `packages.yml` revision/version fields can be float-like again (`revision: '1.0'` is valid). ([#2518](https://github.com/fishtown-analytics/dbt/issues/2518), [#2535](https://github.com/fishtown-analytics/dbt/pull/2535)) +<<<<<<< HEAD - dbt again respects config aliases in config() calls ([#2557](https://github.com/fishtown-analytics/dbt/issues/2557), [#2559](https://github.com/fishtown-analytics/dbt/pull/2559)) +======= +- Parallel RPC requests no longer step on each other's arguments ([#2484](https://github.com/fishtown-analytics/dbt/issues/2484), [#2554](https://github.com/fishtown-analytics/dbt/pull/2554)) +- `persist_docs` now takes into account descriptions for nested columns in bigquery ([#2549](https://github.com/fishtown-analytics/dbt/issues/2549), [#2550](https://github.com/fishtown-analytics/dbt/pull/2550)) +- On windows (depending upon OS support), dbt no longer fails with errors when writing artifacts ([#2558](https://github.com/fishtown-analytics/dbt/issues/2558), [#2566](https://github.com/fishtown-analytics/dbt/pull/2566)) +- dbt again respects config aliases in config() calls and dbt_project.yml 
([#2557](https://github.com/fishtown-analytics/dbt/issues/2557), [#2559](https://github.com/fishtown-analytics/dbt/pull/2559), [#2575](https://github.com/fishtown-analytics/dbt/pull/2575)) +- fix unclickable nodes in the dbt Docs DAG viz ([#101](https://github.com/fishtown-analytics/dbt-docs/pull/101)) +- fix null database names for Spark projects in dbt Docs site ([#96](https://github.com/fishtown-analytics/dbt-docs/pull/96)) + +Contributors: + - [@bodschut](https://github.com/bodschut) ([#2550](https://github.com/fishtown-analytics/dbt/pull/2550)) +>>>>>>> dev/0.17.1 ## dbt 0.17.0 (June 08, 2020) diff --git a/core/dbt/clients/jinja.py b/core/dbt/clients/jinja.py index 07a344338b7..c398c27363b 100644 --- a/core/dbt/clients/jinja.py +++ b/core/dbt/clients/jinja.py @@ -8,7 +8,8 @@ from contextlib import contextmanager from itertools import chain, islice from typing import ( - List, Union, Set, Optional, Dict, Any, Iterator, Type, NoReturn, Tuple + List, Union, Set, Optional, Dict, Any, Iterator, Type, NoReturn, Tuple, + Callable ) import jinja2 @@ -28,7 +29,7 @@ from dbt.contracts.graph.parsed import ParsedSchemaTestNode from dbt.exceptions import ( InternalException, raise_compiler_error, CompilationException, - invalid_materialization_argument, MacroReturn + invalid_materialization_argument, MacroReturn, JinjaRenderingException ) from dbt import flags from dbt.logger import GLOBAL_LOGGER as logger # noqa @@ -111,6 +112,24 @@ class TextMarker(str): """ +class NativeMarker(str): + """A special native-env marker that indicates the field should be passed to + literal_eval. 
+ """ + + +class BoolMarker(NativeMarker): + pass + + +class NumberMarker(NativeMarker): + pass + + +def _is_number(value) -> bool: + return isinstance(value, (int, float)) and not isinstance(value, bool) + + def quoted_native_concat(nodes): """This is almost native_concat from the NativeTemplate, except in the special case of a single argument that is a quoted string and returns a @@ -119,19 +138,31 @@ def quoted_native_concat(nodes): head = list(islice(nodes, 2)) if not head: - return None + return '' if len(head) == 1: raw = head[0] if isinstance(raw, TextMarker): return str(raw) + elif not isinstance(raw, NativeMarker): + # return non-strings as-is + return raw else: - raw = "".join([str(v) for v in chain(head, nodes)]) + # multiple nodes become a string. + return "".join([str(v) for v in chain(head, nodes)]) try: result = literal_eval(raw) except (ValueError, SyntaxError, MemoryError): - return raw + result = raw + if isinstance(raw, BoolMarker) and not isinstance(result, bool): + raise JinjaRenderingException( + f"Could not convert value '{raw!s}' into type 'bool'" + ) + if isinstance(raw, NumberMarker) and not _is_number(result): + raise JinjaRenderingException( + f"Could not convert value '{raw!s}' into type 'number'" + ) return result @@ -413,6 +444,22 @@ def __reduce__(self): return Undefined +NATIVE_FILTERS: Dict[str, Callable[[Any], Any]] = { + 'as_text': TextMarker, + 'as_bool': BoolMarker, + 'as_native': NativeMarker, + 'as_number': NumberMarker, +} + + +TEXT_FILTERS: Dict[str, Callable[[Any], Any]] = { + 'as_text': lambda x: x, + 'as_bool': lambda x: x, + 'as_native': lambda x: x, + 'as_number': lambda x: x, +} + + def get_environment( node=None, capture_macros: bool = False, @@ -432,13 +479,13 @@ def get_environment( text_filter: Type if native: env_cls = NativeSandboxEnvironment - text_filter = TextMarker + filters = NATIVE_FILTERS else: env_cls = MacroFuzzEnvironment - text_filter = str + filters = TEXT_FILTERS env = env_cls(**args) - 
env.filters['as_text'] = text_filter + env.filters.update(filters) return env diff --git a/core/dbt/clients/system.py b/core/dbt/clients/system.py index b167d192e4e..0b09edaa1b7 100644 --- a/core/dbt/clients/system.py +++ b/core/dbt/clients/system.py @@ -19,6 +19,12 @@ from dbt.logger import GLOBAL_LOGGER as logger +if sys.platform == 'win32': + from ctypes import WinDLL, c_bool +else: + WinDLL = None + c_bool = None + def find_matching( root_path: str, @@ -66,6 +72,7 @@ def find_matching( def load_file_contents(path: str, strip: bool = True) -> str: + path = convert_path(path) with open(path, 'rb') as handle: to_return = handle.read().decode('utf-8') @@ -81,6 +88,7 @@ def make_directory(path: str) -> None: exist. This function handles the case where two threads try to create a directory at once. """ + path = convert_path(path) if not os.path.exists(path): # concurrent writes that try to create the same dir can fail try: @@ -99,6 +107,7 @@ def make_file(path: str, contents: str = '', overwrite: bool = False) -> bool: exists. The file is saved with contents `contents` """ if overwrite or not os.path.exists(path): + path = convert_path(path) with open(path, 'w') as fh: fh.write(contents) return True @@ -121,10 +130,35 @@ def supports_symlinks() -> bool: def write_file(path: str, contents: str = '') -> bool: - make_directory(os.path.dirname(path)) - with open(path, 'w', encoding='utf-8') as f: - f.write(str(contents)) - + path = convert_path(path) + try: + make_directory(os.path.dirname(path)) + with open(path, 'w', encoding='utf-8') as f: + f.write(str(contents)) + except Exception as exc: + # note that you can't just catch FileNotFound, because sometimes + # windows apparently raises something else. + # It's also not sufficient to look at the path length, because + # sometimes windows fails to write paths that are less than the length + # limit. So on windows, suppress all errors that happen from writing + # to disk. 
+ if os.name == 'nt': + # sometimes we get a winerror of 3 which means the path was + # definitely too long, but other times we don't and it means the + # path was just probably too long. This is probably based on the + # windows/python version. + if getattr(exc, 'winerror', 0) == 3: + reason = 'Path was too long' + else: + reason = 'Path was possibly too long' + # all our hard work and the path was still too long. Log and + # continue. + logger.debug( + f'Could not write to path {path}({len(path)} characters): ' + f'{reason}\nexception: {exc}' + ) + else: + raise return True @@ -163,7 +197,7 @@ def rmdir(path: str) -> None: different permissions on Windows. Otherwise, removing directories (eg. cloned via git) can cause rmtree to throw a PermissionError exception """ - logger.debug("DEBUG** Window rmdir sys.platform: {}".format(sys.platform)) + path = convert_path(path) if sys.platform == 'win32': onerror = _windows_rmdir_readonly else: @@ -172,15 +206,90 @@ def rmdir(path: str) -> None: shutil.rmtree(path, onerror=onerror) +def _win_prepare_path(path: str) -> str: + """Given a windows path, prepare it for use by making sure it is absolute + and normalized. + """ + path = os.path.normpath(path) + + # if a path starts with '\', splitdrive() on it will return '' for the + # drive, but the prefix requires a drive letter. So let's add the drive + # letter back in. + # Unless it starts with '\\'. In that case, the path is a UNC mount point + # and splitdrive will be fine. + if not path.startswith('\\\\') and path.startswith('\\'): + curdrive = os.path.splitdrive(os.getcwd())[0] + path = curdrive + path + + # now our path is either an absolute UNC path or relative to the current + # directory. If it's relative, we need to make it absolute or the prefix + # won't work. `ntpath.abspath` allegedly doesn't always play nice with long + # paths, so do this instead. 
+ if not os.path.splitdrive(path)[0]: + path = os.path.join(os.getcwd(), path) + + return path + + +def _supports_long_paths() -> bool: + if sys.platform != 'win32': + return True + # Eryk Sun says to use `WinDLL('ntdll')` instead of `windll.ntdll` because + # of pointer caching in a comment here: + # https://stackoverflow.com/a/35097999/11262881 + # I don't know exactly what he means, but I am inclined to believe him as + # he's pretty active on Python windows bugs! + try: + dll = WinDLL('ntdll') + except OSError: # I don't think this happens? you need ntdll to run python + return False + # not all windows versions have it at all + if not hasattr(dll, 'RtlAreLongPathsEnabled'): + return False + # tell windows we want to get back a single unsigned byte (a bool). + dll.RtlAreLongPathsEnabled.restype = c_bool + return dll.RtlAreLongPathsEnabled() + + +def convert_path(path: str) -> str: + """Convert a path that dbt has, which might be >260 characters long, to one + that will be writable/readable on Windows. + + On other platforms, this is a no-op. + """ + # some parts of python seem to append '\*.*' to strings, better safe than + # sorry. + if len(path) < 250: + return path + if _supports_long_paths(): + return path + + prefix = '\\\\?\\' + # Nothing to do + if path.startswith(prefix): + return path + + path = _win_prepare_path(path) + + # add the prefix. The check is just in case os.getcwd() does something + # unexpected - I believe this if-statement should always be True though! 
+ if not path.startswith(prefix): + path = prefix + path + return path + + def remove_file(path: str) -> None: + path = convert_path(path) os.remove(path) def path_exists(path: str) -> bool: + path = convert_path(path) return os.path.lexists(path) def path_is_symlink(path: str) -> bool: + path = convert_path(path) return os.path.islink(path) @@ -326,6 +435,7 @@ def run_cmd( def download(url: str, path: str, timeout: Union[float, tuple] = None) -> None: + path = convert_path(path) connection_timeout = timeout or float(os.getenv('DBT_HTTP_TIMEOUT', 10)) response = requests.get(url, timeout=connection_timeout) with open(path, 'wb') as handle: @@ -334,6 +444,8 @@ def download(url: str, path: str, timeout: Union[float, tuple] = None) -> None: def rename(from_path: str, to_path: str, force: bool = False) -> None: + from_path = convert_path(from_path) + to_path = convert_path(to_path) is_symlink = path_is_symlink(to_path) if os.path.exists(to_path) and force: @@ -348,6 +460,7 @@ def rename(from_path: str, to_path: str, force: bool = False) -> None: def untar_package( tar_path: str, dest_dir: str, rename_to: Optional[str] = None ) -> None: + tar_path = convert_path(tar_path) tar_dir_name = None with tarfile.open(tar_path, 'r') as tarball: tarball.extractall(dest_dir) @@ -384,6 +497,8 @@ def move(src, dst): This is almost identical to the real shutil.move, except it uses our rmtree and skips handling non-windows OSes since the existing one works ok there. """ + src = convert_path(src) + dst = convert_path(dst) if os.name != 'nt': return shutil.move(src, dst) @@ -418,4 +533,5 @@ def rmtree(path): """Recursively remove path. On permissions errors on windows, try to remove the read-only flag and try again. 
""" + path = convert_path(path) return shutil.rmtree(path, onerror=chmod_and_retry) diff --git a/core/dbt/config/profile.py b/core/dbt/config/profile.py index b3c145348ca..f5de7a0d323 100644 --- a/core/dbt/config/profile.py +++ b/core/dbt/config/profile.py @@ -8,6 +8,7 @@ from dbt.clients.yaml_helper import load_yaml_text from dbt.contracts.connection import Credentials, HasCredentials from dbt.contracts.project import ProfileConfig, UserConfig +from dbt.exceptions import CompilationException from dbt.exceptions import DbtProfileError from dbt.exceptions import DbtProjectError from dbt.exceptions import ValidationException @@ -268,7 +269,10 @@ def render_profile( raw_profile, profile_name, target_name ) - profile_data = renderer.render_data(raw_profile_data) + try: + profile_data = renderer.render_data(raw_profile_data) + except CompilationException as exc: + raise DbtProfileError(str(exc)) from exc return target_name, profile_data @classmethod diff --git a/core/dbt/config/renderer.py b/core/dbt/config/renderer.py index f65625c9e3b..9e37b0e70a9 100644 --- a/core/dbt/config/renderer.py +++ b/core/dbt/config/renderer.py @@ -78,7 +78,7 @@ def should_render_keypath_v1(self, keypath: Keypath) -> bool: if first in {'on-run-start', 'on-run-end', 'query-comment'}: return False # models have two things to avoid - if first in {'seeds', 'models', 'snapshots', 'seeds'}: + if first in {'seeds', 'models', 'snapshots'}: # model-level hooks if 'pre-hook' in keypath or 'post-hook' in keypath: return False @@ -102,11 +102,12 @@ def should_render_keypath_v2(self, keypath: Keypath) -> bool: return False if first in {'seeds', 'models', 'snapshots', 'seeds'}: + keypath_parts = { + (k.lstrip('+') if isinstance(k, str) else k) + for k in keypath + } # model-level hooks - if 'pre-hook' in keypath or 'post-hook' in keypath: - return False - # model-level 'vars' declarations - if 'vars' in keypath: + if 'pre-hook' in keypath_parts or 'post-hook' in keypath_parts: return False return True 
diff --git a/core/dbt/exceptions.py b/core/dbt/exceptions.py index 729c29e0c28..06366d691c5 100644 --- a/core/dbt/exceptions.py +++ b/core/dbt/exceptions.py @@ -257,6 +257,10 @@ def __reduce__(self): return (JSONValidationException, (self.typename, self.errors)) +class JinjaRenderingException(CompilationException): + pass + + class UnknownAsyncIDException(Exception): CODE = 10012 MESSAGE = 'RPC server got an unknown async ID' diff --git a/core/dbt/flags.py b/core/dbt/flags.py index 7f3e4cb864c..ffcb3958081 100644 --- a/core/dbt/flags.py +++ b/core/dbt/flags.py @@ -28,13 +28,8 @@ def env_set_truthy(key: str) -> Optional[str]: def _get_context(): - if os.name == 'posix' and os.uname().sysname.lower() != 'darwin': - # on linux fork is available and it's fast - return multiprocessing.get_context('fork') - else: - # on windows, spawn is the only choice. - # On osx, fork is buggy: https://bugs.python.org/issue33725 - return multiprocessing.get_context('spawn') + # TODO: change this back to use fork() on linux when we have made that safe + return multiprocessing.get_context('spawn') MP_CONTEXT = _get_context() diff --git a/core/dbt/include/index.html b/core/dbt/include/index.html index 0695b123744..9a20c3cc558 100644 --- a/core/dbt/include/index.html +++ b/core/dbt/include/index.html @@ -8,7 +8,7 @@ - + @@ -24,7 +24,7 @@
-