diff --git a/CHANGELOG.md b/CHANGELOG.md index 45b98820875..8f4b97f8ddd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -43,15 +43,56 @@ Contributors: - [@azhard](https://github.com/azhard) ([#2517](https://github.com/fishtown-analytics/dbt/pull/2517), [#2521](https://github.com/fishtown-analytics/dbt/pull/2521), [#2547](https://github.com/fishtown-analytics/dbt/pull/2547)) - [@alepuccetti](https://github.com/alepuccetti) ([#2526](https://github.com/fishtown-analytics/dbt/issues/2526)) + ## dbt 0.17.1 (Release TBD) + +## dbt 0.17.1rc4 (July 08, 2020) + + +### Fixes +- dbt native rendering now requires an opt-in with the `as_native` filter. Added `as_bool` and `as_number` filters, which are like `as_native` but also type-check. ([#2612](https://github.com/fishtown-analytics/dbt/issues/2612), [#2618](https://github.com/fishtown-analytics/dbt/pull/2618)) + + +## dbt 0.17.1rc3 (July 01, 2020) + + +### Fixes +- dbt native rendering now avoids turning quoted strings into unquoted strings ([#2597](https://github.com/fishtown-analytics/dbt/issues/2597), [#2599](https://github.com/fishtown-analytics/dbt/pull/2599)) +- Hash name of local packages ([#2600](https://github.com/fishtown-analytics/dbt/pull/2600)) +- On bigquery, also persist docs for seeds ([#2598](https://github.com/fishtown-analytics/dbt/issues/2598), [#2601](https://github.com/fishtown-analytics/dbt/pull/2601)) +- Swallow all file-writing related errors on Windows, regardless of path length or exception type. ([#2603](https://github.com/fishtown-analytics/dbt/pull/2603)) + + +## dbt 0.17.1rc2 (June 25, 2020) + +### Fixes +- dbt config-version: 2 now properly defers rendering `+pre-hook` and `+post-hook` fields. ([#2583](https://github.com/fishtown-analytics/dbt/issues/2583), [#2854](https://github.com/fishtown-analytics/dbt/pull/2854)) +- dbt handles too-long paths on windows that do not report that the path is too long ([#2591](https://github.com/fishtown-analytics/dbt/pull/2591)) + + +## dbt 0.17.1rc1 (June 19, 2020) + + ### Fixes - dbt compile and ls no longer create schemas if they don't already exist ([#2525](https://github.com/fishtown-analytics/dbt/issues/2525), [#2528](https://github.com/fishtown-analytics/dbt/pull/2528)) - `dbt deps` now respects the `--project-dir` flag, so using `dbt deps --project-dir=/some/path` and then `dbt run --project-dir=/some/path` will properly find dependencies ([#2519](https://github.com/fishtown-analytics/dbt/issues/2519), [#2534](https://github.com/fishtown-analytics/dbt/pull/2534)) - `packages.yml` revision/version fields can be float-like again (`revision: '1.0'` is valid). 
([#2518](https://github.com/fishtown-analytics/dbt/issues/2518), [#2535](https://github.com/fishtown-analytics/dbt/pull/2535)) +- Parallel RPC requests no longer step on each others' arguments ([#2484](https://github.com/fishtown-analytics/dbt/issues/2484), [#2554](https://github.com/fishtown-analytics/dbt/pull/2554)) +- `persist_docs` now takes into account descriptions for nested columns in bigquery ([#2549](https://github.com/fishtown-analytics/dbt/issues/2549), [#2550](https://github.com/fishtown-analytics/dbt/pull/2550)) +- On windows (depending upon OS support), dbt no longer fails with errors when writing artifacts ([#2558](https://github.com/fishtown-analytics/dbt/issues/2558), [#2566](https://github.com/fishtown-analytics/dbt/pull/2566)) +- dbt again respects config aliases in config() calls and dbt_project.yml ([#2557](https://github.com/fishtown-analytics/dbt/issues/2557), [#2559](https://github.com/fishtown-analytics/dbt/pull/2559), [#2575](https://github.com/fishtown-analytics/dbt/pull/2575)) +- fix unclickable nodes in the dbt Docs DAG viz ([#101](https://github.com/fishtown-analytics/dbt-docs/pull/101)) +- fix null database names for Spark projects in dbt Docs site ([#96](https://github.com/fishtown-analytics/dbt-docs/pull/96)) + +Contributors: + - [@bodschut](https://github.com/bodschut) ([#2550](https://github.com/fishtown-analytics/dbt/pull/2550)) ## dbt 0.17.0 (June 08, 2020) diff --git a/core/dbt/clients/jinja.py b/core/dbt/clients/jinja.py index 07a344338b7..c398c27363b 100644 --- a/core/dbt/clients/jinja.py +++ b/core/dbt/clients/jinja.py @@ -8,7 +8,8 @@ from contextlib import contextmanager from itertools import chain, islice from typing import ( - List, Union, Set, Optional, Dict, Any, Iterator, Type, NoReturn, Tuple + List, Union, Set, Optional, Dict, Any, Iterator, Type, NoReturn, Tuple, + Callable ) import jinja2 @@ -28,7 +29,7 @@ from dbt.contracts.graph.parsed import ParsedSchemaTestNode from dbt.exceptions import ( InternalException, raise_compiler_error, CompilationException, - invalid_materialization_argument, MacroReturn + invalid_materialization_argument, MacroReturn, JinjaRenderingException ) from dbt import flags from dbt.logger import GLOBAL_LOGGER as logger # noqa @@ -111,6 +112,24 @@ class TextMarker(str): """ +class NativeMarker(str): + """A special native-env marker that indicates the field should be passed to + literal_eval. + """ + + +class BoolMarker(NativeMarker): + pass + + +class NumberMarker(NativeMarker): + pass + + +def _is_number(value) -> bool: + return isinstance(value, (int, float)) and not isinstance(value, bool) + + def quoted_native_concat(nodes): """This is almost native_concat from the NativeTemplate, except in the special case of a single argument that is a quoted string and returns a @@ -119,19 +138,31 @@ def quoted_native_concat(nodes): head = list(islice(nodes, 2)) if not head: - return None + return '' if len(head) == 1: raw = head[0] if isinstance(raw, TextMarker): return str(raw) + elif not isinstance(raw, NativeMarker): + # return non-strings as-is + return raw else: - raw = "".join([str(v) for v in chain(head, nodes)]) + # multiple nodes become a string. 
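+ # (multiple result nodes are always joined and returned as a string; + # only a single NativeMarker result is ever handed to literal_eval below)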
+ return "".join([str(v) for v in chain(head, nodes)]) try: result = literal_eval(raw) except (ValueError, SyntaxError, MemoryError): - return raw + result = raw + if isinstance(raw, BoolMarker) and not isinstance(result, bool): + raise JinjaRenderingException( + f"Could not convert value '{raw!s}' into type 'bool'" + ) + if isinstance(raw, NumberMarker) and not _is_number(result): + raise JinjaRenderingException( + f"Could not convert value '{raw!s}' into type 'number'" + ) return result @@ -413,6 +444,22 @@ def __reduce__(self): return Undefined +NATIVE_FILTERS: Dict[str, Callable[[Any], Any]] = { + 'as_text': TextMarker, + 'as_bool': BoolMarker, + 'as_native': NativeMarker, + 'as_number': NumberMarker, +} + + +TEXT_FILTERS: Dict[str, Callable[[Any], Any]] = { + 'as_text': lambda x: x, + 'as_bool': lambda x: x, + 'as_native': lambda x: x, + 'as_number': lambda x: x, +} + + def get_environment( node=None, capture_macros: bool = False, @@ -432,13 +479,13 @@ def get_environment( text_filter: Type if native: env_cls = NativeSandboxEnvironment - text_filter = TextMarker + filters = NATIVE_FILTERS else: env_cls = MacroFuzzEnvironment - text_filter = str + filters = TEXT_FILTERS env = env_cls(**args) - env.filters['as_text'] = text_filter + env.filters.update(filters) return env diff --git a/core/dbt/clients/system.py b/core/dbt/clients/system.py index b167d192e4e..0b09edaa1b7 100644 --- a/core/dbt/clients/system.py +++ b/core/dbt/clients/system.py @@ -19,6 +19,12 @@ from dbt.logger import GLOBAL_LOGGER as logger +if sys.platform == 'win32': + from ctypes import WinDLL, c_bool +else: + WinDLL = None + c_bool = None + def find_matching( root_path: str, @@ -66,6 +72,7 @@ def find_matching( def load_file_contents(path: str, strip: bool = True) -> str: + path = convert_path(path) with open(path, 'rb') as handle: to_return = handle.read().decode('utf-8') @@ -81,6 +88,7 @@ def make_directory(path: str) -> None: exist. This function handles the case where two threads try to create a directory at once. """ + path = convert_path(path) if not os.path.exists(path): # concurrent writes that try to create the same dir can fail try: @@ -99,6 +107,7 @@ def make_file(path: str, contents: str = '', overwrite: bool = False) -> bool: exists. The file is saved with contents `contents` """ if overwrite or not os.path.exists(path): + path = convert_path(path) with open(path, 'w') as fh: fh.write(contents) return True @@ -121,10 +130,35 @@ def supports_symlinks() -> bool: def write_file(path: str, contents: str = '') -> bool: - make_directory(os.path.dirname(path)) - with open(path, 'w', encoding='utf-8') as f: - f.write(str(contents)) - + path = convert_path(path) + try: + make_directory(os.path.dirname(path)) + with open(path, 'w', encoding='utf-8') as f: + f.write(str(contents)) + except Exception as exc: + # note that you can't just catch FileNotFound, because sometimes + # windows apparently raises something else. + # It's also not sufficient to look at the path length, because + # sometimes windows fails to write paths that are less than the length + # limit. So on windows, suppress all errors that happen from writing + # to disk. + if os.name == 'nt': + # sometimes we get a winerror of 3 which means the path was + # definitely too long, but other times we don't and it means the + # path was just probably too long. This is probably based on the + # windows/python version. 
+ if getattr(exc, 'winerror', 0) == 3: + reason = 'Path was too long' + else: + reason = 'Path was possibly too long' + # all our hard work and the path was still too long. Log and + # continue. + logger.debug( + f'Could not write to path {path}({len(path)} characters): ' + f'{reason}\nexception: {exc}' + ) + else: + raise return True @@ -163,7 +197,7 @@ def rmdir(path: str) -> None: different permissions on Windows. Otherwise, removing directories (eg. cloned via git) can cause rmtree to throw a PermissionError exception """ - logger.debug("DEBUG** Window rmdir sys.platform: {}".format(sys.platform)) + path = convert_path(path) if sys.platform == 'win32': onerror = _windows_rmdir_readonly else: @@ -172,15 +206,90 @@ shutil.rmtree(path, onerror=onerror) +def _win_prepare_path(path: str) -> str: + """Given a windows path, prepare it for use by making sure it is absolute + and normalized. + """ + path = os.path.normpath(path) + + # if a path starts with '\', splitdrive() on it will return '' for the + # drive, but the prefix requires a drive letter. So let's add the drive + # letter back in. + # Unless it starts with '\\'. In that case, the path is a UNC mount point + # and splitdrive will be fine. + if not path.startswith('\\\\') and path.startswith('\\'): + curdrive = os.path.splitdrive(os.getcwd())[0] + path = curdrive + path + + # now our path is either an absolute UNC path or relative to the current + # directory. If it's relative, we need to make it absolute or the prefix + # won't work. `ntpath.abspath` allegedly doesn't always play nice with long + # paths, so do this instead. + if not os.path.splitdrive(path)[0]: + path = os.path.join(os.getcwd(), path) + + return path + + +def _supports_long_paths() -> bool: + if sys.platform != 'win32': + return True + # Eryk Sun says to use `WinDLL('ntdll')` instead of `windll.ntdll` because + # of pointer caching in a comment here: + # https://stackoverflow.com/a/35097999/11262881 + # I don't know exactly what he means, but I am inclined to believe him as + # he's pretty active on Python windows bugs! + try: + dll = WinDLL('ntdll') + except OSError: # I don't think this happens? you need ntdll to run python + return False + # not all windows versions have it at all + if not hasattr(dll, 'RtlAreLongPathsEnabled'): + return False + # tell windows we want to get back a single unsigned byte (a bool). + dll.RtlAreLongPathsEnabled.restype = c_bool + return dll.RtlAreLongPathsEnabled() + + +def convert_path(path: str) -> str: + """Convert a path that dbt has, which might be >260 characters long, to one + that will be writable/readable on Windows. + + On other platforms, this is a no-op. + """ + # some parts of python seem to append '\*.*' to strings, better safe than + # sorry. + if len(path) < 250: + return path + if _supports_long_paths(): + return path + + prefix = '\\\\?\\' + # Nothing to do + if path.startswith(prefix): + return path + + path = _win_prepare_path(path) + + # add the prefix. The check is just in case os.getcwd() does something + # unexpected - I believe this if statement should always be True though! 
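+ # (the '\\?\' prefix tells the Win32 file APIs to pass the path through + # without normalization, which also lifts the 260-character MAX_PATH limit)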
+ if not path.startswith(prefix): + path = prefix + path + return path + + def remove_file(path: str) -> None: + path = convert_path(path) os.remove(path) def path_exists(path: str) -> bool: + path = convert_path(path) return os.path.lexists(path) def path_is_symlink(path: str) -> bool: + path = convert_path(path) return os.path.islink(path) @@ -326,6 +435,7 @@ def run_cmd( def download(url: str, path: str, timeout: Union[float, tuple] = None) -> None: + path = convert_path(path) connection_timeout = timeout or float(os.getenv('DBT_HTTP_TIMEOUT', 10)) response = requests.get(url, timeout=connection_timeout) with open(path, 'wb') as handle: @@ -334,6 +444,8 @@ def download(url: str, path: str, timeout: Union[float, tuple] = None) -> None: def rename(from_path: str, to_path: str, force: bool = False) -> None: + from_path = convert_path(from_path) + to_path = convert_path(to_path) is_symlink = path_is_symlink(to_path) if os.path.exists(to_path) and force: @@ -348,6 +460,7 @@ def rename(from_path: str, to_path: str, force: bool = False) -> None: def untar_package( tar_path: str, dest_dir: str, rename_to: Optional[str] = None ) -> None: + tar_path = convert_path(tar_path) tar_dir_name = None with tarfile.open(tar_path, 'r') as tarball: tarball.extractall(dest_dir) @@ -384,6 +497,8 @@ def move(src, dst): This is almost identical to the real shutil.move, except it uses our rmtree and skips handling non-windows OSes since the existing one works ok there. """ + src = convert_path(src) + dst = convert_path(dst) if os.name != 'nt': return shutil.move(src, dst) @@ -418,4 +533,5 @@ def rmtree(path): """Recursively remove path. On permissions errors on windows, try to remove the read-only flag and try again. """ + path = convert_path(path) return shutil.rmtree(path, onerror=chmod_and_retry) diff --git a/core/dbt/config/profile.py b/core/dbt/config/profile.py index b3c145348ca..f5de7a0d323 100644 --- a/core/dbt/config/profile.py +++ b/core/dbt/config/profile.py @@ -8,6 +8,7 @@ from dbt.clients.yaml_helper import load_yaml_text from dbt.contracts.connection import Credentials, HasCredentials from dbt.contracts.project import ProfileConfig, UserConfig +from dbt.exceptions import CompilationException from dbt.exceptions import DbtProfileError from dbt.exceptions import DbtProjectError from dbt.exceptions import ValidationException @@ -268,7 +269,10 @@ def render_profile( raw_profile, profile_name, target_name ) - profile_data = renderer.render_data(raw_profile_data) + try: + profile_data = renderer.render_data(raw_profile_data) + except CompilationException as exc: + raise DbtProfileError(str(exc)) from exc return target_name, profile_data @classmethod diff --git a/core/dbt/config/renderer.py b/core/dbt/config/renderer.py index f65625c9e3b..9e37b0e70a9 100644 --- a/core/dbt/config/renderer.py +++ b/core/dbt/config/renderer.py @@ -78,7 +78,7 @@ def should_render_keypath_v1(self, keypath: Keypath) -> bool: if first in {'on-run-start', 'on-run-end', 'query-comment'}: return False # models have two things to avoid - if first in {'seeds', 'models', 'snapshots', 'seeds'}: + if first in {'seeds', 'models', 'snapshots'}: # model-level hooks if 'pre-hook' in keypath or 'post-hook' in keypath: return False @@ -102,11 +102,12 @@ def should_render_keypath_v2(self, keypath: Keypath) -> bool: return False if first in {'seeds', 'models', 'snapshots', 'seeds'}: + keypath_parts = { + (k.lstrip('+') if isinstance(k, str) else k) + for k in keypath + } # model-level hooks - if 'pre-hook' in keypath or 'post-hook' in keypath: 
- return False - # model-level 'vars' declarations - if 'vars' in keypath: + if 'pre-hook' in keypath_parts or 'post-hook' in keypath_parts: return False return True diff --git a/core/dbt/exceptions.py b/core/dbt/exceptions.py index 729c29e0c28..06366d691c5 100644 --- a/core/dbt/exceptions.py +++ b/core/dbt/exceptions.py @@ -257,6 +257,10 @@ def __reduce__(self): return (JSONValidationException, (self.typename, self.errors)) +class JinjaRenderingException(CompilationException): + pass + + class UnknownAsyncIDException(Exception): CODE = 10012 MESSAGE = 'RPC server got an unknown async ID' diff --git a/core/dbt/flags.py b/core/dbt/flags.py index 7f3e4cb864c..ffcb3958081 100644 --- a/core/dbt/flags.py +++ b/core/dbt/flags.py @@ -28,13 +28,8 @@ def env_set_truthy(key: str) -> Optional[str]: def _get_context(): - if os.name == 'posix' and os.uname().sysname.lower() != 'darwin': - # on linux fork is available and it's fast - return multiprocessing.get_context('fork') - else: - # on windows, spawn is the only choice. - # On osx, fork is buggy: https://bugs.python.org/issue33725 - return multiprocessing.get_context('spawn') + # TODO: change this back to use fork() on linux when we have made that safe + return multiprocessing.get_context('spawn') MP_CONTEXT = _get_context() diff --git a/core/dbt/include/index.html b/core/dbt/include/index.html index 0695b123744..9a20c3cc558 100644 --- a/core/dbt/include/index.html +++ b/core/dbt/include/index.html @@ -8,7 +8,7 @@ - + @@ -24,7 +24,7 @@
[minified dbt-docs index.html bundle updated; binary-like asset content omitted]
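As a quick reference for the jinja changes above, here is a minimal sketch of how the new opt-in filters behave. It mirrors the unit tests at the end of this diff, and assumes the dbt from this diff (0.17.1rc4 or later) is importable:

```python
from dbt.clients.jinja import get_rendered
from dbt.exceptions import JinjaRenderingException

# without a filter, native rendering no longer coerces: '1991' stays a string
assert get_rendered('{{ 1991 }}', {}, native=True) == '1991'

# as_native opts in to literal_eval, so the int comes back as an int
assert get_rendered('{{ 1991 | as_native }}', {}, native=True) == 1991

# as_bool / as_number act like as_native but also type-check the result
assert get_rendered("{{ '1' | as_number }}", {}, native=True) == 1
try:
    get_rendered("{{ 'bar' | as_number }}", {}, native=True)
except JinjaRenderingException as exc:
    assert "Could not convert value 'bar' into type 'number'" in str(exc)

# in plain text rendering (native=False) all four filters are no-ops
assert get_rendered('{{ 1991 | as_native }}', {}, native=False) == '1991'
```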
- diff --git a/core/dbt/rpc/method.py b/core/dbt/rpc/method.py index b2bd1b36456..74563305107 100644 --- a/core/dbt/rpc/method.py +++ b/core/dbt/rpc/method.py @@ -1,5 +1,6 @@ import inspect from abc import abstractmethod +from copy import deepcopy from typing import List, Optional, Type, TypeVar, Generic, Dict, Any from hologram import JsonSchemaMixin, ValidationError @@ -20,7 +21,7 @@ class RemoteMethod(Generic[Parameters, Result]): METHOD_NAME: Optional[str] = None def __init__(self, args, config): - self.args = args + self.args = deepcopy(args) self.config = config @classmethod diff --git a/core/dbt/task/deps.py b/core/dbt/task/deps.py index 14320fbf2f4..f6979295cb9 100644 --- a/core/dbt/task/deps.py +++ b/core/dbt/task/deps.py @@ -26,6 +26,7 @@ def track_package_install( # Hub packages do not need to be hashed, as they are public # Use the string 'local' for local package versions if source_type == 'local': + package_name = dbt.utils.md5(package_name) version = 'local' elif source_type != 'hub': package_name = dbt.utils.md5(package_name) diff --git a/core/setup.py b/core/setup.py index 8a89c916959..7bf047a4cbb 100644 --- a/core/setup.py +++ b/core/setup.py @@ -66,7 +66,6 @@ def read(fname): 'dataclasses==0.6;python_version<"3.7"', 'hologram==0.0.7', 'logbook>=1.5,<1.6', - 'pytest-logbook>=1.2.0,<1.3', 'typing-extensions>=3.7.4,<3.8', # the following are all to match snowflake-connector-python 'requests>=2.18.0,<2.23.0', diff --git a/docker/requirements/requirements.0.17.1rc1.txt b/docker/requirements/requirements.0.17.1rc1.txt new file mode 100644 index 00000000000..4d2113811de --- /dev/null +++ b/docker/requirements/requirements.0.17.1rc1.txt @@ -0,0 +1,64 @@ +agate==1.6.1 +asn1crypto==1.3.0 +attrs==19.3.0 +azure-common==1.1.25 +azure-storage-blob==2.1.0 +azure-storage-common==2.1.0 +Babel==2.8.0 +boto3==1.11.17 +botocore==1.14.17 +cachetools==4.1.0 +certifi==2020.4.5.2 +cffi==1.13.2 +chardet==3.0.4 +colorama==0.4.3 +cryptography==2.9.2 +decorator==4.4.2 +docutils==0.15.2 +google-api-core==1.16.0 +google-auth==1.18.0 +google-cloud-bigquery==1.24.0 +google-cloud-core==1.3.0 +google-resumable-media==0.5.1 +googleapis-common-protos==1.6.0 +hologram==0.0.7 +idna==2.8 +ijson==2.6.1 +importlib-metadata==1.6.1 +isodate==0.6.0 +Jinja2==2.11.2 +jmespath==0.10.0 +json-rpc==1.13.0 +jsonschema==3.1.1 +leather==0.3.3 +Logbook==1.5.3 +MarkupSafe==1.1.1 +minimal-snowplow-tracker==0.0.2 +networkx==2.4 +oscrypto==1.2.0 +parsedatetime==2.6 +protobuf==3.11.3 +psycopg2-binary==2.8.5 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pycparser==2.20 +pycryptodomex==3.9.7 +PyJWT==1.7.1 +pyOpenSSL==19.1.0 +pyrsistent==0.16.0 +python-dateutil==2.8.1 +python-slugify==4.0.0 +pytimeparse==1.1.8 +pytz==2020.1 +PyYAML==5.3.1 +requests==2.22.0 +rsa==4.6 +s3transfer==0.3.3 +six==1.15.0 +snowflake-connector-python==2.2.1 +sqlparse==0.3.1 +text-unidecode==1.3 +typing-extensions==3.7.4.2 +urllib3==1.25.9 +Werkzeug==0.16.1 +zipp==3.1.0 diff --git a/docker/requirements/requirements.0.17.1rc2.txt b/docker/requirements/requirements.0.17.1rc2.txt new file mode 100644 index 00000000000..a1b927dcfa6 --- /dev/null +++ b/docker/requirements/requirements.0.17.1rc2.txt @@ -0,0 +1,64 @@ +agate==1.6.1 +asn1crypto==1.3.0 +attrs==19.3.0 +azure-common==1.1.25 +azure-storage-blob==2.1.0 +azure-storage-common==2.1.0 +Babel==2.8.0 +boto3==1.11.17 +botocore==1.14.17 +cachetools==4.1.0 +certifi==2020.6.20 +cffi==1.13.2 +chardet==3.0.4 +colorama==0.4.3 +cryptography==2.9.2 +decorator==4.4.2 +docutils==0.15.2 +google-api-core==1.16.0 
+google-auth==1.18.0 +google-cloud-bigquery==1.24.0 +google-cloud-core==1.3.0 +google-resumable-media==0.5.1 +googleapis-common-protos==1.6.0 +hologram==0.0.7 +idna==2.8 +ijson==2.6.1 +importlib-metadata==1.6.1 +isodate==0.6.0 +Jinja2==2.11.2 +jmespath==0.10.0 +json-rpc==1.13.0 +jsonschema==3.1.1 +leather==0.3.3 +Logbook==1.5.3 +MarkupSafe==1.1.1 +minimal-snowplow-tracker==0.0.2 +networkx==2.4 +oscrypto==1.2.0 +parsedatetime==2.6 +protobuf==3.11.3 +psycopg2-binary==2.8.5 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pycparser==2.20 +pycryptodomex==3.9.8 +PyJWT==1.7.1 +pyOpenSSL==19.1.0 +pyrsistent==0.16.0 +python-dateutil==2.8.1 +python-slugify==4.0.0 +pytimeparse==1.1.8 +pytz==2020.1 +PyYAML==5.3.1 +requests==2.22.0 +rsa==4.6 +s3transfer==0.3.3 +six==1.15.0 +snowflake-connector-python==2.2.1 +sqlparse==0.3.1 +text-unidecode==1.3 +typing-extensions==3.7.4.2 +urllib3==1.25.9 +Werkzeug==0.16.1 +zipp==3.1.0 diff --git a/docker/requirements/requirements.0.17.1rc3.txt b/docker/requirements/requirements.0.17.1rc3.txt new file mode 100644 index 00000000000..58c730e2d93 --- /dev/null +++ b/docker/requirements/requirements.0.17.1rc3.txt @@ -0,0 +1,64 @@ +agate==1.6.1 +asn1crypto==1.3.0 +attrs==19.3.0 +azure-common==1.1.25 +azure-storage-blob==2.1.0 +azure-storage-common==2.1.0 +Babel==2.8.0 +boto3==1.11.17 +botocore==1.14.17 +cachetools==4.1.1 +certifi==2020.6.20 +cffi==1.13.2 +chardet==3.0.4 +colorama==0.4.3 +cryptography==2.9.2 +decorator==4.4.2 +docutils==0.15.2 +google-api-core==1.16.0 +google-auth==1.18.0 +google-cloud-bigquery==1.24.0 +google-cloud-core==1.3.0 +google-resumable-media==0.5.1 +googleapis-common-protos==1.6.0 +hologram==0.0.7 +idna==2.8 +ijson==2.6.1 +importlib-metadata==1.7.0 +isodate==0.6.0 +Jinja2==2.11.2 +jmespath==0.10.0 +json-rpc==1.13.0 +jsonschema==3.1.1 +leather==0.3.3 +Logbook==1.5.3 +MarkupSafe==1.1.1 +minimal-snowplow-tracker==0.0.2 +networkx==2.4 +oscrypto==1.2.0 +parsedatetime==2.6 +protobuf==3.11.3 +psycopg2-binary==2.8.5 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pycparser==2.20 +pycryptodomex==3.9.8 +PyJWT==1.7.1 +pyOpenSSL==19.1.0 +pyrsistent==0.16.0 +python-dateutil==2.8.1 +python-slugify==4.0.1 +pytimeparse==1.1.8 +pytz==2020.1 +PyYAML==5.3.1 +requests==2.22.0 +rsa==4.6 +s3transfer==0.3.3 +six==1.15.0 +snowflake-connector-python==2.2.1 +sqlparse==0.3.1 +text-unidecode==1.3 +typing-extensions==3.7.4.2 +urllib3==1.25.9 +Werkzeug==0.16.1 +zipp==3.1.0 diff --git a/docker/requirements/requirements.0.17.1rc4.txt b/docker/requirements/requirements.0.17.1rc4.txt new file mode 100644 index 00000000000..58c730e2d93 --- /dev/null +++ b/docker/requirements/requirements.0.17.1rc4.txt @@ -0,0 +1,64 @@ +agate==1.6.1 +asn1crypto==1.3.0 +attrs==19.3.0 +azure-common==1.1.25 +azure-storage-blob==2.1.0 +azure-storage-common==2.1.0 +Babel==2.8.0 +boto3==1.11.17 +botocore==1.14.17 +cachetools==4.1.1 +certifi==2020.6.20 +cffi==1.13.2 +chardet==3.0.4 +colorama==0.4.3 +cryptography==2.9.2 +decorator==4.4.2 +docutils==0.15.2 +google-api-core==1.16.0 +google-auth==1.18.0 +google-cloud-bigquery==1.24.0 +google-cloud-core==1.3.0 +google-resumable-media==0.5.1 +googleapis-common-protos==1.6.0 +hologram==0.0.7 +idna==2.8 +ijson==2.6.1 +importlib-metadata==1.7.0 +isodate==0.6.0 +Jinja2==2.11.2 +jmespath==0.10.0 +json-rpc==1.13.0 +jsonschema==3.1.1 +leather==0.3.3 +Logbook==1.5.3 +MarkupSafe==1.1.1 +minimal-snowplow-tracker==0.0.2 +networkx==2.4 +oscrypto==1.2.0 +parsedatetime==2.6 +protobuf==3.11.3 +psycopg2-binary==2.8.5 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pycparser==2.20 +pycryptodomex==3.9.8 
+PyJWT==1.7.1 +pyOpenSSL==19.1.0 +pyrsistent==0.16.0 +python-dateutil==2.8.1 +python-slugify==4.0.1 +pytimeparse==1.1.8 +pytz==2020.1 +PyYAML==5.3.1 +requests==2.22.0 +rsa==4.6 +s3transfer==0.3.3 +six==1.15.0 +snowflake-connector-python==2.2.1 +sqlparse==0.3.1 +text-unidecode==1.3 +typing-extensions==3.7.4.2 +urllib3==1.25.9 +Werkzeug==0.16.1 +zipp==3.1.0 diff --git a/plugins/bigquery/dbt/adapters/bigquery/impl.py b/plugins/bigquery/dbt/adapters/bigquery/impl.py index 7b80f096f14..250407e7ccf 100644 --- a/plugins/bigquery/dbt/adapters/bigquery/impl.py +++ b/plugins/bigquery/dbt/adapters/bigquery/impl.py @@ -596,6 +596,36 @@ def get_table_ref_from_relation(self, conn, relation): relation.identifier, conn) + def _update_column_dict(self, bq_column_dict, dbt_columns, parent=''): + """ + Helper function to recursively traverse the schema of a table in the + update_column_descriptions function below. + + bq_column_dict should be a dict as obtained by the to_api_repr() + function of a SchemaField object. + """ + if parent: + dotted_column_name = '{}.{}'.format(parent, bq_column_dict['name']) + else: + dotted_column_name = bq_column_dict['name'] + + if dotted_column_name in dbt_columns: + column_config = dbt_columns[dotted_column_name] + bq_column_dict['description'] = column_config.get('description') + + new_fields = [] + for child_col_dict in bq_column_dict.get('fields', list()): + new_child_column_dict = self._update_column_dict( + child_col_dict, + dbt_columns, + parent=dotted_column_name + ) + new_fields.append(new_child_column_dict) + + bq_column_dict['fields'] = new_fields + + return bq_column_dict + @available.parse_none def update_column_descriptions(self, relation, columns): if len(columns) == 0: @@ -606,17 +636,34 @@ def update_column_descriptions(self, relation, columns): table = conn.handle.get_table(table_ref) new_schema = [] - for column in table.schema: - if column.name in columns: - column_config = columns[column.name] - column_dict = column.to_api_repr() - column_dict['description'] = column_config.get('description') - column = SchemaField.from_api_repr(column_dict) - new_schema.append(column) + for bq_column in table.schema: + bq_column_dict = bq_column.to_api_repr() + new_bq_column_dict = self._update_column_dict( + bq_column_dict, + columns + ) + new_schema.append(SchemaField.from_api_repr(new_bq_column_dict)) new_table = google.cloud.bigquery.Table(table_ref, schema=new_schema) conn.handle.update_table(new_table, ['schema']) + @available.parse_none + def update_table_description( + self, database: str, schema: str, identifier: str, description: str + ): + conn = self.connections.get_thread_connection() + client = conn.handle + + table_ref = self.connections.table_ref( + database, + schema, + identifier, + conn + ) + table = client.get_table(table_ref) + table.description = description + client.update_table(table, ['description']) + @available.parse_none def alter_table_add_columns(self, relation, columns): diff --git a/plugins/bigquery/dbt/include/bigquery/macros/catalog.sql b/plugins/bigquery/dbt/include/bigquery/macros/catalog.sql index ed64af88173..6822d88a6a8 100644 --- a/plugins/bigquery/dbt/include/bigquery/macros/catalog.sql +++ b/plugins/bigquery/dbt/include/bigquery/macros/catalog.sql @@ -83,7 +83,6 @@ -- use the "real" column name from the paths query below column_name as base_column_name, ordinal_position as column_index, - cast(null as string) as column_comment, is_partitioning_column, clustering_ordinal_position @@ -99,10 +98,10 @@ concat(table_catalog, '.', 
table_schema, '.', table_name) as relation_id, field_path as column_name, data_type as column_type, - column_name as base_column_name + column_name as base_column_name, + description as column_comment from {{ information_schema.replace(information_schema_view='COLUMN_FIELD_PATHS') }} - where data_type not like 'STRUCT%' ), diff --git a/plugins/bigquery/dbt/include/bigquery/macros/materializations/seed.sql b/plugins/bigquery/dbt/include/bigquery/macros/materializations/seed.sql index d628673747b..d95cc4e1b10 100644 --- a/plugins/bigquery/dbt/include/bigquery/macros/materializations/seed.sql +++ b/plugins/bigquery/dbt/include/bigquery/macros/materializations/seed.sql @@ -12,5 +12,8 @@ {%- set column_override = model['config'].get('column_types', {}) -%} {{ adapter.load_dataframe(model['database'], model['schema'], model['alias'], agate_table, column_override) }} + {% if config.persist_relation_docs() and 'description' in model %} + {{ adapter.update_table_description(model['database'], model['schema'], model['alias'], model['description']) }} + {% endif %} {% endmacro %} diff --git a/plugins/bigquery/setup.py b/plugins/bigquery/setup.py index b88d6cf2523..7b827466523 100644 --- a/plugins/bigquery/setup.py +++ b/plugins/bigquery/setup.py @@ -40,6 +40,7 @@ }, install_requires=[ 'dbt-core=={}'.format(package_version), + 'protobuf>=3.6.0,<3.12', 'google-cloud-core>=1.3.0,<1.4', 'google-cloud-bigquery>=1.24.0,<1.25.0', 'google-api-core>=1.16.0,<1.17.0', diff --git a/test/integration/014_hook_tests/test_model_hooks.py b/test/integration/014_hook_tests/test_model_hooks.py index 2efce14aca7..ad4c8ce3ef0 100644 --- a/test/integration/014_hook_tests/test_model_hooks.py +++ b/test/integration/014_hook_tests/test_model_hooks.py @@ -226,6 +226,23 @@ def test_postgres_hooks_on_seeds(self): self.assertEqual(len(res), 1, 'Expected exactly one item') +class TestPrePostModelHooksOnSeedsPlusPrefixed(TestPrePostModelHooksOnSeeds): + @property + def project_config(self): + return { + 'config-version': 2, + 'data-paths': ['data'], + 'models': {}, + 'seeds': { + '+post-hook': [ + 'alter table {{ this }} add column new_col int', + 'update {{ this }} set new_col = 1' + ], + 'quote_columns': False, + }, + } + + class TestPrePostModelHooksOnSnapshots(DBTIntegrationTest): @property def schema(self): diff --git a/test/integration/029_docs_generate_tests/bq_models/nested_table.sql b/test/integration/029_docs_generate_tests/bq_models/nested_table.sql index 61309296267..22f9048d629 100644 --- a/test/integration/029_docs_generate_tests/bq_models/nested_table.sql +++ b/test/integration/029_docs_generate_tests/bq_models/nested_table.sql @@ -10,6 +10,6 @@ select 3 as field_3, struct( - 4 as field_4, - 5 as field_5 + 5 as field_5, + 6 as field_6 ) as nested_field diff --git a/test/integration/029_docs_generate_tests/test_docs_generate.py b/test/integration/029_docs_generate_tests/test_docs_generate.py index b3bc96b069f..7c959eb0fb6 100644 --- a/test/integration/029_docs_generate_tests/test_docs_generate.py +++ b/test/integration/029_docs_generate_tests/test_docs_generate.py @@ -2,15 +2,19 @@ import json import os import random +import shutil +import tempfile import time from datetime import datetime from unittest.mock import ANY, patch +from pytest import mark from test.integration.base import DBTIntegrationTest, use_profile, AnyFloat, \ AnyString, AnyStringWith, normalize, Normalized from dbt.exceptions import CompilationException + def _read_file(path): with open(path, 'r') as fp: return fp.read().replace('\r', 
'').replace('\\r', '') @@ -615,10 +619,10 @@ def expected_bigquery_complex_catalog(self): 'type': 'INT64', 'comment': None }, - 'nested_field.field_4': { - 'name': 'nested_field.field_4', + 'nested_field': { + 'name': 'nested_field', 'index': 4, - 'type': 'INT64', + 'type': 'STRUCT', 'comment': None }, 'nested_field.field_5': { @@ -626,6 +630,12 @@ def expected_bigquery_complex_catalog(self): 'index': 5, 'type': 'INT64', 'comment': None + }, + 'nested_field.field_6': { + 'name': 'nested_field.field_6', + 'index': 6, + 'type': 'INT64', + 'comment': None } } @@ -3206,3 +3216,65 @@ def test_postgres_override_used(self): self.run_dbt(['docs', 'generate']) self.assertIn('rejected: no catalogs for you', str(exc.exception)) + + +@mark.skipif(os.name != 'nt', reason='This is only relevant on windows') +class TestDocsGenerateLongWindowsPaths(DBTIntegrationTest): + def _generate_test_root_dir(self): + assert os.name == 'nt' + magic_prefix = '\\\\?\\' + + # tempfile.mkdtemp doesn't use `\\?\` by default so we have to + # get a tiny bit creative. + temp_dir = tempfile.gettempdir() + if not temp_dir.startswith(magic_prefix): + temp_dir = magic_prefix + temp_dir + outer = tempfile.mkdtemp(prefix='dbt-int-test-', dir=temp_dir) + # then inside _that_ directory make a new one that gets us to just + # barely 260 total. I picked 250 to account for the '\' and anything + # else. The key is that len(inner) + len('target\\compiled\\...') will + # be >260 chars + new_length = 250 - len(outer) + inner = os.path.join(outer, 'a'*new_length) + os.mkdir(inner) + return normalize(inner) + + def _symlink_test_folders(self): + # dbt's normal symlink behavior breaks this test, so special-case it + for entry in os.listdir(self.test_original_source_path): + src = os.path.join(self.test_original_source_path, entry) + tst = os.path.join(self.test_root_dir, entry) + if entry == 'trivial_models': + shutil.copytree(src, tst) + elif entry == 'local_dependency': + continue + elif os.path.isdir(entry) or entry.endswith('.sql'): + os.symlink(src, tst) + + @property + def schema(self): + return 'docs_generate_029' + + @staticmethod + def dir(path): + return normalize(path) + + @property + def models(self): + return self.dir("trivial_models") + + def run_and_generate(self): + self.assertEqual(len(self.run_dbt(['run'])), 1) + os.remove(normalize('target/manifest.json')) + os.remove(normalize('target/run_results.json')) + self.run_dbt(['docs', 'generate']) + + @use_profile('postgres') + def test_postgres_long_paths(self): + self.run_and_generate() + # this doesn't use abspath, so all should be well here + manifest = _read_json('./target/manifest.json') + self.assertIn('nodes', manifest) + assert os.path.exists('./target/run/test/trivial_models/model.sql') + self.run_dbt(['clean']) + assert not os.path.exists('./target/run') diff --git a/test/integration/039_config_test/test_configs.py b/test/integration/039_config_test/test_configs.py index 1246d72be71..db9823ed3bc 100644 --- a/test/integration/039_config_test/test_configs.py +++ b/test/integration/039_config_test/test_configs.py @@ -110,9 +110,9 @@ def postgres_profile(self): 'default2': { 'type': 'postgres', # make sure you can do this and get an int out - 'threads': "{{ 1 + 3 }}", + 'threads': "{{ (1 + 3) | as_number }}", 'host': self.database_host, - 'port': "{{ 5400 + 32 }}", + 'port': "{{ (5400 + 32) | as_number }}", 'user': 'root', 'pass': 'password', 'dbname': 'dbt', @@ -121,9 +121,9 @@ def postgres_profile(self): 'disabled': { 'type': 'postgres', # make sure you can do this 
and get an int out - 'threads': "{{ 1 + 3 }}", + 'threads': "{{ (1 + 3) | as_number }}", 'host': self.database_host, - 'port': "{{ 5400 + 32 }}", + 'port': "{{ (5400 + 32) | as_number }}", 'user': 'root', 'pass': 'password', 'dbname': 'dbt', @@ -141,7 +141,7 @@ def project_config(self): 'data-paths': ['data'], 'models': { 'test': { - 'enabled': "{{ target.name == 'default2' }}", + 'enabled': "{{ (target.name == 'default2' | as_bool) }}", }, }, # set the `var` result in schema.yml to be 'seed', so that the @@ -155,7 +155,7 @@ def project_config(self): 'quote_columns': False, 'test': { 'seed': { - 'enabled': "{{ target.name == 'default2' }}", + 'enabled': "{{ (target.name == 'default2') | as_bool }}", }, }, }, diff --git a/test/integration/040_override_database_test/test_override_database.py b/test/integration/040_override_database_test/test_override_database.py index 37ea59b8eb5..1686ca1214e 100644 --- a/test/integration/040_override_database_test/test_override_database.py +++ b/test/integration/040_override_database_test/test_override_database.py @@ -99,14 +99,45 @@ def test_snowflake_database_override(self): self.run_database_override() -class TestProjectModelOverride(BaseOverrideDatabase): +class BaseTestProjectModelOverride(BaseOverrideDatabase): + # this is janky, but I really want to access self.default_database in + # project_config + @property + def default_database(self): + target = self._profile_config['test']['target'] + profile = self._profile_config['test']['outputs'][target] + for key in ['database', 'project', 'dbname']: + if key in profile: + database = profile[key] + if self.adapter_type == 'snowflake': + return database.upper() + return database + assert False, 'No profile database found!' + def run_database_override(self): + self.run_dbt_notstrict(['seed']) + self.assertEqual(len(self.run_dbt_notstrict(['run'])), 4) + self.assertExpectedRelations() + + def assertExpectedRelations(self): if self.adapter_type == 'snowflake': func = lambda x: x.upper() else: func = lambda x: x - self.use_default_project({ + self.assertManyRelationsEqual([ + (func('seed'), self.unique_schema(), self.default_database), + (func('view_2'), self.unique_schema(), self.alternative_database), + (func('view_1'), self.unique_schema(), self.alternative_database), + (func('view_3'), self.unique_schema(), self.default_database), + (func('view_4'), self.unique_schema(), self.alternative_database), + ]) + + +class TestProjectModelOverride(BaseTestProjectModelOverride): + @property + def project_config(self): + return { 'config-version': 2, 'vars': { 'alternate_db': self.alternative_database, @@ -119,17 +150,17 @@ def run_database_override(self): } } }, - }) - self.run_dbt_notstrict(['seed']) - - self.assertEqual(len(self.run_dbt_notstrict(['run'])), 4) - self.assertManyRelationsEqual([ - (func('seed'), self.unique_schema(), self.default_database), - (func('view_2'), self.unique_schema(), self.alternative_database), - (func('view_1'), self.unique_schema(), self.alternative_database), - (func('view_3'), self.unique_schema(), self.default_database), - (func('view_4'), self.unique_schema(), self.alternative_database), - ]) + 'data-paths': ['data'], + 'vars': { + 'alternate_db': self.alternative_database, + }, + 'quoting': { + 'database': True, + }, + 'seeds': { + 'quote_columns': False, + } + } @use_profile('bigquery') def test_bigquery_database_override(self): @@ -140,6 +171,39 @@ def test_snowflake_database_override(self): self.run_database_override() +class 
TestProjectModelAliasOverride(BaseTestProjectModelOverride): + @property + def project_config(self): + return { + 'config-version': 2, + 'vars': { + 'alternate_db': self.alternative_database, + }, + 'models': { + 'project': self.alternative_database, + 'test': { + 'subfolder': { + 'project': self.default_database, + } + } + }, + 'data-paths': ['data'], + 'vars': { + 'alternate_db': self.alternative_database, + }, + 'quoting': { + 'database': True, + }, + 'seeds': { + 'quote_columns': False, + } + } + + @use_profile('bigquery') + def test_bigquery_project_override(self): + self.run_database_override() + + class TestProjectSeedOverride(BaseOverrideDatabase): def run_database_override(self): if self.adapter_type == 'snowflake': diff --git a/test/integration/048_rpc_test/test_rpc.py b/test/integration/048_rpc_test/test_rpc.py index 866dcb750b3..912876624df 100644 --- a/test/integration/048_rpc_test/test_rpc.py +++ b/test/integration/048_rpc_test/test_rpc.py @@ -66,7 +66,7 @@ def is_up(self): def start(self): super().start() - for _ in range(30): + for _ in range(60): if self.is_up(): break time.sleep(0.5) @@ -726,8 +726,9 @@ def test_invalid_requests_postgres(self): 'hi this is not sql', name='foo' ).json() - # neat mystery: Why is this "1" on macos and "2" on linux? - lineno = '1' if sys.platform == 'darwin' else '2' + # this is "1" if the multiprocessing context is "spawn" and "2" if + # it's fork. + lineno = '1' error_data = self.assertIsErrorWith(data, 10003, 'Database Error', { 'type': 'DatabaseException', 'message': f'Database Error in rpc foo (from remote system)\n syntax error at or near "hi"\n LINE {lineno}: hi this is not sql\n ^', @@ -767,11 +768,9 @@ def test_timeout_postgres(self): self.assertIn('message', error_data) self.assertEqual(error_data['message'], 'RPC timed out after 1.0s') self.assertIn('logs', error_data) - if sys.platform == 'darwin': - # because fork() without exec() is broken on macos, we use 'spawn' - # so on macos we don't get logs back because we didn't fork() - return - self.assertTrue(len(error_data['logs']) > 0) + # because fork() without exec() is broken, we use 'spawn' so we don't + # get logs back because we didn't fork() + return @mark.flaky(rerun_filter=addr_in_use) diff --git a/test/integration/060_persist_docs_tests/data/seed.csv b/test/integration/060_persist_docs_tests/data/seed.csv new file mode 100644 index 00000000000..1a728c8ab74 --- /dev/null +++ b/test/integration/060_persist_docs_tests/data/seed.csv @@ -0,0 +1,3 @@ +id,name +1,Alice +2,Bob diff --git a/test/integration/060_persist_docs_tests/models-bigquery-nested/schema.yml b/test/integration/060_persist_docs_tests/models-bigquery-nested/schema.yml new file mode 100644 index 00000000000..0311dcb1449 --- /dev/null +++ b/test/integration/060_persist_docs_tests/models-bigquery-nested/schema.yml @@ -0,0 +1,19 @@ +version: 2 + +models: + - name: table_model_nested + columns: + - name: level_1 + description: level_1 column description + - name: level_1.level_2 + description: level_2 column description + - name: level_1.level_2.level_3_a + description: level_3 column description + - name: view_model_nested + columns: + - name: level_1 + description: level_1 column description + - name: level_1.level_2 + description: level_2 column description + - name: level_1.level_2.level_3_a + description: level_3 column description \ No newline at end of file diff --git a/test/integration/060_persist_docs_tests/models-bigquery-nested/table_model_nested.sql 
b/test/integration/060_persist_docs_tests/models-bigquery-nested/table_model_nested.sql new file mode 100644 index 00000000000..c2936d4f186 --- /dev/null +++ b/test/integration/060_persist_docs_tests/models-bigquery-nested/table_model_nested.sql @@ -0,0 +1,8 @@ +{{ config(materialized='table') }} +SELECT + STRUCT( + STRUCT( + 1 AS level_3_a, + 2 AS level_3_b + ) AS level_2 + ) AS level_1 \ No newline at end of file diff --git a/test/integration/060_persist_docs_tests/models-bigquery-nested/view_model_nested.sql b/test/integration/060_persist_docs_tests/models-bigquery-nested/view_model_nested.sql new file mode 100644 index 00000000000..e3323ddf4e6 --- /dev/null +++ b/test/integration/060_persist_docs_tests/models-bigquery-nested/view_model_nested.sql @@ -0,0 +1,8 @@ +{{ config(materialized='view') }} +SELECT + STRUCT( + STRUCT( + 1 AS level_3_a, + 2 AS level_3_b + ) AS level_2 + ) AS level_1 \ No newline at end of file diff --git a/test/integration/060_persist_docs_tests/models/schema.yml b/test/integration/060_persist_docs_tests/models/schema.yml index 74dfa0a3378..5a909162456 100644 --- a/test/integration/060_persist_docs_tests/models/schema.yml +++ b/test/integration/060_persist_docs_tests/models/schema.yml @@ -43,3 +43,28 @@ models: -- /* comment */ Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting + +seeds: + - name: seed + description: | + Seed model description "with double quotes" + and with 'single quotes' as welll as other; + '''abc123''' + reserved -- characters + -- + /* comment */ + Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting + columns: + - name: id + description: | + id Column description "with double quotes" + and with 'single quotes' as welll as other; + '''abc123''' + reserved -- characters + -- + /* comment */ + Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting + - name: name + description: | + Some stuff here and then a call to + {{ doc('my_fun_doc')}} diff --git a/test/integration/060_persist_docs_tests/test_persist_docs.py b/test/integration/060_persist_docs_tests/test_persist_docs.py index c5a6cc1689e..deea4b2323e 100644 --- a/test/integration/060_persist_docs_tests/test_persist_docs.py +++ b/test/integration/060_persist_docs_tests/test_persist_docs.py @@ -33,13 +33,15 @@ def _assert_has_table_comments(self, table_node): assert table_id_comment.startswith('id Column description') table_name_comment = table_node['columns']['name']['comment'] - assert table_name_comment.startswith('Some stuff here and then a call to') + assert table_name_comment.startswith( + 'Some stuff here and then a call to') self._assert_common_comments( table_comment, table_id_comment, table_name_comment ) - def _assert_has_view_comments(self, view_node, has_node_comments=True, has_column_comments=True): + def _assert_has_view_comments(self, view_node, has_node_comments=True, + has_column_comments=True): view_comment = view_node['metadata']['comment'] if has_node_comments: assert view_comment.startswith('View model description') @@ -146,7 +148,15 @@ def project_config(self): "columns": True, }, } - } + }, + 'seeds': { + 'test': { + '+persist_docs': { + "relation": True, + "columns": True, + }, + } + }, } @use_profile('snowflake') @@ -155,4 +165,102 @@ def test_snowflake_persist_docs(self): @use_profile('bigquery') def test_bigquery_persist_docs(self): + self.run_dbt(['seed']) + self.run_dbt() + desc_map = { + 'seed': 'Seed model description', + 'table_model': 'Table model description', + 'view_model': 'View model description', + } + for node_id in ['seed', 
'table_model', 'view_model']: + with self.adapter.connection_named('_test'): + client = self.adapter.connections \ + .get_thread_connection().handle + + table_id = "{}.{}.{}".format( + self.default_database, + self.unique_schema(), + node_id + ) + bq_table = client.get_table(table_id) + + bq_schema = bq_table.schema + + assert bq_table.description.startswith(desc_map[node_id]) + assert bq_schema[0].description.startswith('id Column description ') + if not node_id.startswith('view'): + assert bq_schema[1].description.startswith('Some stuff here and then a call to') + + +class TestPersistDocsNested(BasePersistDocsTest): + @property + def project_config(self): + return { + 'config-version': 2, + 'models': { + 'test': { + '+persist_docs': { + "relation": True, + "columns": True, + }, + } + } + } + + @property + def models(self): + return 'models-bigquery-nested' + + @use_profile('bigquery') + def test_bigquery_persist_docs(self): + """ + run dbt and use the bigquery client from the adapter to check if the + column descriptions are persisted on the test model table and view. + + Next, generate the catalog and check if the comments are also included. + """ + self.run_dbt(['seed']) self.run_dbt() + + self.run_dbt(['docs', 'generate']) + with open('target/catalog.json') as fp: + catalog_data = json.load(fp) + assert 'nodes' in catalog_data + assert len(catalog_data['nodes']) == 3 # seed, table, and view model + + for node_id in ['table_model_nested', 'view_model_nested']: + # check the descriptions using the api + with self.adapter.connection_named('_test'): + client = self.adapter.connections \ + .get_thread_connection().handle + + table_id = "{}.{}.{}".format( + self.default_database, + self.unique_schema(), + node_id + ) + bq_schema = client.get_table(table_id).schema + + level_1_field = bq_schema[0] + assert level_1_field.description == \ + "level_1 column description" + + level_2_field = level_1_field.fields[0] + assert level_2_field.description == \ + "level_2 column description" + + level_3_field = level_2_field.fields[0] + assert level_3_field.description == \ + "level_3 column description" + + # check the descriptions in the catalog + node = catalog_data['nodes']['model.test.{}'.format(node_id)] + + level_1_column = node['columns']['level_1'] + assert level_1_column['comment'] == "level_1 column description" + + level_2_column = node['columns']['level_1.level_2'] + assert level_2_column['comment'] == "level_2 column description" + + level_3_column = node['columns']['level_1.level_2.level_3_a'] + assert level_3_column['comment'] == "level_3 column description" diff --git a/test/integration/base.py b/test/integration/base.py index 00e21c54207..9a2d4edd1c2 100644 --- a/test/integration/base.py +++ b/test/integration/base.py @@ -184,7 +184,7 @@ def redshift_profile(self): 'type': 'redshift', 'threads': 1, 'host': os.getenv('REDSHIFT_TEST_HOST'), - 'port': os.getenv('REDSHIFT_TEST_PORT'), + 'port': int(os.getenv('REDSHIFT_TEST_PORT')), 'user': os.getenv('REDSHIFT_TEST_USER'), 'pass': os.getenv('REDSHIFT_TEST_PASS'), 'dbname': os.getenv('REDSHIFT_TEST_DBNAME'), @@ -348,6 +348,9 @@ def test_root_realpath(self): else: return self.test_root_dir + def _generate_test_root_dir(self): + return normalize(tempfile.mkdtemp(prefix='dbt-int-test-')) + def setUp(self): self.dbt_core_install_root = os.path.dirname(dbt.__file__) log_manager.reset_handlers() @@ -357,7 +360,7 @@ def setUp(self): self._logs_dir = os.path.join(self.initial_dir, 'logs', self.prefix) _really_makedirs(self._logs_dir) 
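+ # each test runs out of a freshly-created temporary root; subclasses such + # as TestDocsGenerateLongWindowsPaths above override _generate_test_root_dir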
self.test_original_source_path = _pytest_get_test_root() - self.test_root_dir = normalize(tempfile.mkdtemp(prefix='dbt-int-test-')) + self.test_root_dir = self._generate_test_root_dir() os.chdir(self.test_root_dir) try: diff --git a/test/rpc/test_concurrency.py b/test/rpc/test_concurrency.py new file mode 100644 index 00000000000..31be49540ef --- /dev/null +++ b/test/rpc/test_concurrency.py @@ -0,0 +1,39 @@ +from concurrent.futures import ThreadPoolExecutor, as_completed + +from .util import ( + get_querier, + ProjectDefinition, +) + + +def _compile_poll_for_result(querier, id: int): + sql = f'select {id} as id' + resp = querier.compile_sql( + request_id=id, sql=sql, name=f'query_{id}' + ) + compile_sql_result = querier.async_wait_for_result(resp) + assert compile_sql_result['results'][0]['compiled_sql'] == sql + + +def test_rpc_compile_sql_concurrency( + project_root, profiles_root, postgres_profile, unique_schema +): + project = ProjectDefinition( + models={'my_model.sql': 'select 1 as id'} + ) + querier_ctx = get_querier( + project_def=project, + project_dir=project_root, + profiles_dir=profiles_root, + schema=unique_schema, + test_kwargs={}, + ) + + with querier_ctx as querier: + values = {} + with ThreadPoolExecutor(max_workers=10) as tpe: + for id in range(20): + fut = tpe.submit(_compile_poll_for_result, querier, id) + values[fut] = id + for fut in as_completed(values): + fut.result() diff --git a/test/unit/test_config.py b/test/unit/test_config.py index 11f8889e133..058f2cf233d 100644 --- a/test/unit/test_config.py +++ b/test/unit/test_config.py @@ -131,7 +131,7 @@ def setUp(self): 'with-vars': { 'type': "{{ env_var('env_value_type') }}", 'host': "{{ env_var('env_value_host') }}", - 'port': "{{ env_var('env_value_port') }}", + 'port': "{{ env_var('env_value_port') | as_number }}", 'user': "{{ env_var('env_value_user') }}", 'pass': "{{ env_var('env_value_pass') }}", 'dbname': "{{ env_var('env_value_dbname') }}", @@ -140,7 +140,7 @@ def setUp(self): 'cli-and-env-vars': { 'type': "{{ env_var('env_value_type') }}", 'host': "{{ var('cli_value_host') }}", - 'port': "{{ env_var('env_value_port') }}", + 'port': "{{ env_var('env_value_port') | as_number }}", 'user': "{{ env_var('env_value_user') }}", 'pass': "{{ env_var('env_value_pass') }}", 'dbname': "{{ env_var('env_value_dbname') }}", @@ -367,7 +367,7 @@ def test_invalid_env_vars(self): renderer, target_override='with-vars' ) - self.assertIn("not of type 'integer'", str(exc.exception)) + self.assertIn("Could not convert value 'hello' into type 'number'", str(exc.exception)) class TestProfileFile(BaseFileTest): @@ -511,7 +511,7 @@ def test_invalid_env_vars(self): with self.assertRaises(dbt.exceptions.DbtProfileError) as exc: self.from_args() - self.assertIn("not of type 'integer'", str(exc.exception)) + self.assertIn("Could not convert value 'hello' into type 'number'", str(exc.exception)) def test_cli_and_env_vars(self): self.args.target = 'cli-and-env-vars' diff --git a/test/unit/test_jinja.py b/test/unit/test_jinja.py index 491348de914..d0d780dcb52 100644 --- a/test/unit/test_jinja.py +++ b/test/unit/test_jinja.py @@ -1,9 +1,378 @@ +from contextlib import contextmanager +import pytest import unittest +import yaml from dbt.clients.jinja import get_rendered from dbt.clients.jinja import get_template from dbt.clients.jinja import extract_toplevel_blocks -from dbt.exceptions import CompilationException +from dbt.exceptions import CompilationException, JinjaRenderingException + + +@contextmanager +def returns(value): + yield value + + 
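+# 'returns' and 'raises' give every parametrized case below one uniform +# 'with expectation as result:' shape, whether the case expects a rendered +# value or an exception. +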
+@contextmanager +def raises(value): + with pytest.raises(value) as exc: + yield exc + + +def expected_id(arg): + if isinstance(arg, list): + return '_'.join(arg) + + +jinja_tests = [ + # strings + ( + '''foo: bar''', + returns('bar'), + returns('bar'), + ), + ( + '''foo: "bar"''', + returns('bar'), + returns('bar'), + ), + ( + '''foo: "'bar'"''', + returns("'bar'"), + returns("'bar'"), + ), + ( + """foo: '"bar"'""", + returns('"bar"'), + returns('"bar"'), + ), + ( + '''foo: "{{ 'bar' | as_text }}"''', + returns('bar'), + returns('bar'), + ), + ( + '''foo: "{{ 'bar' | as_bool }}"''', + returns('bar'), + raises(JinjaRenderingException), + ), + ( + '''foo: "{{ 'bar' | as_number }}"''', + returns('bar'), + raises(JinjaRenderingException), + ), + ( + '''foo: "{{ 'bar' | as_native }}"''', + returns('bar'), + returns('bar'), + ), + # ints + ( + '''foo: 1''', + returns('1'), + returns('1'), + ), + ( + '''foo: "1"''', + returns('1'), + returns('1'), + ), + ( + '''foo: "'1'"''', + returns("'1'"), + returns("'1'"), + ), + ( + """foo: '"1"'""", + returns('"1"'), + returns('"1"'), + ), + ( + '''foo: "{{ 1 }}"''', + returns('1'), + returns('1'), + ), + ( + '''foo: "{{ '1' }}"''', + returns('1'), + returns('1'), + ), + ( + '''foo: "'{{ 1 }}'"''', + returns("'1'"), + returns("'1'"), + ), + ( + '''foo: "'{{ '1' }}'"''', + returns("'1'"), + returns("'1'"), + ), + ( + '''foo: "{{ 1 | as_text }}"''', + returns('1'), + returns('1'), + ), + ( + '''foo: "{{ 1 | as_bool }}"''', + returns('1'), + raises(JinjaRenderingException), + ), + ( + '''foo: "{{ 1 | as_number }}"''', + returns('1'), + returns(1), + ), + ( + '''foo: "{{ 1 | as_native }}"''', + returns('1'), + returns(1), + ), + ( + '''foo: "{{ '1' | as_text }}"''', + returns('1'), + returns('1'), + ), + ( + '''foo: "{{ '1' | as_bool }}"''', + returns('1'), + raises(JinjaRenderingException), + ), + ( + '''foo: "{{ '1' | as_number }}"''', + returns('1'), + returns(1), + ), + ( + '''foo: "{{ '1' | as_native }}"''', + returns('1'), + returns(1), + ), + # booleans. + # Note the discrepancy with true vs True: `true` is recognized by jinja but + # not literal_eval, but `True` is recognized by ast.literal_eval. + # For extra fun, yaml recognizes both. 
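+ # (concretely: ast.literal_eval('true') raises ValueError, while + # ast.literal_eval('True') evaluates to the boolean True)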
+ # unquoted true + ( + '''foo: "{{ True }}"''', + returns('True'), + returns('True'), + ), + ( + '''foo: "{{ True | as_text }}"''', + returns('True'), + returns('True'), + ), + ( + '''foo: "{{ True | as_bool }}"''', + returns('True'), + returns(True), + ), + ( + '''foo: "{{ True | as_number }}"''', + returns('True'), + raises(JinjaRenderingException), + ), + ( + '''foo: "{{ True | as_native }}"''', + returns('True'), + returns(True), + ), + # unquoted true + ( + '''foo: "{{ true }}"''', + returns("True"), + returns("True"), + ), + ( + '''foo: "{{ true | as_text }}"''', + returns("True"), + returns("True"), + ), + ( + '''foo: "{{ true | as_bool }}"''', + returns("True"), + returns(True), + ), + ( + '''foo: "{{ true | as_number }}"''', + returns("True"), + raises(JinjaRenderingException), + ), + ( + '''foo: "{{ true | as_native }}"''', + returns("True"), + returns(True), + ), + ( + '''foo: "{{ 'true' | as_text }}"''', + returns("true"), + returns("true"), + ), + # quoted 'true' + ( + '''foo: "'{{ true }}'"''', + returns("'True'"), + returns("'True'"), + ), # jinja true -> python True -> str(True) -> "True" -> quoted + ( + '''foo: "'{{ true | as_text }}'"''', + returns("'True'"), + returns("'True'"), + ), + ( + '''foo: "'{{ true | as_bool }}'"''', + returns("'True'"), + returns("'True'"), + ), + ( + '''foo: "'{{ true | as_number }}'"''', + returns("'True'"), + returns("'True'"), + ), + ( + '''foo: "'{{ true | as_native }}'"''', + returns("'True'"), + returns("'True'"), + ), + # unquoted True + ( + '''foo: "{{ True }}"''', + returns('True'), + returns('True'), + ), + ( + '''foo: "{{ True | as_text }}"''', + returns("True"), + returns("True"), + ), # True -> string 'True' -> text -> str('True') -> 'True' + ( + '''foo: "{{ True | as_bool }}"''', + returns("True"), + returns(True), + ), + ( + '''foo: "{{ True | as_number }}"''', + returns("True"), + raises(JinjaRenderingException), + ), + ( + '''foo: "{{ True | as_native }}"''', + returns("True"), + returns(True), + ), + # quoted 'True' within rendering + ( + '''foo: "{{ 'True' | as_text }}"''', + returns("True"), + returns("True"), + ), + # 'True' -> string 'True' -> text -> str('True') -> 'True' + ( + '''foo: "{{ 'True' | as_bool }}"''', + returns('True'), + returns(True), + ), + # quoted 'True' outside rendering + ( + '''foo: "'{{ True }}'"''', + returns("'True'"), + returns("'True'"), + ), + ( + '''foo: "'{{ True | as_bool }}'"''', + returns("'True'"), + returns("'True'"), + ), + # yaml turns 'yes' into a boolean true + ( + '''foo: yes''', + returns('True'), + returns('True'), + ), + ( + '''foo: "yes"''', + returns('yes'), + returns('yes'), + ), + # concatenation + ( + '''foo: "{{ (a_int + 100) | as_native }}"''', + returns('200'), + returns(200), + ), + ( + '''foo: "{{ (a_str ~ 100) | as_native }}"''', + returns('100100'), + returns(100100), + ), + ( + '''foo: "{{( a_int ~ 100) | as_native }}"''', + returns('100100'), + returns(100100), + ), + # multiple nodes -> always str + ( + '''foo: "{{ a_str | as_native }}{{ a_str | as_native }}"''', + returns('100100'), + returns('100100'), + ), + ( + '''foo: "{{ a_int | as_native }}{{ a_int | as_native }}"''', + returns('100100'), + returns('100100'), + ), + ( + '''foo: "'{{ a_int | as_native }}{{ a_int | as_native }}'"''', + returns("'100100'"), + returns("'100100'"), + ), + ( + '''foo:''', + returns('None'), + returns('None'), + ), + ( + '''foo: null''', + returns('None'), + returns('None'), + ), + ( + '''foo: ""''', + returns(''), + returns(''), + ), + ( + '''foo: "{{ '' | as_native }}"''', + 
returns(''), + returns(''), + ), + # very annoying, but jinja 'none' is yaml 'null'. + ( + '''foo: "{{ none | as_native }}"''', + returns('None'), + returns(None), + ), +] + + +@pytest.mark.parametrize( + 'value,text_expectation,native_expectation', + jinja_tests, + ids=expected_id +) +def test_jinja_rendering(value, text_expectation, native_expectation): + foo_value = yaml.safe_load(value)['foo'] + ctx = { + 'a_str': '100', + 'a_int': 100, + 'b_str': 'hello' + } + with text_expectation as text_result: + assert text_result == get_rendered(foo_value, ctx, native=False) + + with native_expectation as native_result: + assert native_result == get_rendered(foo_value, ctx, native=True) class TestJinja(unittest.TestCase): @@ -15,25 +384,25 @@ def test_do(self): self.assertEqual(mod.my_dict, {'a': 1}) def test_regular_render(self): - s = '{{ "some_value" }}' + s = '{{ "some_value" | as_native }}' value = get_rendered(s, {}, native=False) assert value == 'some_value' - s = '{{ 1991 }}' + s = '{{ 1991 | as_native }}' value = get_rendered(s, {}, native=False) assert value == '1991' s = '{{ "some_value" | as_text }}' - value = get_rendered(s, {}, native=True) + value = get_rendered(s, {}, native=False) assert value == 'some_value' s = '{{ 1991 | as_text }}' - value = get_rendered(s, {}, native=True) + value = get_rendered(s, {}, native=False) assert value == '1991' def test_native_render(self): - s = '{{ "some_value" }}' + s = '{{ "some_value" | as_native }}' value = get_rendered(s, {}, native=True) assert value == 'some_value' - s = '{{ 1991 }}' + s = '{{ 1991 | as_native }}' value = get_rendered(s, {}, native=True) assert value == 1991 @@ -411,5 +780,3 @@ def test_if_endfor_newlines(self): hi {% endmaterialization %} ''' - -
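Finally, a small sketch of the long-path handling that `convert_path` in core/dbt/clients/system.py introduces. Which branch is taken depends on the host OS and its RtlAreLongPathsEnabled support, and this assumes the dbt from this diff is installed:

```python
from dbt.clients.system import convert_path

# short paths are always returned untouched, on every platform
short = 'C:\\projects\\dbt\\target\\manifest.json'
assert convert_path(short) == short

# a hypothetical >250-character path: on posix, or on a Windows host with
# long-path support enabled, this is also a no-op; otherwise it gains the
# extended-length '\\?\' prefix so Win32 file APIs will accept it
long_path = 'C:\\projects\\' + 'a' * 260 + '\\model.sql'
converted = convert_path(long_path)
assert converted == long_path or converted.startswith('\\\\?\\')
```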