-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Summary: Finally was able to make some time for this! This is a prototype-quality dbt integration, but it demonstrates what the shape of this would look like. I copied the example from https://github.com/fishtown-analytics/jaffle_shop/. This shells out to dbt itself and runs against the database in the examples docker container. (I had to manually create the database.) It just parses stdout with regexes, which is quite fragile. It would be better if dbt emitted some sort of structured log so that this could be parsed more reliably. Taylor is excited enough about this possibility that he might tackle it. See dbt-labs/dbt-core#1237. This emits materializations for each view or table created in the example. This has not been thoroughly tested. Next Steps: - Parse materializations out of the dbt project and render them as outputs - Also create a type-per-model and render metadata within dagit. - Consume dbt tests during a run and emit Expectations. - Model "seeds" as inputs into the dbt node. Test Plan: Run jaffle example in dagit. Buildkite Reviewers: #ft, natekupp Reviewed By: #ft, natekupp Subscribers: natekupp Differential Revision: https://dagster.phacility.com/D795
- Loading branch information
Showing
12 changed files
with
221 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
from dagster import file_relative_path, pipeline | ||
|
||
from dagster_dbt import create_dbt_solid | ||
|
||
# Location of the example dbt project, resolved relative to this file.
_JAFFLE_PROJECT_DIR = file_relative_path(__file__, 'jaffle_shop')

# Solid that runs dbt against the example project.
jaffle_solid = create_dbt_solid(_JAFFLE_PROJECT_DIR)


@pipeline
def jaffle_pipeline():
    # The pipeline consists of the single dbt solid.
    jaffle_solid()  # pylint: disable=no-value-for-parameter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
[run] | ||
branch = True |
101 changes: 101 additions & 0 deletions
101
python_modules/libraries/dagster-dbt/dagster_dbt/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
from collections import namedtuple | ||
import io | ||
import os | ||
import re | ||
import shlex | ||
import subprocess | ||
|
||
from dagster import ( | ||
check, | ||
EventMetadataEntry, | ||
Failure, | ||
Materialization, | ||
Nothing, | ||
Output, | ||
OutputDefinition, | ||
solid, | ||
) | ||
|
||
# Shared fragment: captures `<schema>.<model>` followed by the run of dots that pads the
# model name in dbt's progress output.
_MODEL_RELATION = r'(\w+)\.(\w+)\.*'

# Matches a dbt progress line reporting a created view; groups are (schema, view).
CREATE_VIEW_REGEX = re.compile(r'OK created view model ' + _MODEL_RELATION + r' \[CREATE VIEW')

# Matches a dbt progress line reporting a created table; groups are (schema, table, row count).
CREATE_TABLE_REGEX = re.compile(r'OK created table model ' + _MODEL_RELATION + r' \[SELECT (\d+)')

# Matches ANSI escape sequences so terminal colors can be stripped before parsing.
ANSI_ESCAPE = re.compile(r'\x1B[@-_][0-?]*[ -/]*[@-~]')
|
||
|
||
def try_parse_create_view(text):
    '''Turn a dbt "created view" log line into a Materialization.

    Args:
        text (str): A single line of dbt output.

    Returns:
        A Materialization labeled 'create_view' carrying the schema and view names, or None
        when the line does not match CREATE_VIEW_REGEX.
    '''
    found = CREATE_VIEW_REGEX.search(text)
    if found is None:
        return None

    schema_name = found.group(1)
    view_name = found.group(2)

    entries = [
        EventMetadataEntry.text(schema_name, 'schema'),
        EventMetadataEntry.text(view_name, 'view'),
    ]
    return Materialization(label='create_view', description=text, metadata_entries=entries)
|
||
|
||
def try_parse_create_table(text):
    '''Turn a dbt "created table" log line into a Materialization.

    Args:
        text (str): A single line of dbt output.

    Returns:
        A Materialization labeled 'create_table' carrying the schema name, table name and
        row count (as text), or None when the line does not match CREATE_TABLE_REGEX.
    '''
    found = CREATE_TABLE_REGEX.search(text)
    if found is None:
        return None

    schema_name = found.group(1)
    table_name = found.group(2)
    row_count = found.group(3)

    entries = [
        EventMetadataEntry.text(schema_name, 'schema'),
        EventMetadataEntry.text(table_name, 'table'),
        EventMetadataEntry.text(row_count, 'row_count'),
    ]
    return Materialization(label='create_table', description=text, metadata_entries=entries)
|
||
|
||
def try_parse(text):
    '''Run each known line parser over text and return the first result.

    Args:
        text (str): A single line of dbt output.

    Returns:
        The first truthy value returned by a parser, or None if no parser recognizes the line.
    '''
    for attempt in (try_parse_create_view, try_parse_create_table):
        materialization = attempt(text)
        if materialization:
            return materialization
    return None
|
||
|
||
def create_dbt_solid(project_dir, name=None):
    '''Create a solid that runs ``dbt run`` against a dbt project directory.

    When executed, the solid streams dbt's stdout, echoes every non-empty line with print,
    and yields a Materialization for each line recognized by try_parse. After dbt exits
    successfully it yields the ``run_complete`` output, whose type is Nothing.

    Args:
        project_dir (str): Path handed to dbt via ``--project-dir``.
        name (Optional[str]): Name for the solid. Defaults to the last path component of
            project_dir.

    Returns:
        The solid produced by the ``@solid`` decorator.

    Raises:
        Failure: Raised during solid execution if dbt exits with a non-zero return code.
    '''
    check.str_param(project_dir, 'project_dir')
    check.opt_str_param(name, 'name')

    # normpath drops a trailing separator, so 'path/to/project/' still yields 'project'
    # instead of an empty solid name.
    default_name = os.path.basename(os.path.normpath(project_dir))

    @solid(
        name=name if name else default_name,
        output_defs=[OutputDefinition(dagster_type=Nothing, name='run_complete')],
    )
    def dbt_solid(_):
        # Build argv directly instead of formatting a string and shlex-splitting it, so a
        # project_dir containing spaces or quotes is passed to dbt as one argument.
        args = ['dbt', 'run', '--project-dir', project_dir]
        proc = subprocess.Popen(args, stdout=subprocess.PIPE)
        stdout_text = io.TextIOWrapper(proc.stdout, encoding='utf-8')

        try:
            # if https://github.com/fishtown-analytics/dbt/issues/1237 gets done
            # we should definitely switch to parsing the json output, as that
            # would be much more reliable/resilient
            for line in stdout_text:
                text = line.rstrip()
                if not text:
                    continue

                # print to stdout
                print(text)

                # remove colors
                text = ANSI_ESCAPE.sub('', text)

                mat = try_parse(text)
                if mat:
                    yield mat
        finally:
            # Release the pipe and reap the child even if reading fails or the generator
            # is closed before dbt's output is exhausted.
            stdout_text.close()
            proc.wait()

        if proc.returncode != 0:
            raise Failure('Dbt invocation errored')

        yield Output(value=None, output_name='run_complete')

    return dbt_solid
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
__version__ = '0.5.5' | ||
|
||
__nightly__ = 'nightly-2019.07.29' |
Empty file.
32 changes: 32 additions & 0 deletions
32
python_modules/libraries/dagster-dbt/dagster_dbt_tests/test_regexes.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
from dagster_dbt import CREATE_TABLE_REGEX, CREATE_VIEW_REGEX | ||
|
||
# Sample dbt progress line for a created view.
TEST_CREATE_VIEW = ''.join(
    [
        '17:36:00 | 3 of 8 ',
        'OK created view model ',
        'dbt_alice.stg_customers................. [CREATE VIEW in 0.18s]',
    ]
)

# Sample dbt progress line for a created table.
TEST_CREATE_TABLE = ''.join(
    [
        '17:36:01 | 4 of 8 ',
        'OK created table model ',
        'dbt_alice.order_payments............... [SELECT 99 in 0.07s]',
    ]
)
|
||
|
||
def test_match_view_model():
    # The view regex should capture the schema and the view name from the sample line.
    match = CREATE_VIEW_REGEX.search(TEST_CREATE_VIEW)
    assert match
    assert match.group(1) == 'dbt_alice'
    assert match.group(2) == 'stg_customers'
|
||
|
||
def test_match_table_model():
    # The table regex should capture schema, table name and row count from the sample line.
    match = CREATE_TABLE_REGEX.search(TEST_CREATE_TABLE)
    assert match

    assert match.group(1) == 'dbt_alice'
    assert match.group(2) == 'order_payments'
    assert int(match.group(3)) == 99
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
import argparse | ||
import sys | ||
|
||
from setuptools import find_packages, setup | ||
|
||
|
||
def get_version(name):
    '''Return the version string to publish for the given distribution name.

    Executes dagster_dbt/version.py (path relative to the current working directory) and
    reads the version out of the resulting namespace.

    Args:
        name (str): Either 'dagster-dbt' or 'dagster-dbt-nightly'.

    Returns:
        str: ``__version__`` for 'dagster-dbt', ``__nightly__`` for 'dagster-dbt-nightly'.

    Raises:
        Exception: If name is neither of the two known distribution names.
    '''
    namespace = {}
    with open('dagster_dbt/version.py') as version_file:
        exec(version_file.read(), namespace)  # pylint: disable=W0122

    if name == 'dagster-dbt':
        return namespace['__version__']
    if name == 'dagster-dbt-nightly':
        return namespace['__nightly__']

    raise Exception("Shouldn't be here: bad package name {name}".format(name=name))
|
||
|
||
# Parses the flags specific to this script; the entry point below strips them from
# sys.argv before setup() runs.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--nightly',
    action='store_true',
    help='Build with the nightly version string instead of the release version.',
)
|
||
|
||
def _do_setup(name='dagster-dbt'):
    '''Invoke setuptools for the requested distribution.

    Args:
        name (str): 'dagster-dbt' for a regular release or 'dagster-dbt-nightly' for a
            nightly build. Determines both the distribution name and, via get_version,
            the version string.
    '''
    setup(
        # Use the requested name so that a nightly build is not published under the
        # release distribution's name.
        name=name,
        version=get_version(name),
        author='Elementl',
        license='Apache-2.0',
        description='Package for Dagster dbt integration.',
        url='https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-dbt',
        classifiers=[
            'Programming Language :: Python :: 2.7',
            'Programming Language :: Python :: 3.5',
            'Programming Language :: Python :: 3.6',
            'Programming Language :: Python :: 3.7',
            'License :: OSI Approved :: Apache Software License',
            'Operating System :: OS Independent',
        ],
        # The test package is named dagster_dbt_tests, so excluding only 'test' would
        # still ship the tests in the built distribution.
        packages=find_packages(exclude=['test', 'dagster_dbt_tests', 'dagster_dbt_tests.*']),
        install_requires=['dagster', 'dbt-core'],
        zip_safe=False,
    )
|
||
|
||
if __name__ == '__main__':
    # Pull out the flags this script understands and hand everything else to setuptools.
    known_args, passthrough = parser.parse_known_args()
    sys.argv = [sys.argv[0]] + passthrough

    _do_setup('dagster-dbt-nightly' if known_args.nightly else 'dagster-dbt')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
[tox] | ||
envlist = py37,py36,py35,py27 | ||
|
||
[testenv] | ||
passenv = CI_* COVERALLS_REPO_TOKEN | ||
deps = | ||
-e ../../dagster | ||
-r ../../dagster/dev-requirements.txt | ||
-e . | ||
commands = | ||
coverage erase | ||
pytest -vv --junitxml=test_results.xml --cov=dagster_dbt --cov-append --cov-report= | ||
coverage report --omit='.tox/*,**/test_*.py' --skip-covered | ||
coverage html --omit='.tox/*,**/test_*.py' | ||
coverage xml --omit='.tox/*,**/test_*.py' |