Skip to content

Commit

Permalink
Support configurable delimiter for seed files, default to comma (#3990) (#7186)
Browse files: Browse the repository at this point in the history

* Support configurable delimiter for seed files, default to comma (#3990)

* Update Features-20230317-144957.yaml

* Moved "delimiter" to seed config instead of node config

* Update core/dbt/clients/agate_helper.py

Co-authored-by: Cor <[email protected]>

* Update test_contracts_graph_parsed.py

* fixed integration tests

* Added functional tests for seed files with a unique delimiter

* Added docstrings

* Added a test for an empty string configured delimiter value

* whitespace

* ran black

* updated changie entry

* Update Features-20230317-144957.yaml

---------

Co-authored-by: Cor <[email protected]>
  • Loading branch information
ramonvermeulen and JCZuurmond authored Aug 1, 2023
1 parent 7872f6a commit 6130a6e
Show file tree
Hide file tree
Showing 10 changed files with 605 additions and 3 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20230714-202445.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Added support to configure a delimiter for a seed file, defaults to comma
time: 2023-07-14T20:24:45.513847165+02:00
custom:
Author: ramonvermeulen
Issue: "3990"
4 changes: 2 additions & 2 deletions core/dbt/clients/agate_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,12 +135,12 @@ def as_matrix(table):
return [r.values() for r in table.rows.values()]


def from_csv(abspath, text_columns, delimiter=","):
    """Load the CSV file at ``abspath`` into an ``agate.Table``.

    :param abspath: Path of the CSV file; it is opened as UTF-8 text.
    :param text_columns: Forwarded to ``build_type_tester`` as ``text_columns``.
    :param delimiter: Field separator forwarded to ``agate.Table.from_csv``.
        Defaults to a comma, so existing two-argument callers are unaffected.
    :return: The table produced by ``agate.Table.from_csv``.
    """
    type_tester = build_type_tester(text_columns=text_columns)
    with open(abspath, encoding="utf-8") as fp:
        # Consume a leading BOM when present; otherwise rewind so the first
        # character of the file is not dropped.
        if fp.read(1) != BOM:
            fp.seek(0)
        return agate.Table.from_csv(fp, column_types=type_tester, delimiter=delimiter)


class _NullMarker:
Expand Down
3 changes: 2 additions & 1 deletion core/dbt/context/providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -865,8 +865,9 @@ def load_agate_table(self) -> agate.Table:
assert self.model.root_path
path = os.path.join(self.model.root_path, self.model.original_file_path)
column_types = self.model.config.column_types
delimiter = self.model.config.delimiter
try:
table = agate_helper.from_csv(path, text_columns=column_types)
table = agate_helper.from_csv(path, text_columns=column_types, delimiter=delimiter)
except ValueError as e:
raise LoadAgateTableValueError(e, node=self.model)
table.original_abspath = os.path.abspath(path)
Expand Down
1 change: 1 addition & 0 deletions core/dbt/contracts/graph/model_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,7 @@ def field_mapping(cls):
@dataclass
class SeedConfig(NodeConfig):
materialized: str = "seed"
delimiter: str = ","
quote_columns: Optional[bool] = None

@classmethod
Expand Down
4 changes: 4 additions & 0 deletions tests/adapter/dbt/tests/adapter/simple_seed/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@
models__downstream_from_seed_actual = """
select * from {{ ref('seed_actual') }}
"""
# Model SQL that selects every column from the `seed_pipe_separated` seed via ref().
models__downstream_from_seed_pipe_separated = """
select * from {{ ref('seed_pipe_separated') }}
"""
models__from_basic_seed = """
select * from {{ this.schema }}.seed_expected
Expand Down
505 changes: 505 additions & 0 deletions tests/adapter/dbt/tests/adapter/simple_seed/seeds.py

Large diffs are not rendered by default.

80 changes: 80 additions & 0 deletions tests/adapter/dbt/tests/adapter/simple_seed/test_seed.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from dbt.tests.adapter.simple_seed.fixtures import (
models__downstream_from_seed_actual,
models__from_basic_seed,
models__downstream_from_seed_pipe_separated,
)

from dbt.tests.adapter.simple_seed.seeds import (
Expand All @@ -29,6 +30,7 @@
seeds__wont_parse_csv,
seed__unicode_csv,
seed__with_dots_csv,
seeds__pipe_separated_csv,
)


Expand Down Expand Up @@ -163,6 +165,84 @@ def test_simple_seed_with_drop_and_schema(self, project):
check_relations_equal(project.adapter, [f"{custom_schema}.seed_actual", "seed_expected"])


class SeedUniqueDelimiterTestBase(SeedConfigBase):
    """Shared fixtures and helpers for seed tests that configure a delimiter.

    The base configuration sets the seed delimiter to a pipe (``|``) and
    provides one seed file plus one model that selects from that seed.
    Subclasses override ``project_config_update`` to try other delimiters.
    """

    @pytest.fixture(scope="class")
    def project_config_update(self):
        """Project config overrides: unquoted columns and a pipe delimiter."""
        seed_settings = {"quote_columns": False, "delimiter": "|"}
        return {"seeds": seed_settings}

    @pytest.fixture(scope="class", autouse=True)
    def setUp(self, project):
        """Run ``seeds__expected_sql`` once per class before any test executes."""
        project.run_sql(seeds__expected_sql)

    @pytest.fixture(scope="class")
    def seeds(self, test_data_dir):
        """Seed files for the project: a single pipe-separated CSV."""
        seed_files = {"seed_pipe_separated.csv": seeds__pipe_separated_csv}
        return seed_files

    @pytest.fixture(scope="class")
    def models(self):
        """Model files for the project: one model downstream of the seed."""
        model_filename = "models__downstream_from_seed_pipe_separated.sql"
        return {model_filename: models__downstream_from_seed_pipe_separated}

    def _build_relations_for_test(self, project):
        """Seed and run the project, checking each step against ``seed_expected``."""
        # Step 1: load the seed and compare it with the reference relation.
        seeded = run_dbt(["seed"])
        assert len(seeded) == 1
        check_relations_equal(project.adapter, ["seed_expected", "seed_pipe_separated"])

        # Step 2: build the downstream model and compare it as well.
        built = run_dbt()
        assert len(built) == 1
        downstream_pair = ["models__downstream_from_seed_pipe_separated", "seed_expected"]
        check_relations_equal(project.adapter, downstream_pair)

    def _check_relation_end_state(self, run_result, project, exists: bool):
        """Verify the seed still matches and the downstream model is present or absent.

        :param run_result: Result of the dbt invocation under test; must hold one entry.
        :param project: Project fixture supplying the adapter used for the checks.
        :param exists: Whether the downstream model relation is expected to exist.
        """
        assert len(run_result) == 1
        check_relations_equal(project.adapter, ["seed_pipe_separated", "seed_expected"])

        # Select the existence assertion up front instead of branching around the call.
        verify = check_table_does_exist if exists else check_table_does_not_exist
        verify(project.adapter, "models__downstream_from_seed_pipe_separated")


class TestSeedWithUniqueDelimiter(SeedUniqueDelimiterTestBase):
    """Happy path: the seed file and the configured delimiter are both a pipe."""

    def test_seed_with_unique_delimiter(self, project):
        """Build everything, re-run ``dbt seed``, and confirm the end state."""
        self._build_relations_for_test(project)
        reseed_result = run_dbt(["seed"])
        self._check_relation_end_state(run_result=reseed_result, project=project, exists=True)


class TestSeedWithWrongDelimiter(SeedUniqueDelimiterTestBase):
    """Failure path: the configured delimiter (``;``) differs from the seed's pipe."""

    @pytest.fixture(scope="class")
    def project_config_update(self):
        """Override the base config with a semicolon delimiter."""
        seed_settings = {"quote_columns": False, "delimiter": ";"}
        return {"seeds": seed_settings}

    def test_seed_with_wrong_delimiter(self, project):
        """``dbt seed`` is expected to fail and report a syntax error."""
        seed_run = run_dbt(["seed"], expect_pass=False)
        first_message = seed_run.results[0].message
        assert "syntax error" in first_message.lower()


class TestSeedWithEmptyDelimiter(SeedUniqueDelimiterTestBase):
    """Failure path: the delimiter is configured as an empty string."""

    @pytest.fixture(scope="class")
    def project_config_update(self):
        """Override the base config with an empty delimiter value."""
        seed_settings = {"quote_columns": False, "delimiter": ""}
        return {"seeds": seed_settings}

    def test_seed_with_empty_delimiter(self, project):
        """``dbt seed`` is expected to fail and report a compilation error."""
        seed_run = run_dbt(["seed"], expect_pass=False)
        first_message = seed_run.results[0].message
        assert "compilation error" in first_message.lower()


class TestSimpleSeedEnabledViaConfig(object):
@pytest.fixture(scope="session")
def seeds(self):
Expand Down
1 change: 1 addition & 0 deletions tests/functional/artifacts/expected_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def get_rendered_seed_config(**updates):
"pre-hook": [],
"post-hook": [],
"column_types": {},
"delimiter": ",",
"quoting": {},
"tags": [],
"quote_columns": True,
Expand Down
1 change: 1 addition & 0 deletions tests/functional/list/test_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,7 @@ def expect_seed_output(self):
"pre-hook": [],
"quoting": {},
"column_types": {},
"delimiter": ",",
"persist_docs": {},
"quote_columns": False,
"full_refresh": None,
Expand Down
3 changes: 3 additions & 0 deletions tests/unit/test_contracts_graph_parsed.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,7 @@ def basic_parsed_seed_dict():
"alias": "foo",
"config": {
"column_types": {},
"delimiter": ",",
"enabled": True,
"materialized": "seed",
"persist_docs": {},
Expand Down Expand Up @@ -611,6 +612,7 @@ def complex_parsed_seed_dict():
"alias": "foo",
"config": {
"column_types": {},
"delimiter": ",",
"enabled": True,
"materialized": "seed",
"persist_docs": {"relation": True, "columns": True},
Expand Down Expand Up @@ -669,6 +671,7 @@ def complex_parsed_seed_object():
alias="foo",
config=SeedConfig(
quote_columns=True,
delimiter=",",
persist_docs={"relation": True, "columns": True},
),
deferred=False,
Expand Down

0 comments on commit 6130a6e

Please sign in to comment.