allows naming conventions to be changed #998
@@ -0,0 +1,73 @@
import os
import pytest

from dlt.common.utils import uniq_id

from tests.load.pipeline.utils import destinations_configs, DestinationTestConfiguration
from tests.cases import arrow_table_all_data_types, prepare_shuffled_tables
from tests.pipeline.utils import assert_data_table_counts, assert_load_info, load_tables_to_dicts
from tests.utils import TestDataItemFormat


@pytest.mark.parametrize(
    "destination_config",
    destinations_configs(default_sql_configs=True, subset=["postgres", "snowflake"]),
    ids=lambda x: x.name,
)
@pytest.mark.parametrize("item_type", ["object", "table"])
def test_load_csv(
    destination_config: DestinationTestConfiguration, item_type: TestDataItemFormat
) -> None:
    os.environ["DATA_WRITER__DISABLE_COMPRESSION"] = "True"
Review comment on the line above:

    note to self: if I set up those helper functions correctly, we can also easily inspect gzipped files without this setting. ibis / dataframes will make this stuff obsolete anyway, because we can use them in the tests in the future.

Reply:

    @sh-rp I did it already for read_text in fsclient. Will push the changes today.
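The helper the comment alludes to could be quite small. A minimal sketch, assuming gzip is detected by its magic bytes (read_job_file_text is a hypothetical name, not an existing dlt helper):

import gzip
import pathlib


def read_job_file_text(path: str, encoding: str = "utf-8") -> str:
    # Hypothetical helper: read a load-package job file whether or not
    # compression was enabled, by sniffing the gzip magic bytes
    # (0x1f 0x8b) rather than trusting the file extension.
    raw = pathlib.Path(path).read_bytes()
    if raw[:2] == b"\x1f\x8b":
        raw = gzip.decompress(raw)
    return raw.decode(encoding)

With something along these lines, the DATA_WRITER__DISABLE_COMPRESSION override above would no longer be needed just to read the produced csv back.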
    pipeline = destination_config.setup_pipeline("postgres_" + uniq_id(), full_refresh=True)
    table, shuffled_table, shuffled_removed_column = prepare_shuffled_tables()

    # convert to pylist when loading from objects; this kicks in the csv reader
    if item_type == "object":
        table, shuffled_table, shuffled_removed_column = (
            table.to_pylist(),
            shuffled_table.to_pylist(),
            shuffled_removed_column.to_pylist(),
        )

    load_info = pipeline.run(
        [shuffled_removed_column, shuffled_table, table],
        table_name="table",
        loader_file_format="csv",
    )
    assert_load_info(load_info)
    job = load_info.load_packages[0].jobs["completed_jobs"][0].file_path
    assert job.endswith("csv")
    assert_data_table_counts(pipeline, {"table": 5432 * 3})
    load_tables_to_dicts(pipeline, "table")

@pytest.mark.parametrize(
    "destination_config",
    destinations_configs(default_sql_configs=True, subset=["postgres", "snowflake"]),
    ids=lambda x: x.name,
)
def test_empty_csv_from_arrow(destination_config: DestinationTestConfiguration) -> None:
    os.environ["DATA_WRITER__DISABLE_COMPRESSION"] = "True"
    os.environ["RESTORE_FROM_DESTINATION"] = "False"
    pipeline = destination_config.setup_pipeline("postgres_" + uniq_id(), full_refresh=True)
    table, _, _ = arrow_table_all_data_types("arrow-table", include_json=False)

    load_info = pipeline.run(
        table.schema.empty_table(), table_name="arrow_table", loader_file_format="csv"
    )
    assert_load_info(load_info)
    assert len(load_info.load_packages[0].jobs["completed_jobs"]) == 1
    job = load_info.load_packages[0].jobs["completed_jobs"][0].file_path
    assert job.endswith("csv")
    assert_data_table_counts(pipeline, {"arrow_table": 0})
    with pipeline.sql_client() as client:
        with client.execute_query("SELECT * FROM arrow_table") as cur:
            columns = [col.name for col in cur.description]
            assert len(cur.fetchall()) == 0

    # all columns in order, casefolded to the destination casing (we use cursor.description)
    casefold = pipeline.destination.capabilities().casefold_identifier
    assert columns == list(
        map(casefold, pipeline.default_schema.get_table_columns("arrow_table").keys())
    )
Review comment:

    we should probably filter the tests by loader_file_format in caps and not explicitly in the test header; then all destinations that support csv will automatically be added to this test.
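To make the suggestion concrete, a minimal sketch of a capabilities-based filter, assuming destination_config.destination carries the destination name (supports_csv is a hypothetical helper; supported_loader_file_formats is an actual field on dlt's capabilities context):

from dlt.common.destination import Destination


def supports_csv(destination_name: str) -> bool:
    # Ask the destination's declared capabilities whether it can load csv,
    # instead of hard-coding subset=["postgres", "snowflake"] in the header.
    caps = Destination.from_reference(destination_name).capabilities()
    return "csv" in caps.supported_loader_file_formats

The configs produced by destinations_configs could then be filtered with this predicate, so any destination that gains csv support is picked up automatically.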
Reply:

    90% of explicit config can be inferred from caps and config interfaces.
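In the same spirit, the explicit loader_file_format="csv" passed to pipeline.run could be derived from capabilities instead of pinned in each test. A hedged sketch (the fallback logic is illustrative; the two capability fields are real):

caps = pipeline.destination.capabilities()
# Prefer csv when the destination supports it, otherwise fall back to
# whatever format the destination declares as preferred.
file_format = (
    "csv"
    if "csv" in caps.supported_loader_file_formats
    else caps.preferred_loader_file_format
)
load_info = pipeline.run(table, table_name="table", loader_file_format=file_format)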