
Case-insensitive check on partition matching #888

Merged · 2 commits · Aug 22, 2023
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20230818-214616.yaml
@@ -0,0 +1,6 @@
kind: Fixes
body: case insensitive check on partition matching
time: 2023-08-18T21:46:16.828488+02:00
custom:
Author: Kayrnt
Issue: "886"
2 changes: 1 addition & 1 deletion dbt/adapters/bigquery/__init__.py
@@ -2,7 +2,7 @@
 from dbt.adapters.bigquery.connections import BigQueryCredentials
 from dbt.adapters.bigquery.relation import BigQueryRelation  # noqa
 from dbt.adapters.bigquery.column import BigQueryColumn  # noqa
-from dbt.adapters.bigquery.impl import BigQueryAdapter, GrantTarget  # noqa
+from dbt.adapters.bigquery.impl import BigQueryAdapter, GrantTarget, PartitionConfig  # noqa

 from dbt.adapters.base import AdapterPlugin
 from dbt.include import bigquery
5 changes: 3 additions & 2 deletions dbt/adapters/bigquery/impl.py
@@ -620,12 +620,13 @@ def _partitions_match(table, conf_partition: Optional[PartitionConfig]) -> bool:
             table_field = (
                 table.time_partitioning.field.lower() if table.time_partitioning.field else None
             )
+
             table_granularity = table.partitioning_type
             conf_table_field = conf_partition.field
             return (
-                table_field == conf_table_field
+                table_field == conf_table_field.lower()
                 or (conf_partition.time_ingestion_partitioning and table_field is not None)
-            ) and table_granularity == conf_partition.granularity
+            ) and table_granularity.lower() == conf_partition.granularity.lower()
Contributor
The OP's table actually didn't have the granularity key set:

-- models/foo_sans_granularity.sql
{{
  config(
    partition_by={
      "field": "updated_at",
      "data_type": "date",
    }
  )
}}

select id, updated_at, updated_at_utc from sources.raw

Do we need to account for that?


Daily partitioning is the default for all column types.

My understanding from the dbt docs was that the default granularity is day partitioning.

Is this where the default is set?
https://github.com/dbt-labs/dbt-bigquery/blob/main/dbt/adapters/bigquery/impl.py#L77

Contributor Author

Yes, there is a default, so it should be safe.
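For reference, a minimal sketch of the default being discussed, assuming (per the linked line) that PartitionConfig is a dataclass whose granularity defaults to "day". The sketch is abridged and omits dbt's serialization mixins:

from dataclasses import dataclass
from typing import Any, Dict, Optional


@dataclass
class PartitionConfig:  # abridged sketch, not the full class from impl.py
    field: str
    data_type: str = "date"
    granularity: str = "day"  # the default referred to above: day partitioning
    range: Optional[Dict[str, Any]] = None
    time_ingestion_partitioning: bool = False
    copy_partitions: bool = False


# A partition_by config without a "granularity" key therefore still carries
# "day", so conf_partition.granularity.lower() is safe to call:
config = PartitionConfig(field="updated_at", data_type="date")
assert config.granularity == "day"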

         elif conf_partition and table.range_partitioning is not None:
             dest_part = table.range_partitioning
             conf_part = conf_partition.range or {}
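To see the effect of this change in isolation, here is a minimal standalone sketch (the values are illustrative, not the adapter's actual objects). The BigQuery API reports time-partitioning granularity in upper case (e.g. "DAY"), while dbt partition_by configs conventionally use lower case (e.g. "day"), so the old strict equality treated identical partitioning as a mismatch:

# Granularity as reported by the BigQuery API (cf. table.partitioning_type)
api_granularity = "DAY"
# Granularity as typically written in a dbt partition_by config
conf_granularity = "day"

# Before this PR: strict equality flags a spurious partition change
assert api_granularity != conf_granularity

# After this PR: the comparison is case-insensitive, so identical
# partitioning is recognized as a match
assert api_granularity.lower() == conf_granularity.lower()

The same normalization is applied to the partition field name, which is why the new unit test below configures the field as "TS" against a table whose field is "ts".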
31 changes: 30 additions & 1 deletion tests/unit/test_bigquery_adapter.py
@@ -12,10 +12,12 @@

 import dbt.dataclass_schema

+from dbt.adapters.bigquery import PartitionConfig
 from dbt.adapters.bigquery import BigQueryCredentials
 from dbt.adapters.bigquery import BigQueryAdapter
 from dbt.adapters.bigquery import BigQueryRelation
 from dbt.adapters.bigquery import Plugin as BigQueryPlugin
+from google.cloud.bigquery.table import Table
 from dbt.adapters.bigquery.connections import BigQueryConnectionManager
 from dbt.adapters.bigquery.connections import _sanitize_label, _VALIDATE_LABEL_LENGTH_LIMIT
 from dbt.adapters.base.query_headers import MacroQueryStringSetter
@@ -376,7 +378,10 @@ def test_location_user_agent(self, mock_bq, mock_auth_default):
         mock_client.assert_not_called()
         connection.handle
         mock_client.assert_called_once_with(
-            "dbt-unit-000000", creds, location="Luna Station", client_info=HasUserAgent()
+            "dbt-unit-000000",
+            creds,
+            location="Luna Station",
+            client_info=HasUserAgent(),
         )


@@ -1023,6 +1028,30 @@ def test_convert_time_type(self):
         for col_idx, expect in enumerate(expected):
             assert BigQueryAdapter.convert_time_type(agate_table, col_idx) == expect

+    # The casing in this case can't be enforced on the API side,
+    # so we have to validate that we have a case-insensitive comparison
+    def test_partitions_match(self):
+        table = Table.from_api_repr(
+            {
+                "tableReference": {
+                    "projectId": "test-project",
+                    "datasetId": "test_dataset",
+                    "tableId": "test_table",
+                },
+                "timePartitioning": {"type": "DAY", "field": "ts"},
+            }
+        )
+        partition_config = PartitionConfig.parse(
+            {
+                "field": "TS",
+                "data_type": "date",
+                "granularity": "day",
+                "time_ingestion_partitioning": False,
+                "copy_partitions": False,
+            }
+        )
+        assert BigQueryAdapter._partitions_match(table, partition_config) is True
+

 class TestBigQueryGrantAccessTo(BaseTestBigQueryAdapter):
     entity = BigQueryRelation.from_dict(