From 3dc574e5ffad53e21bfa352b10bf4ed246dc9f95 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Wed, 18 Aug 2021 09:02:12 -0400 Subject: [PATCH] [Backport] #3703 to v0.20 (#3765) * Fix/bigquery job label length (#3703) * add blueprints to resolve issue * revert to previous version * intentionally failing test * add imports * add validation in existing function * add passing test for length validation * add current sanitized label * remove duplicate var * Make logging output 2 lines Co-authored-by: Jeremy Cohen * Raise RuntimeException to better handle error Co-authored-by: Jeremy Cohen * update test * fix flake8 errors * update changelog Co-authored-by: Jeremy Cohen * Update changelog Co-authored-by: sungchun12 --- CHANGELOG.md | 6 +++++ .../dbt/adapters/bigquery/connections.py | 13 ++++++++++- test/unit/test_bigquery_adapter.py | 23 ++++++++++++++++++- 3 files changed, 40 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b0120fca170..3b85da4b69e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ ## dbt 0.20.2 (Release TBD) +### Under the hood +- Better error handling for BigQuery job labels that are too long. ([#3612](https://github.com/dbt-labs/dbt/pull/3612), [#3703](https://github.com/dbt-labs/dbt/pull/3703)) + +Contributors: +- [@sungchun12](https://github.com/sungchun12) ([#3703](https://github.com/dbt-labs/dbt/pull/3703)) + ## dbt 0.20.2rc1 (August 16, 2021) diff --git a/plugins/bigquery/dbt/adapters/bigquery/connections.py b/plugins/bigquery/dbt/adapters/bigquery/connections.py index 137b02d4f48..62de829b23b 100644 --- a/plugins/bigquery/dbt/adapters/bigquery/connections.py +++ b/plugins/bigquery/dbt/adapters/bigquery/connections.py @@ -595,9 +595,20 @@ def _is_retryable(error): _SANITIZE_LABEL_PATTERN = re.compile(r"[^a-z0-9_-]") +_VALIDATE_LABEL_LENGTH_LIMIT = 63 + def _sanitize_label(value: str) -> str: """Return a legal value for a BigQuery label.""" value = value.strip().lower() value = _SANITIZE_LABEL_PATTERN.sub("_", value) - return value + value_length = len(value) + if value_length > _VALIDATE_LABEL_LENGTH_LIMIT: + error_msg = ( + f"Job label length {value_length} is greater than length limit: " + f"{_VALIDATE_LABEL_LENGTH_LIMIT}\n" + f"Current sanitized label: {value}" + ) + raise RuntimeException(error_msg) + else: + return value diff --git a/test/unit/test_bigquery_adapter.py b/test/unit/test_bigquery_adapter.py index 5f7d79054e6..147b8cba0df 100644 --- a/test/unit/test_bigquery_adapter.py +++ b/test/unit/test_bigquery_adapter.py @@ -1,6 +1,8 @@ import agate import decimal import json +import string +import random import re import pytest import unittest @@ -17,7 +19,7 @@ from dbt.adapters.bigquery import BigQueryRelation from dbt.adapters.bigquery import Plugin as BigQueryPlugin from dbt.adapters.bigquery.connections import BigQueryConnectionManager -from dbt.adapters.bigquery.connections import _sanitize_label +from dbt.adapters.bigquery.connections import _sanitize_label, _VALIDATE_LABEL_LENGTH_LIMIT from dbt.adapters.base.query_headers import MacroQueryStringSetter from dbt.clients import agate_helper import dbt.exceptions @@ -972,3 +974,22 @@ def test_convert_time_type(self): ) def test_sanitize_label(input, output): assert _sanitize_label(input) == output + + +@pytest.mark.parametrize( + "label_length", + [64, 65, 100], +) +def test_sanitize_label_length(label_length): + random_string = "".join( + random.choice(string.ascii_uppercase + string.digits) + for i in range(label_length) + ) + test_error_msg = ( + f"Job label length {label_length} is greater than length limit: " + f"{_VALIDATE_LABEL_LENGTH_LIMIT}\n" + f"Current sanitized label: {random_string.lower()}" + ) + with pytest.raises(dbt.exceptions.RuntimeException) as error_info: + _sanitize_label(random_string) + assert error_info.value.args[0] == test_error_msg