From c791777a2a55c268ab085859692356a3d736c25b Mon Sep 17 00:00:00 2001 From: sungchun12 Date: Tue, 17 Aug 2021 13:54:31 -0500 Subject: [PATCH] Fix/bigquery job label length (#3703) * add blueprints to resolve issue * revert to previous version * intentionally failing test * add imports * add validation in existing function * add passing test for length validation * add current sanitized label * remove duplicate var * Make logging output 2 lines Co-authored-by: Jeremy Cohen * Raise RuntimeException to better handle error Co-authored-by: Jeremy Cohen * update test * fix flake8 errors * update changelog Co-authored-by: Jeremy Cohen --- CHANGELOG.md | 1 + .../dbt/adapters/bigquery/connections.py | 13 ++++++++++- test/unit/test_bigquery_adapter.py | 23 ++++++++++++++++++- 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b0120fca170..95da4a668cb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ ### Under the hood - Switch to full reparse on partial parsing exceptions. Log and report exception information. ([#3725](https://github.com/dbt-labs/dbt/issues/3725), [#3733](https://github.com/dbt-labs/dbt/pull/3733)) - Check for existence of test node when removing. ([#3711](https://github.com/dbt-labs/dbt/issues/3711), [#3750](https://github.com/dbt-labs/dbt/pull/3750)) +- Better error handling for BigQuery job labels that are too long. [#3703](https://github.com/dbt-labs/dbt/pull/3703) ## dbt 0.20.1 (August 11, 2021) diff --git a/plugins/bigquery/dbt/adapters/bigquery/connections.py b/plugins/bigquery/dbt/adapters/bigquery/connections.py index 137b02d4f48..62de829b23b 100644 --- a/plugins/bigquery/dbt/adapters/bigquery/connections.py +++ b/plugins/bigquery/dbt/adapters/bigquery/connections.py @@ -595,9 +595,20 @@ def _is_retryable(error): _SANITIZE_LABEL_PATTERN = re.compile(r"[^a-z0-9_-]") +_VALIDATE_LABEL_LENGTH_LIMIT = 63 + def _sanitize_label(value: str) -> str: """Return a legal value for a BigQuery label.""" value = value.strip().lower() value = _SANITIZE_LABEL_PATTERN.sub("_", value) - return value + value_length = len(value) + if value_length > _VALIDATE_LABEL_LENGTH_LIMIT: + error_msg = ( + f"Job label length {value_length} is greater than length limit: " + f"{_VALIDATE_LABEL_LENGTH_LIMIT}\n" + f"Current sanitized label: {value}" + ) + raise RuntimeException(error_msg) + else: + return value diff --git a/test/unit/test_bigquery_adapter.py b/test/unit/test_bigquery_adapter.py index 5f7d79054e6..147b8cba0df 100644 --- a/test/unit/test_bigquery_adapter.py +++ b/test/unit/test_bigquery_adapter.py @@ -1,6 +1,8 @@ import agate import decimal import json +import string +import random import re import pytest import unittest @@ -17,7 +19,7 @@ from dbt.adapters.bigquery import BigQueryRelation from dbt.adapters.bigquery import Plugin as BigQueryPlugin from dbt.adapters.bigquery.connections import BigQueryConnectionManager -from dbt.adapters.bigquery.connections import _sanitize_label +from dbt.adapters.bigquery.connections import _sanitize_label, _VALIDATE_LABEL_LENGTH_LIMIT from dbt.adapters.base.query_headers import MacroQueryStringSetter from dbt.clients import agate_helper import dbt.exceptions @@ -972,3 +974,22 @@ def test_convert_time_type(self): ) def test_sanitize_label(input, output): assert _sanitize_label(input) == output + + +@pytest.mark.parametrize( + "label_length", + [64, 65, 100], +) +def test_sanitize_label_length(label_length): + random_string = "".join( + random.choice(string.ascii_uppercase + string.digits) + for i in range(label_length) + ) + test_error_msg = ( + f"Job label length {label_length} is greater than length limit: " + f"{_VALIDATE_LABEL_LENGTH_LIMIT}\n" + f"Current sanitized label: {random_string.lower()}" + ) + with pytest.raises(dbt.exceptions.RuntimeException) as error_info: + _sanitize_label(random_string) + assert error_info.value.args[0] == test_error_msg