Skip to content

Commit

Permalink
fix: add warning when encountering unknown field types (#1989)
Browse files Browse the repository at this point in the history
* fix: add warning when encountering unknown field types

The types returned for currently unsupported field types may change
in the future, when support is added. Warn users that the types they
are using are not yet supported.

* fix: add warning for unknown subfield types as well

* fix: remove unused warnings

* fix: remove leftover debugging code

* move test case closer to related test

* add comments

* fix formatting

* fix test_table and use warnings.warn instead of pytest.warn

* add explicit warning about behavior subject to change in the future
add warning for write and warn about future behavior changes

* add default converter for _SCALAR_VALUE_TO_JSON_PARAM

* factor out shared warning

* fix test case and make coverage happy

* add unit test to StructQueryParameter class

---------

Co-authored-by: Lingqing Gan <[email protected]>
  • Loading branch information
suzmue and Linchin authored Aug 13, 2024
1 parent d0bb87a commit 8f5a41d
Show file tree
Hide file tree
Showing 6 changed files with 123 additions and 26 deletions.
38 changes: 28 additions & 10 deletions google/cloud/bigquery/_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import math
import re
import os
import warnings
from typing import Optional, Union

from dateutil import relativedelta
Expand Down Expand Up @@ -297,12 +298,7 @@ def _record_from_json(value, field):
record = {}
record_iter = zip(field.fields, value["f"])
for subfield, cell in record_iter:
converter = _CELLDATA_FROM_JSON[subfield.field_type]
if subfield.mode == "REPEATED":
value = [converter(item["v"], subfield) for item in cell["v"]]
else:
value = converter(cell["v"], subfield)
record[subfield.name] = value
record[subfield.name] = _field_from_json(cell["v"], subfield)
return record


Expand Down Expand Up @@ -382,7 +378,11 @@ def _field_to_index_mapping(schema):


def _field_from_json(resource, field):
converter = _CELLDATA_FROM_JSON.get(field.field_type, lambda value, _: value)
def default_converter(value, field):
_warn_unknown_field_type(field)
return value

converter = _CELLDATA_FROM_JSON.get(field.field_type, default_converter)
if field.mode == "REPEATED":
return [converter(item["v"], field) for item in resource]
else:
Expand Down Expand Up @@ -484,6 +484,11 @@ def _json_to_json(value):
return json.dumps(value)


def _string_to_json(value):
"""NOOP string -> string coercion"""
return value


def _timestamp_to_json_parameter(value):
"""Coerce 'value' to an JSON-compatible representation.
Expand Down Expand Up @@ -596,6 +601,7 @@ def _range_field_to_json(range_element_type, value):
"DATE": _date_to_json,
"TIME": _time_to_json,
"JSON": _json_to_json,
"STRING": _string_to_json,
# Make sure DECIMAL and BIGDECIMAL are handled, even though
# requests for them should be converted to NUMERIC. Better safe
# than sorry.
Expand All @@ -609,6 +615,15 @@ def _range_field_to_json(range_element_type, value):
_SCALAR_VALUE_TO_JSON_PARAM["TIMESTAMP"] = _timestamp_to_json_parameter


def _warn_unknown_field_type(field):
warnings.warn(
"Unknown type '{}' for field '{}'. Behavior reading and writing this type is not officially supported and may change in the future.".format(
field.field_type, field.name
),
FutureWarning,
)


def _scalar_field_to_json(field, row_value):
"""Maps a field and value to a JSON-safe value.
Expand All @@ -621,9 +636,12 @@ def _scalar_field_to_json(field, row_value):
Returns:
Any: A JSON-serializable object.
"""
converter = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type)
if converter is None: # STRING doesn't need converting
return row_value

def default_converter(value):
_warn_unknown_field_type(field)
return value

converter = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type, default_converter)
return converter(row_value)


Expand Down
4 changes: 3 additions & 1 deletion google/cloud/bigquery/_pandas_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,9 @@ def bq_to_arrow_field(bq_field, array_type=None):
metadata=metadata,
)

warnings.warn("Unable to determine type for field '{}'.".format(bq_field.name))
warnings.warn(
"Unable to determine Arrow type for field '{}'.".format(bq_field.name)
)
return None


Expand Down
20 changes: 9 additions & 11 deletions google/cloud/bigquery/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,9 +591,8 @@ def to_api_repr(self) -> dict:
Dict: JSON mapping
"""
value = self.value
converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.type_)
if converter is not None:
value = converter(value) # type: ignore
converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.type_, lambda value: value)
value = converter(value) # type: ignore
resource: Dict[str, Any] = {
"parameterType": {"type": self.type_},
"parameterValue": {"value": value},
Expand Down Expand Up @@ -748,9 +747,10 @@ def to_api_repr(self) -> dict:
else:
a_type = self.array_type.to_api_repr()

converter = _SCALAR_VALUE_TO_JSON_PARAM.get(a_type["type"])
if converter is not None:
values = [converter(value) for value in values] # type: ignore
converter = _SCALAR_VALUE_TO_JSON_PARAM.get(
a_type["type"], lambda value: value
)
values = [converter(value) for value in values] # type: ignore
a_values = [{"value": value} for value in values]

resource = {
Expand Down Expand Up @@ -792,7 +792,7 @@ def __repr__(self):


class StructQueryParameter(_AbstractQueryParameter):
"""Named / positional query parameters for struct values.
"""Name / positional query parameters for struct values.
Args:
name (Optional[str]):
Expand Down Expand Up @@ -897,10 +897,8 @@ def to_api_repr(self) -> dict:
values[name] = repr_["parameterValue"]
else:
s_types[name] = {"name": name, "type": {"type": type_}}
converter = _SCALAR_VALUE_TO_JSON_PARAM.get(type_)
if converter is not None:
value = converter(value) # type: ignore
values[name] = {"value": value}
converter = _SCALAR_VALUE_TO_JSON_PARAM.get(type_, lambda value: value)
values[name] = {"value": converter(value)}

resource = {
"parameterType": {
Expand Down
62 changes: 61 additions & 1 deletion tests/unit/test__helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import decimal
import json
import os
import warnings
import pytest
import packaging
import unittest
Expand Down Expand Up @@ -640,6 +641,17 @@ def test_w_single_scalar_column(self):
row = {"f": [{"v": "1"}]}
self.assertEqual(self._call_fut(row, schema=[col]), (1,))

def test_w_unknown_type(self):
# SELECT 1 AS col
col = _Field("REQUIRED", "col", "UNKNOWN")
row = {"f": [{"v": "1"}]}
with warnings.catch_warnings(record=True) as warned:
self.assertEqual(self._call_fut(row, schema=[col]), ("1",))
self.assertEqual(len(warned), 1)
warning = warned[0]
self.assertTrue("UNKNOWN" in str(warning))
self.assertTrue("col" in str(warning))

def test_w_single_scalar_geography_column(self):
# SELECT 1 AS col
col = _Field("REQUIRED", "geo", "GEOGRAPHY")
Expand All @@ -660,6 +672,17 @@ def test_w_single_array_column(self):
row = {"f": [{"v": [{"v": "1"}, {"v": "2"}, {"v": "3"}]}]}
self.assertEqual(self._call_fut(row, schema=[col]), ([1, 2, 3],))

def test_w_unknown_type_repeated(self):
# SELECT 1 AS col
col = _Field("REPEATED", "col", "UNKNOWN")
row = {"f": [{"v": [{"v": "1"}, {"v": "2"}, {"v": "3"}]}]}
with warnings.catch_warnings(record=True) as warned:
self.assertEqual(self._call_fut(row, schema=[col]), (["1", "2", "3"],))
self.assertEqual(len(warned), 1)
warning = warned[0]
self.assertTrue("UNKNOWN" in str(warning))
self.assertTrue("col" in str(warning))

def test_w_struct_w_nested_array_column(self):
# SELECT ([1, 2], 3, [4, 5]) as col
first = _Field("REPEATED", "first", "INTEGER")
Expand All @@ -684,6 +707,39 @@ def test_w_struct_w_nested_array_column(self):
({"first": [1, 2], "second": 3, "third": [4, 5]},),
)

def test_w_unknown_type_subfield(self):
# SELECT [(1, 2, 3), (4, 5, 6)] as col
first = _Field("REPEATED", "first", "UNKNOWN1")
second = _Field("REQUIRED", "second", "UNKNOWN2")
third = _Field("REPEATED", "third", "INTEGER")
col = _Field("REQUIRED", "col", "RECORD", fields=[first, second, third])
row = {
"f": [
{
"v": {
"f": [
{"v": [{"v": "1"}, {"v": "2"}]},
{"v": "3"},
{"v": [{"v": "4"}, {"v": "5"}]},
]
}
}
]
}
with warnings.catch_warnings(record=True) as warned:
self.assertEqual(
self._call_fut(row, schema=[col]),
({"first": ["1", "2"], "second": "3", "third": [4, 5]},),
)
self.assertEqual(len(warned), 2) # 1 warning per unknown field.
warned = [str(warning) for warning in warned]
self.assertTrue(
any(["first" in warning and "UNKNOWN1" in warning for warning in warned])
)
self.assertTrue(
any(["second" in warning and "UNKNOWN2" in warning for warning in warned])
)

def test_w_array_of_struct(self):
# SELECT [(1, 2, 3), (4, 5, 6)] as col
first = _Field("REQUIRED", "first", "INTEGER")
Expand Down Expand Up @@ -1076,8 +1132,12 @@ def _call_fut(self, field, value):
def test_w_unknown_field_type(self):
field = _make_field("UNKNOWN")
original = object()
converted = self._call_fut(field, original)
with warnings.catch_warnings(record=True) as warned:
converted = self._call_fut(field, original)
self.assertIs(converted, original)
self.assertEqual(len(warned), 1)
warning = warned[0]
self.assertTrue("UNKNOWN" in str(warning))

def test_w_known_field_type(self):
field = _make_field("INT64")
Expand Down
19 changes: 19 additions & 0 deletions tests/unit/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -1780,6 +1780,25 @@ def test_to_api_repr_w_nested_struct(self):
param = self._make_one("foo", scalar_1, sub)
self.assertEqual(param.to_api_repr(), EXPECTED)

def test_to_api_repr_w_unknown_type(self):
EXPECTED = {
"name": "foo",
"parameterType": {
"type": "STRUCT",
"structTypes": [
{"name": "bar", "type": {"type": "INT64"}},
{"name": "baz", "type": {"type": "UNKNOWN_TYPE"}},
],
},
"parameterValue": {
"structValues": {"bar": {"value": "123"}, "baz": {"value": "abc"}}
},
}
sub_1 = _make_subparam("bar", "INT64", 123)
sub_2 = _make_subparam("baz", "UNKNOWN_TYPE", "abc")
param = self._make_one("foo", sub_1, sub_2)
self.assertEqual(param.to_api_repr(), EXPECTED)

def test___eq___wrong_type(self):
field = self._make_one("test", _make_subparam("bar", "STRING", "abc"))
other = object()
Expand Down
6 changes: 3 additions & 3 deletions tests/unit/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -2751,9 +2751,9 @@ def test_to_arrow_w_unknown_type(self):
self.assertEqual(ages, [33, 29])
self.assertEqual(sports, ["volleyball", "basketball"])

self.assertEqual(len(warned), 1)
warning = warned[0]
self.assertTrue("sport" in str(warning))
# Expect warning from both the arrow conversion, and the json deserialization.
self.assertEqual(len(warned), 2)
self.assertTrue(all("sport" in str(warning) for warning in warned))

def test_to_arrow_w_empty_table(self):
pyarrow = pytest.importorskip(
Expand Down

0 comments on commit 8f5a41d

Please sign in to comment.