Skip to content

Commit

Permalink
feat: add default value expression (#1408)
Browse files Browse the repository at this point in the history
* feat: Adds default_value_expression to SchemaField
  • Loading branch information
aribray authored Nov 15, 2022
1 parent 931285f commit 207aa50
Show file tree
Hide file tree
Showing 5 changed files with 135 additions and 16 deletions.
38 changes: 37 additions & 1 deletion google/cloud/bigquery/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,13 +93,38 @@ class SchemaField(object):
Scale (digits after decimal) of fields with NUMERIC or BIGNUMERIC type.
max_length: Maximum length of fields with STRING or BYTES type.
default_value_expression: str, Optional
Used to specify the default value of a field using a SQL expression. It can only be set for
top-level fields (columns).
You can use a struct or array expression to specify the default value for the entire struct or
array. The valid SQL expressions are:
- Literals for all data types, including STRUCT and ARRAY.
- The following functions:
`CURRENT_TIMESTAMP`
`CURRENT_TIME`
`CURRENT_DATE`
`CURRENT_DATETIME`
`GENERATE_UUID`
`RAND`
`SESSION_USER`
`ST_GEOPOINT`
- Struct or array composed with the above allowed functions, for example:
"[CURRENT_DATE(), DATE '2020-01-01']"
"""

def __init__(
self,
name: str,
field_type: str,
mode: str = "NULLABLE",
default_value_expression: str = None,
description: Union[str, _DefaultSentinel] = _DEFAULT_VALUE,
fields: Iterable["SchemaField"] = (),
policy_tags: Union["PolicyTagList", None, _DefaultSentinel] = _DEFAULT_VALUE,
Expand All @@ -115,6 +140,8 @@ def __init__(
self._properties["mode"] = mode.upper()
if description is not _DEFAULT_VALUE:
self._properties["description"] = description
if default_value_expression is not None:
self._properties["defaultValueExpression"] = default_value_expression
if precision is not _DEFAULT_VALUE:
self._properties["precision"] = precision
if scale is not _DEFAULT_VALUE:
Expand Down Expand Up @@ -154,13 +181,16 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField":
fields = api_repr.get("fields", ())
policy_tags = api_repr.get("policyTags", _DEFAULT_VALUE)

default_value_expression = api_repr.get("defaultValueExpression", None)

if policy_tags is not None and policy_tags is not _DEFAULT_VALUE:
policy_tags = PolicyTagList.from_api_repr(policy_tags)

return cls(
field_type=field_type,
fields=[cls.from_api_repr(f) for f in fields],
mode=mode.upper(),
default_value_expression=default_value_expression,
description=description,
name=api_repr["name"],
policy_tags=policy_tags,
Expand Down Expand Up @@ -197,6 +227,11 @@ def is_nullable(self):
"""bool: whether 'mode' is 'nullable'."""
return self.mode == "NULLABLE"

@property
def default_value_expression(self):
    """Optional[str]: Default value of the field, given as a SQL expression.

    Read from the ``defaultValueExpression`` key of the field's properties;
    ``None`` when that key is absent.
    """
    return self._properties.get("defaultValueExpression")

@property
def description(self):
"""Optional[str]: description for the field."""
Expand Down Expand Up @@ -260,7 +295,7 @@ def _key(self):
field_type = self.field_type.upper() if self.field_type is not None else None

# Type can temporarily be set to None if the code needs a SchemaField instance,
# but has npt determined the exact type of the field yet.
# but has not determined the exact type of the field yet.
if field_type is not None:
if field_type == "STRING" or field_type == "BYTES":
if self.max_length is not None:
Expand All @@ -281,6 +316,7 @@ def _key(self):
field_type,
# Mode is always str, if not given it defaults to a str value
self.mode.upper(), # pytype: disable=attribute-error
self.default_value_expression,
self.description,
self._fields,
policy_tags,
Expand Down
2 changes: 1 addition & 1 deletion google/cloud/bigquery/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1421,7 +1421,7 @@ def get(self, key: str, default: Any = None) -> Any:
>>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z')
None
The default value can be overrided with the ``default`` parameter.
The default value can be overridden with the ``default`` parameter.
>>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z', '')
''
Expand Down
62 changes: 62 additions & 0 deletions tests/system/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,68 @@ def test_create_table_with_real_custom_policy(self):
list(table.schema[1].policy_tags.names), [child_policy_tag.name]
)

def test_create_table_with_default_value_expression(self):
    """Create a table whose columns declare default value expressions.

    Verifies that:

    - the schema fetched back from the service equals the schema sent,
      including each field's ``default_value_expression``;
    - rows inserted without a value for a column come back with that
      column populated.
    """
    dataset = self.temp_dataset(
        _make_dataset_id("create_table_with_default_value_expression")
    )

    table_id = "test_table"
    username_field_name = "username"
    timestamp_field_name = "timestamp_field_with_default_value_expression"

    string_default_val_expression = "'FOO'"
    timestamp_default_val_expression = "CURRENT_TIMESTAMP"

    schema = [
        bigquery.SchemaField(
            username_field_name,
            "STRING",
            default_value_expression=string_default_val_expression,
        ),
        bigquery.SchemaField(
            timestamp_field_name,
            "TIMESTAMP",
            default_value_expression=timestamp_default_val_expression,
        ),
    ]
    table_arg = Table(dataset.table(table_id), schema=schema)
    self.assertFalse(_table_exists(table_arg))

    table = helpers.retry_403(Config.CLIENT.create_table)(table_arg)
    self.to_delete.insert(0, table)

    self.assertTrue(_table_exists(table))

    # Fetch the created table and its metadata to verify that the default
    # value expression is assigned to fields.
    remote_table = Config.CLIENT.get_table(table)
    remote_schema = remote_table.schema
    self.assertEqual(remote_schema, schema)

    # Look fields up by *name*. Comparing the name against the expression
    # text would never match and would silently skip the assertions.
    remote_fields = {field.name: field for field in remote_schema}
    self.assertIn(username_field_name, remote_fields)
    self.assertIn(timestamp_field_name, remote_fields)
    self.assertEqual(
        string_default_val_expression,
        remote_fields[username_field_name].default_value_expression,
    )
    self.assertEqual(
        timestamp_default_val_expression,
        remote_fields[timestamp_field_name].default_value_expression,
    )

    # Insert rows into the created table to verify default values are
    # populated when a value is not provided.
    NOW_SECONDS = 1448911495.484366
    NOW = datetime.datetime.utcfromtimestamp(NOW_SECONDS).replace(tzinfo=UTC)

    # Rows to insert. Row #1 omits the TIMESTAMP column, so it should receive
    # the CURRENT_TIMESTAMP default. Row #2 omits the STRING column, so it
    # should receive the 'FOO' default.
    ROWS = [{username_field_name: "john_doe"}, {timestamp_field_name: NOW}]

    errors = Config.CLIENT.insert_rows(table, ROWS)
    self.assertEqual(len(errors), 0)

    # Get the inserted rows. Key them by username instead of relying on the
    # order in which list_rows returns them.
    rows = list(Config.CLIENT.list_rows(table))
    self.assertEqual(len(rows), 2)
    rows_by_username = {row.get(username_field_name): row for row in rows}

    # Assert that row values are populated with the default value expressions.
    self.assertIn("john_doe", rows_by_username)
    self.assertIsInstance(
        rows_by_username["john_doe"].get(timestamp_field_name),
        datetime.datetime,
    )
    self.assertIn("FOO", rows_by_username)

def test_create_table_w_time_partitioning_w_clustering_fields(self):
from google.cloud.bigquery.table import TimePartitioning
from google.cloud.bigquery.table import TimePartitioningType
Expand Down
40 changes: 28 additions & 12 deletions tests/unit/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -8395,9 +8395,19 @@ def test_schema_from_json_with_file_path(self):
]"""

expected = [
SchemaField("qtr", "STRING", "REQUIRED", "quarter"),
SchemaField("rep", "STRING", "NULLABLE", "sales representative"),
SchemaField("sales", "FLOAT", "NULLABLE", "total sales"),
SchemaField("qtr", "STRING", "REQUIRED", description="quarter"),
SchemaField(
"rep",
"STRING",
"NULLABLE",
description="sales representative",
),
SchemaField(
"sales",
"FLOAT",
"NULLABLE",
description="total sales",
),
]

client = self._make_client()
Expand Down Expand Up @@ -8441,9 +8451,11 @@ def test_schema_from_json_with_file_object(self):
]"""

expected = [
SchemaField("qtr", "STRING", "REQUIRED", "quarter"),
SchemaField("rep", "STRING", "NULLABLE", "sales representative"),
SchemaField("sales", "FLOAT", "NULLABLE", "total sales"),
SchemaField("qtr", "STRING", "REQUIRED", description="quarter"),
SchemaField(
"rep", "STRING", "NULLABLE", description="sales representative"
),
SchemaField("sales", "FLOAT", "NULLABLE", description="total sales"),
]

client = self._make_client()
Expand Down Expand Up @@ -8477,9 +8489,11 @@ def test_schema_to_json_with_file_path(self):
]

schema_list = [
SchemaField("qtr", "STRING", "REQUIRED", "quarter"),
SchemaField("rep", "STRING", "NULLABLE", "sales representative"),
SchemaField("sales", "FLOAT", "NULLABLE", "total sales"),
SchemaField("qtr", "STRING", "REQUIRED", description="quarter"),
SchemaField(
"rep", "STRING", "NULLABLE", description="sales representative"
),
SchemaField("sales", "FLOAT", "NULLABLE", description="total sales"),
]

client = self._make_client()
Expand Down Expand Up @@ -8521,9 +8535,11 @@ def test_schema_to_json_with_file_object(self):
]

schema_list = [
SchemaField("qtr", "STRING", "REQUIRED", "quarter"),
SchemaField("rep", "STRING", "NULLABLE", "sales representative"),
SchemaField("sales", "FLOAT", "NULLABLE", "total sales"),
SchemaField("qtr", "STRING", "REQUIRED", description="quarter"),
SchemaField(
"rep", "STRING", "NULLABLE", description="sales representative"
),
SchemaField("sales", "FLOAT", "NULLABLE", description="total sales"),
]

fake_file = io.StringIO()
Expand Down
9 changes: 7 additions & 2 deletions tests/unit/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,10 @@ def test_constructor_defaults(self):
self.assertIsNone(field.description)
self.assertEqual(field.fields, ())
self.assertIsNone(field.policy_tags)
self.assertIsNone(field.default_value_expression)

def test_constructor_explicit(self):
FIELD_DEFAULT_VALUE_EXPRESSION = "This is the default value for this field"
field = self._make_one(
"test",
"STRING",
Expand All @@ -58,10 +60,12 @@ def test_constructor_explicit(self):
"projects/f/locations/g/taxonomies/h/policyTags/i",
)
),
default_value_expression=FIELD_DEFAULT_VALUE_EXPRESSION,
)
self.assertEqual(field.name, "test")
self.assertEqual(field.field_type, "STRING")
self.assertEqual(field.mode, "REQUIRED")
self.assertEqual(field.default_value_expression, FIELD_DEFAULT_VALUE_EXPRESSION)
self.assertEqual(field.description, "Testing")
self.assertEqual(field.fields, ())
self.assertEqual(
Expand Down Expand Up @@ -182,6 +186,7 @@ def test_from_api_repr_defaults(self):
self.assertEqual(field.field_type, "RECORD")
self.assertEqual(field.mode, "NULLABLE")
self.assertEqual(len(field.fields), 0)
self.assertEqual(field.default_value_expression, None)

# Keys not present in API representation shouldn't be included in
# _properties.
Expand Down Expand Up @@ -527,12 +532,12 @@ def test___hash__not_equals(self):

def test___repr__(self):
field1 = self._make_one("field1", "STRING")
expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, (), None)"
expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, None, (), None)"
self.assertEqual(repr(field1), expected)

def test___repr__type_not_set(self):
field1 = self._make_one("field1", field_type=None)
expected = "SchemaField('field1', None, 'NULLABLE', None, (), None)"
expected = "SchemaField('field1', None, 'NULLABLE', None, None, (), None)"
self.assertEqual(repr(field1), expected)

def test___repr__evaluable_no_policy_tags(self):
Expand Down

0 comments on commit 207aa50

Please sign in to comment.