Skip to content

Commit

Permalink
feat(bigquery/storage/managedwriter): improve proto3 normalization (#…
Browse files Browse the repository at this point in the history
…6082)

* feat(bigquery/storage/managedwriter): improve proto3 normalization

This PR does mildly alarming things in the service of enabling proto3
support for users.

NormalizeDescriptor will now handle altering a proto3 descriptor to
an equivalent behavior using proto2.  In the case of proto3 where
field presence isn't used (e.g. no optional keyword), normalization
uses the ability in proto2 to set optional defaults to mimic the
proto3 behavior where scalar types are the default type value (0 for
numerics, empty string for strings/bytes, and first defined enum value
for enums).

This doesn't, however, completely enable proto3 support.  We still need
to enable the backend to deal properly with the well-known wrapper
types, which is also work in flight with the backend.
  • Loading branch information
shollyman authored Jun 14, 2022
1 parent 90489b1 commit 6a742ff
Show file tree
Hide file tree
Showing 3 changed files with 223 additions and 39 deletions.
33 changes: 33 additions & 0 deletions bigquery/storage/managedwriter/adapt/protoconversion.go
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,9 @@ func tableFieldSchemaToFieldDescriptorProto(field *storagepb.TableFieldSchema, i
// In addition to nesting messages, this method also handles some encapsulation of enum types to avoid possible
// conflicts due to ambiguities, and clears oneof indices as oneof isn't a concept that maps into BigQuery
// schemas.
//
// To enable proto3 usage, this function will also rewrite proto3 descriptors into equivalent proto2 form.
// Such rewrites include setting the appropriate default values for proto3 fields.
func NormalizeDescriptor(in protoreflect.MessageDescriptor) (*descriptorpb.DescriptorProto, error) {
return normalizeDescriptorInternal(in, newStringSet(), newStringSet(), newStringSet(), nil)
}
Expand All @@ -311,6 +314,36 @@ func normalizeDescriptorInternal(in protoreflect.MessageDescriptor, visitedTypes
for i := 0; i < in.Fields().Len(); i++ {
inField := in.Fields().Get(i)
resultFDP := protodesc.ToFieldDescriptorProto(inField)
// For proto3 messages without presence, use proto2 default values to match proto3
// behavior in default values.
if inField.Syntax() == protoreflect.Proto3 && inField.Cardinality() != protoreflect.Repeated {
// Only set default value if there's no field presence.
if resultFDP.Proto3Optional == nil || !resultFDP.GetProto3Optional() {
switch resultFDP.GetType() {
case descriptorpb.FieldDescriptorProto_TYPE_BOOL:
resultFDP.DefaultValue = proto.String("false")
case descriptorpb.FieldDescriptorProto_TYPE_BYTES, descriptorpb.FieldDescriptorProto_TYPE_STRING:
resultFDP.DefaultValue = proto.String("")
case descriptorpb.FieldDescriptorProto_TYPE_ENUM:
// Resolve the proto3 default value. The default value should be the value name.
defValue := inField.Enum().Values().ByNumber(inField.Default().Enum())
resultFDP.DefaultValue = proto.String(string(defValue.Name()))
case descriptorpb.FieldDescriptorProto_TYPE_DOUBLE,
descriptorpb.FieldDescriptorProto_TYPE_FLOAT,
descriptorpb.FieldDescriptorProto_TYPE_INT64,
descriptorpb.FieldDescriptorProto_TYPE_UINT64,
descriptorpb.FieldDescriptorProto_TYPE_INT32,
descriptorpb.FieldDescriptorProto_TYPE_FIXED64,
descriptorpb.FieldDescriptorProto_TYPE_FIXED32,
descriptorpb.FieldDescriptorProto_TYPE_UINT32,
descriptorpb.FieldDescriptorProto_TYPE_SFIXED32,
descriptorpb.FieldDescriptorProto_TYPE_SFIXED64,
descriptorpb.FieldDescriptorProto_TYPE_SINT32,
descriptorpb.FieldDescriptorProto_TYPE_SINT64:
resultFDP.DefaultValue = proto.String("0")
}
}
}
// Clear proto3 optional annotation, as the backend converter can
// treat this as a proto2 optional.
if resultFDP.Proto3Optional != nil {
Expand Down
183 changes: 168 additions & 15 deletions bigquery/storage/managedwriter/adapt/protoconversion_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -568,23 +568,25 @@ func TestNormalizeDescriptor(t *testing.T) {
Name: proto.String("google_protobuf_Int64Value"),
Field: []*descriptorpb.FieldDescriptorProto{
{
Name: proto.String("value"),
JsonName: proto.String("value"),
Number: proto.Int32(1),
Type: descriptorpb.FieldDescriptorProto_TYPE_INT64.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
Name: proto.String("value"),
JsonName: proto.String("value"),
Number: proto.Int32(1),
DefaultValue: proto.String("0"),
Type: descriptorpb.FieldDescriptorProto_TYPE_INT64.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
},
},
},
{
Name: proto.String("google_protobuf_StringValue"),
Field: []*descriptorpb.FieldDescriptorProto{
{
Name: proto.String("value"),
JsonName: proto.String("value"),
Number: proto.Int32(1),
Type: descriptorpb.FieldDescriptorProto_TYPE_STRING.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
Name: proto.String("value"),
JsonName: proto.String("value"),
Number: proto.Int32(1),
DefaultValue: proto.String(""),
Type: descriptorpb.FieldDescriptorProto_TYPE_STRING.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
},
},
},
Expand Down Expand Up @@ -628,11 +630,12 @@ func TestNormalizeDescriptor(t *testing.T) {
Name: proto.String("testdata_SimpleMessageProto3WithOptional"),
Field: []*descriptorpb.FieldDescriptorProto{
{
Name: proto.String("name"),
JsonName: proto.String("name"),
Number: proto.Int32(1),
Type: descriptorpb.FieldDescriptorProto_TYPE_STRING.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
Name: proto.String("name"),
JsonName: proto.String("name"),
Number: proto.Int32(1),
DefaultValue: proto.String(""),
Type: descriptorpb.FieldDescriptorProto_TYPE_STRING.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
},
{
Name: proto.String("value"),
Expand All @@ -644,6 +647,156 @@ func TestNormalizeDescriptor(t *testing.T) {
},
},
},
{
description: "WithProto3Defaults",
in: (&testdata.ValidationP3Defaults{}).ProtoReflect().Descriptor(),
want: &descriptorpb.DescriptorProto{
Name: proto.String("testdata_ValidationP3Defaults"),
Field: []*descriptorpb.FieldDescriptorProto{
{
Name: proto.String("double_field"),
JsonName: proto.String("doubleField"),
Number: proto.Int32(1),
Type: descriptorpb.FieldDescriptorProto_TYPE_DOUBLE.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
DefaultValue: proto.String("0"),
},
{
Name: proto.String("float_field"),
JsonName: proto.String("floatField"),
Number: proto.Int32(2),
Type: descriptorpb.FieldDescriptorProto_TYPE_FLOAT.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
DefaultValue: proto.String("0"),
},
{
Name: proto.String("int32_field"),
JsonName: proto.String("int32Field"),
Number: proto.Int32(3),
Type: descriptorpb.FieldDescriptorProto_TYPE_INT32.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
DefaultValue: proto.String("0"),
},
{
Name: proto.String("int64_field"),
JsonName: proto.String("int64Field"),
Number: proto.Int32(4),
Type: descriptorpb.FieldDescriptorProto_TYPE_INT64.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
DefaultValue: proto.String("0"),
},
{
Name: proto.String("uint32_field"),
JsonName: proto.String("uint32Field"),
Number: proto.Int32(5),
Type: descriptorpb.FieldDescriptorProto_TYPE_UINT32.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
DefaultValue: proto.String("0"),
},
{
Name: proto.String("sint32_field"),
JsonName: proto.String("sint32Field"),
Number: proto.Int32(7),
Type: descriptorpb.FieldDescriptorProto_TYPE_SINT32.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
DefaultValue: proto.String("0"),
},
{
Name: proto.String("sint64_field"),
JsonName: proto.String("sint64Field"),
Number: proto.Int32(8),
Type: descriptorpb.FieldDescriptorProto_TYPE_SINT64.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
DefaultValue: proto.String("0"),
},
{
Name: proto.String("fixed32_field"),
JsonName: proto.String("fixed32Field"),
Number: proto.Int32(9),
Type: descriptorpb.FieldDescriptorProto_TYPE_FIXED32.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
DefaultValue: proto.String("0"),
},
{
Name: proto.String("sfixed32_field"),
JsonName: proto.String("sfixed32Field"),
Number: proto.Int32(11),
Type: descriptorpb.FieldDescriptorProto_TYPE_SFIXED32.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
DefaultValue: proto.String("0"),
},
{
Name: proto.String("sfixed64_field"),
JsonName: proto.String("sfixed64Field"),
Number: proto.Int32(12),
Type: descriptorpb.FieldDescriptorProto_TYPE_SFIXED64.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
DefaultValue: proto.String("0"),
},
{
Name: proto.String("bool_field"),
JsonName: proto.String("boolField"),
Number: proto.Int32(13),
Type: descriptorpb.FieldDescriptorProto_TYPE_BOOL.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
DefaultValue: proto.String("false"),
},
{
Name: proto.String("string_field"),
JsonName: proto.String("stringField"),
Number: proto.Int32(14),
Type: descriptorpb.FieldDescriptorProto_TYPE_STRING.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
DefaultValue: proto.String(""),
},
{
Name: proto.String("bytes_field"),
JsonName: proto.String("bytesField"),
Number: proto.Int32(15),
Type: descriptorpb.FieldDescriptorProto_TYPE_BYTES.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
DefaultValue: proto.String(""),
},
{
Name: proto.String("enum_field"),
JsonName: proto.String("enumField"),
Number: proto.Int32(16),
Type: descriptorpb.FieldDescriptorProto_TYPE_ENUM.Enum(),
TypeName: proto.String("testdata_Proto3ExampleEnum_E.Proto3ExampleEnum"),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
DefaultValue: proto.String("P3_UNDEFINED"),
},
},
NestedType: []*descriptorpb.DescriptorProto{
{
Name: proto.String("testdata_Proto3ExampleEnum_E"),
EnumType: []*descriptorpb.EnumDescriptorProto{
{
Name: proto.String("Proto3ExampleEnum"),
Value: []*descriptorpb.EnumValueDescriptorProto{
{
Name: proto.String("P3_UNDEFINED"),
Number: proto.Int32(0),
},
{
Name: proto.String("P3_THING"),
Number: proto.Int32(1),
},
{
Name: proto.String("P3_OTHER_THING"),
Number: proto.Int32(2),
},
{
Name: proto.String("P3_THIRD_THING"),
Number: proto.Int32(3),
},
},
},
},
},
},
},
},
{
description: "WithExternalEnum",
in: (&testdata.ExternalEnumMessage{}).ProtoReflect().Descriptor(),
Expand Down
46 changes: 22 additions & 24 deletions bigquery/storage/managedwriter/validation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,31 +151,29 @@ func TestValidation_Values(t *testing.T) {
withIntegerValueCount("enum_field", int64(testdata.Proto2ExampleEnum_P2_OTHER_THING), 1),
},
},
/*
BACKEND BEHAVIOR CURRENTLY INCORRECT for proto3 non-presence defaults.
{
description: "proto3 default values",
tableSchema: testdata.ValidationBaseSchema,
inputRow: &testdata.ValidationP3Defaults{},
constraints: []constraintOption{
withExactRowCount(1),
withFloatValueCount("double_field", 0, 1),
withFloatValueCount("float_field", 0, 1),
withIntegerValueCount("int32_field", 0, 1),
withIntegerValueCount("int64_field", 0, 1),
withIntegerValueCount("uint32_field", 0, 1),
withIntegerValueCount("sint32_field", 0, 1),
withIntegerValueCount("sint64_field", 0, 1),
withIntegerValueCount("fixed32_field", 0, 1),
withIntegerValueCount("sfixed32_field", 0, 1),
withIntegerValueCount("sfixed64_field", 0, 1),
withBoolValueCount("bool_field", false, 1),
withStringValueCount("string_field", "", 1),
withBytesValueCount("bytes_field", []byte(""), 1),
withIntegerValueCount("enum_field", int64(0), 1),
},

{
description: "proto3 default values",
tableSchema: testdata.ValidationBaseSchema,
inputRow: &testdata.ValidationP3Defaults{},
constraints: []constraintOption{
withExactRowCount(1),
withFloatValueCount("double_field", 0, 1),
withFloatValueCount("float_field", 0, 1),
withIntegerValueCount("int32_field", 0, 1),
withIntegerValueCount("int64_field", 0, 1),
withIntegerValueCount("uint32_field", 0, 1),
withIntegerValueCount("sint32_field", 0, 1),
withIntegerValueCount("sint64_field", 0, 1),
withIntegerValueCount("fixed32_field", 0, 1),
withIntegerValueCount("sfixed32_field", 0, 1),
withIntegerValueCount("sfixed64_field", 0, 1),
withBoolValueCount("bool_field", false, 1),
withStringValueCount("string_field", "", 1),
withBytesValueCount("bytes_field", []byte(""), 1),
withIntegerValueCount("enum_field", int64(0), 1),
},
*/
},
/*
BACKEND BEHAVIOR FOR WRAPPER TYPES CURRENTLY INCORRECT
{
Expand Down

0 comments on commit 6a742ff

Please sign in to comment.