Skip to content

Commit

Permalink
Decode and store unknown varints (protocolbuffers#233)
Browse files Browse the repository at this point in the history
  • Loading branch information
whatyouhide authored Dec 13, 2021
1 parent fcb57b6 commit 454f97f
Show file tree
Hide file tree
Showing 13 changed files with 219 additions and 85 deletions.
2 changes: 0 additions & 2 deletions conformance/exemptions.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
Recommended.Proto2.JsonInput.FieldNameExtension.Validator
Required.Proto2.JsonInput.StoresDefaultPrimitive.Validator
Required.Proto2.ProtobufInput.UnknownVarint.ProtobufOutput
Required.Proto3.JsonInput.Any.JsonOutput
Required.Proto3.JsonInput.Any.ProtobufOutput
Required.Proto3.JsonInput.AnyNested.JsonOutput
Expand All @@ -21,4 +20,3 @@ Required.Proto3.JsonInput.AnyWithValueForInteger.JsonOutput
Required.Proto3.JsonInput.AnyWithValueForInteger.ProtobufOutput
Required.Proto3.JsonInput.AnyWithValueForJsonObject.JsonOutput
Required.Proto3.JsonInput.AnyWithValueForJsonObject.ProtobufOutput
Required.Proto3.ProtobufInput.UnknownVarint.ProtobufOutput
53 changes: 53 additions & 0 deletions lib/protobuf.ex
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,59 @@ defmodule Protobuf do
@spec encode_to_iodata(struct()) :: iodata()
defdelegate encode_to_iodata(struct), to: Protobuf.Encoder

@doc """
Returns the unknown varint fields stored in the given message struct.

In Protobuf, a payload can be decoded with a different version of the schema than the one
that was used to encode it. The payload can then contain fields that are not present in
the schema used for decoding. You might want to keep such fields around in order to
re-encode them, log them, or inspect them in some other way. A common case is a
"round-trip": you decode a payload, update the resulting message, and re-encode it for
future use. In that case you would probably want the unknown fields to be re-encoded too,
so that the payload stays symmetric.

The returned value is a list of `{field_number, field_value}` tuples, where `field_number`
is the number of the unknown field in the schema used for its encoding and `field_value`
is its varint-decoded value.

These fields must be read through this function, rather than directly from the struct,
because the name of the struct field that holds them is *dynamically generated* when
`use Protobuf` is called (to avoid potential conflicts with existing schema fields).

Raises `KeyError` if the struct has no such field.

## Examples

Imagine you have this Protobuf schema:

    message User {
      uint32 age = 1;
    }

You encode this:

    payload = Protobuf.encode(User.new!(age: 30))
    #=> <<...>>

Now, you try to decode this payload using this schema instead:

    message User {
      string email = 2;
    }

In this case, this function will return the decoded unknown field:

    message = User.decode(<<...>>)
    Protobuf.get_unknown_varints(message)
    #=> [{1, 30}]

"""
@doc since: "0.10.0"
@spec get_unknown_varints(struct()) :: [varint_field]
      when varint_field: {field_number :: integer(), value :: integer()}
def get_unknown_varints(%message_mod{} = struct) do
  # The storage key is only known through the message props of the struct's own module.
  props = %Protobuf.MessageProps{} = message_mod.__message_props__()
  Map.fetch!(struct, props.unknown_varints_field)
end

@doc """
Loads extensions modules.
Expand Down
6 changes: 6 additions & 0 deletions lib/protobuf/decoder.ex
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ defmodule Protobuf.Decoder do
bin
|> build_message(module.new(), props)
|> reverse_repeated(repeated_fields)
|> Map.update!(props.unknown_varints_field, &Enum.reverse/1)
|> transform_module(module)
end

Expand Down Expand Up @@ -170,6 +171,11 @@ defmodule Protobuf.Decoder do
new_value = value_for_field(value, current_value, prop)
Protobuf.Extension.put(mod, message, ext_mod, prop.name_atom, new_value)

# Unknown varints
_ when wire_type == wire_varint() ->
unknown_varint = {field_number, value}
Map.update!(message, props.unknown_varints_field, &[unknown_varint | &1])

_ ->
message
end
Expand Down
111 changes: 66 additions & 45 deletions lib/protobuf/dsl.ex
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,55 @@ defmodule Protobuf.DSL do

msg_props = generate_message_props(fields, oneofs, extensions, options)

defines_t_type? = Module.defines_type?(env.module, {:t, 0})
defines_defstruct? = Module.defines?(env.module, {:__struct__, 1})

quote do
@spec __message_props__() :: Protobuf.MessageProps.t()
def __message_props__ do
unquote(Macro.escape(msg_props))
end

unquote(maybe_gen_defstruct(env.module, msg_props))

unquote(maybe_def_t_typespec(env.module, msg_props, extension_props))
cond do
# If both "defstruct" and "@type t()" are defined, it's probably okay because that's the code
# this library used to generate, but we want to get rid of it, so we warn.
unquote(defines_defstruct?) and unquote(defines_t_type?) ->
IO.warn("""
Since v0.10.0 of the :protobuf library, the t/0 type and the struct are automatically \
generated for modules that call "use Protobuf" if they are Protobuf enums or messages. \
Remove your explicit definition of both of these or regenerate the files with the \
latest version of the protoc-gen-elixir plugin. This warning will become an error \
in version 0.10.0+ of the :protobuf library.\
""")

# If users defined only "defstruct" OR "@type t()", it means either they didn't generate
# the code through this library or they modified the generated files. In either case,
# let's raise here since we could have inconsistencies between the user-defined spec/type
# and our type/spec, respectively.
unquote(defines_defstruct?) or unquote(defines_t_type?) ->
raise """
since v0.9.0 of the :protobuf library, the t/0 type and the struct are automatically \
generated for modules that call "use Protobuf" if they are Protobuf enums or messages. \
In this module, you defined the struct OR the t/0 type. This could cause inconsistencies \
with the type or struct generated by the library. You can either:
* make sure that you define both the t/0 type as well as the struct, but that will
become an error in later versions of the Protobuf library
* remove both the t/0 type definition as well as the struct definition and let the
library define both
* regenerate the file from the Protobuf source definition with the latest version
of the protoc-gen-elixir plugin, which won't generate the struct or the t/0 type
definition
"""

# The newest version of this library generates both the t/0 type and the struct.
true ->
unquote(def_t_typespec(msg_props, extension_props))
unquote(gen_defstruct(msg_props))
end

unquote(maybe_def_enum_functions(msg_props, fields))

Expand All @@ -80,42 +120,22 @@ defmodule Protobuf.DSL do
end
end

defp maybe_def_t_typespec(mod, %MessageProps{enum?: true} = props, _extension_props) do
unless warn_if_t_type_already_defined(mod) do
quote do
@type t() :: unquote(Protobuf.DSL.Typespecs.quoted_enum_typespec(props))
end
defp def_t_typespec(%MessageProps{enum?: true} = props, _extension_props) do
quote do
@type t() :: unquote(Protobuf.DSL.Typespecs.quoted_enum_typespec(props))
end
end

defp maybe_def_t_typespec(mod, %MessageProps{} = props, _extension_props = nil) do
unless warn_if_t_type_already_defined(mod) do
quote do
@type t() :: unquote(Protobuf.DSL.Typespecs.quoted_message_typespec(props))
end
defp def_t_typespec(%MessageProps{} = props, _extension_props = nil) do
quote do
@type t() :: unquote(Protobuf.DSL.Typespecs.quoted_message_typespec(props))
end
end

defp maybe_def_t_typespec(_mod, _props, _extension_props) do
defp def_t_typespec(_props, _extension_props) do
nil
end

defp warn_if_t_type_already_defined(mod) do
if Module.defines_type?(mod, {:t, 0}) do
IO.warn("""
Since v0.9.0 of the :protobuf library, the t/0 type is automatically generated for \
modules that call "use Protobuf" if they are Protobuf enums or messages. \
Remove your explicit definition of the t/0 type or regenerate the files with the \
latest version of the protoc-gen-elixir plugin. This warning will become an error \
in version 0.10.0+ of the :protobuf library.\
""")

true
else
false
end
end

defp maybe_def_enum_functions(%{syntax: syntax, enum?: true, field_props: props}, fields) do
if syntax == :proto3 do
unless props[0], do: raise("The first enum value must be zero in proto3")
Expand Down Expand Up @@ -198,10 +218,20 @@ defmodule Protobuf.DSL do
oneof: Enum.reverse(oneofs),
enum?: Keyword.get(options, :enum) == true,
map?: Keyword.get(options, :map) == true,
extension_range: extensions
extension_range: extensions,
unknown_varints_field: gen_unique_varints_field(Map.keys(field_tags))
}
end

# Picks a name for the struct field that stores unknown varints, making sure it does not
# clash with any name in `existing_field_names`.
#
# `:__unknown_varints__` is preferred. Only when that name is already taken are the
# indexed fallbacks (`:__unknown_varints_0__`, `:__unknown_varints_1__`, ...) generated,
# lazily and one at a time, until a free one is found.
defp gen_unique_varints_field(existing_field_names) do
  base_name = :__unknown_varints__

  if base_name in existing_field_names do
    0
    |> Stream.iterate(&(&1 + 1))
    |> Stream.map(fn index -> :"__unknown_varints_#{index}__" end)
    |> Enum.find(fn candidate -> candidate not in existing_field_names end)
  else
    base_name
  end
end

defp gen_extension_props([_ | _] = extends) do
extensions =
Map.new(extends, fn {extendee, name_atom, fnum, opts} ->
Expand Down Expand Up @@ -377,19 +407,6 @@ defmodule Protobuf.DSL do
props
end

defp maybe_gen_defstruct(mod, message_props) do
if Module.defines?(mod, {:__struct__, 1}) do
IO.warn("""
Since v0.9.0 of the :protobuf library, structs are automatically generated for \
modules that call "use Protobuf". Remove the struct definition or regenerate the files \
with the latest version of the protoc-gen-elixir plugin. This warning \
will become an error in version 0.10.0+ of the :protobuf library.\
""")
else
gen_defstruct(message_props)
end
end

defp gen_defstruct(%MessageProps{} = message_props) do
regular_fields =
for {_fnum, %FieldProps{oneof: nil} = prop} <- message_props.field_props,
Expand All @@ -406,8 +423,12 @@ defmodule Protobuf.DSL do
[]
end

unknown_varints_field = {message_props.unknown_varints_field, _default = []}

struct_fields = regular_fields ++ oneof_fields ++ extension_fields ++ [unknown_varints_field]

quote do
defstruct unquote(Macro.escape(regular_fields ++ oneof_fields ++ extension_fields))
defstruct unquote(Macro.escape(struct_fields))
end
end

Expand Down
13 changes: 10 additions & 3 deletions lib/protobuf/dsl/typespecs.ex
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,16 @@ defmodule Protobuf.DSL.Typespecs do
_other -> []
end

quote do
%__MODULE__{unquote_splicing(regular_fields ++ oneof_fields ++ extension_fields)}
end
unknown_varints_fields =
if field_name = message_props.unknown_varints_field do
[{field_name, quote(do: [{field_number :: integer(), value :: integer()}])}]
else
[]
end

field_specs = regular_fields ++ oneof_fields ++ extension_fields ++ unknown_varints_fields

quote do: %__MODULE__{unquote_splicing(field_specs)}
end

defp oneof_spec(syntax, possible_oneof_fields) do
Expand Down
8 changes: 8 additions & 0 deletions lib/protobuf/encoder.ex
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ defmodule Protobuf.Encoder do

encoded = encode_fields(Map.values(field_props), syntax, struct, oneofs, _acc = [])

encoded = [encoded | encode_unknown_varints(struct, props)]

if syntax == :proto2 do
[encoded | encode_extensions(struct)]
else
Expand Down Expand Up @@ -136,6 +138,12 @@ defmodule Protobuf.Encoder do
end
end

# Re-encodes the unknown varints stored on `message` as a list of iodata chunks, one per
# `{field_number, value}` pair, in the order in which they are stored.
#
# Each chunk is the result of `encode_fnum/2` for the field number with the varint wire
# type, followed by the varint-encoded value. Raises `KeyError` if `message` does not have
# the field named by `unknown_varints_field` in the message props.
defp encode_unknown_varints(message, %MessageProps{} = props) do
  unknown_varints = Map.fetch!(message, props.unknown_varints_field)

  for {field_number, varint} <- unknown_varints do
    encoded_key = Protobuf.Encoder.encode_fnum(field_number, wire_varint())
    [encoded_key, Varint.encode(varint)]
  end
end

defp transform_module(message, module) do
if transform_module = module.transform_module() do
transform_module.encode(message, module)
Expand Down
9 changes: 6 additions & 3 deletions lib/protobuf/message_props.ex
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,14 @@ defmodule Protobuf.MessageProps do
repeated_fields: [field_name()],
embedded_fields: [field_name()],
syntax: atom(),
oneof: [{atom, non_neg_integer}],
oneof: [{field_name(), tag()}],
enum?: boolean(),
extendable?: boolean(),
map?: boolean(),
extension_range: [{non_neg_integer, non_neg_integer}]
extension_range: [{non_neg_integer(), non_neg_integer()}],
unknown_varints_field: atom()
}

defstruct ordered_tags: [],
tags_map: %{},
field_props: %{},
Expand All @@ -32,5 +34,6 @@ defmodule Protobuf.MessageProps do
enum?: false,
extendable?: false,
map?: false,
extension_range: []
extension_range: [],
unknown_varints_field: nil
end
2 changes: 1 addition & 1 deletion test/protobuf/conformance_regressions_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ defmodule Protobuf.ConformanceRegressionsTest do

@describetag message_type: "protobuf_test_messages.proto3.TestAllTypesProto3"

@tag skip: "Issue #218"
# Issue #218
@tag conformance_input: ~S(\250\037\001)
test "Required.Proto3.ProtobufInput.UnknownVarint.ProtobufOutput",
%{proto_input: proto_input, message_mod: message_mod} do
Expand Down
18 changes: 13 additions & 5 deletions test/protobuf/decoder_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,6 @@ defmodule Protobuf.DecoderTest do
end)
end

test "skips unknown varint fields" do
struct = Decoder.decode(<<8, 42, 32, 100, 45, 0, 0, 247, 66>>, TestMsg.Foo)
assert struct == TestMsg.Foo.new(a: 42, d: 123.5)
end

test "skips unknown string fields" do
struct = Decoder.decode(<<8, 42, 45, 0, 0, 247, 66>>, TestMsg.Foo)
assert struct == TestMsg.Foo.new(a: 42, d: 123.5)
Expand All @@ -59,6 +54,19 @@ defmodule Protobuf.DecoderTest do
assert struct == TestMsg.Foo.new(a: 123, g: [12, 13, 14])
end

test "decodes unknown varints" do
  # Tag 8 = field 1, varint wire type; decoded into `a`.
  known_varint = <<8, 42>>
  # Tag 45 = field 5, 32-bit wire type; little-endian float 123.5, decoded into `d`.
  known_float = <<45, 0, 0, 247, 66>>
  # Tag 32 = field 4, varint wire type; expected to come back as an unknown varint.
  unknown_small = <<32, 100>>
  # Tag <<160, 6>> = field 100, varint wire type; the value is the largest 64-bit varint.
  unknown_max = <<160, 6, 255, 255, 255, 255, 255, 255, 255, 255, 255, 1>>

  payload = known_varint <> known_float <> unknown_small <> unknown_max
  decoded = Decoder.decode(payload, TestMsg.Foo)

  assert decoded.a == 42
  assert decoded.d == 123.5
  assert Protobuf.get_unknown_varints(decoded) == [{4, 100}, {100, 18_446_744_073_709_551_615}]
end

test "decodes repeated embedded fields" do
bin = <<74, 7, 8, 12, 18, 3, 97, 98, 99, 74, 2, 8, 13>>
struct = Decoder.decode(bin, TestMsg.Foo)
Expand Down
Loading

0 comments on commit 454f97f

Please sign in to comment.