From be31a70f78d8db79262c398425b6c7c35e5775de Mon Sep 17 00:00:00 2001 From: zmstone Date: Tue, 9 Jul 2024 09:47:19 +0200 Subject: [PATCH] Fix bytes and fixed JSON value decode --- src/avro_json_decoder.erl | 22 ++++++++++++---------- src/avro_json_encoder.erl | 2 +- test/avro_json_decoder_tests.erl | 28 ++++++++++++++++++++++------ test/data/interop.avsc | 2 +- 4 files changed, 36 insertions(+), 18 deletions(-) diff --git a/src/avro_json_decoder.erl b/src/avro_json_decoder.erl index cad7996..d233990 100644 --- a/src/avro_json_decoder.erl +++ b/src/avro_json_decoder.erl @@ -1,5 +1,5 @@ %%%----------------------------------------------------------------------------- -%%% Copyright (c) 2013-2018 Klarna AB +%%% Copyright (c) 2013-2024 Klarna AB %%% %%% This file is provided to you under the Apache License, %%% Version 2.0 (the "License"); you may not use this file @@ -371,16 +371,18 @@ parse_prim(V, Type) when ?IS_STRING_TYPE(Type) andalso is_binary(V) -> avro_primitive:string(V). +%% Avro bytes and fixed type values are encoded as \u-escaped strings, +%% e.g. \u00ff for 255. +%% The JSON library (jsone), however, decodes them into UTF-8 strings, +%% so here we try to revert that. -spec parse_bytes(binary()) -> binary(). -parse_bytes(BytesStr) -> - list_to_binary(parse_bytes(BytesStr, [])). - --spec parse_bytes(binary(), [byte()]) -> [byte()]. -parse_bytes(<<>>, Acc) -> - lists:reverse(Acc); -parse_bytes(<<"\\u00", B1, B0, Rest/binary>>, Acc) -> - Byte = erlang:list_to_integer([B1, B0], 16), - parse_bytes(Rest, [Byte | Acc]). +parse_bytes(Bytes) -> + Original = unicode:characters_to_list(Bytes, utf8), + try + iolist_to_binary(Original) + catch _:_ -> + error({invalid_bytes_value, Bytes}) + end.
-spec parse_record(json_value(), record_type(), lkup_fun(), decoder_options()) -> diff --git a/src/avro_json_encoder.erl b/src/avro_json_encoder.erl index 836e2ee..4cd668c 100644 --- a/src/avro_json_encoder.erl +++ b/src/avro_json_encoder.erl @@ -1,6 +1,6 @@ %% coding: latin-1 %%%----------------------------------------------------------------------------- -%%% Copyright (c) 2013-2018 Klarna AB +%%% Copyright (c) 2013-2024 Klarna AB %%% %%% This file is provided to you under the Apache License, %%% Version 2.0 (the "License"); you may not use this file diff --git a/test/avro_json_decoder_tests.erl b/test/avro_json_decoder_tests.erl index a55f131..eb09bfd 100644 --- a/test/avro_json_decoder_tests.erl +++ b/test/avro_json_decoder_tests.erl @@ -1,6 +1,6 @@ %% coding: latin-1 %%%------------------------------------------------------------------- -%%% Copyright (c) 2013-2018 Klarna AB +%%% Copyright (c) 2013-2024 Klarna AB %%% %%% This file is provided to you under the Apache License, %%% Version 2.0 (the "License"); you may not use this file @@ -235,10 +235,24 @@ parse_fixed_type_test() -> ?assertEqual(ExpectedType, Fixed). parse_bytes_value_test() -> - Json = <<"\\u0010\\u0000\\u00FF">>, - Value = parse_value(Json, avro_primitive:bytes_type(), none), + RawJson = <<"{\"a\":\"\\u0010\\u0000\\u00FF\"}">>, + #{<<"a">> := Bytes} = jsone:decode(RawJson), + ?assertEqual([16,0,255], unicode:characters_to_list(Bytes, utf8)), + Value = parse_value(Bytes, avro_primitive:bytes_type(), none), ?assertEqual(avro_primitive:bytes(<<16,0,255>>), Value). 
+bytes_value_encode_decode_test() -> + Fields = [avro_record:define_field("a", bytes)], + Schema = avro_record:type("Test", Fields, [{namespace, "name.space"}]), + Bytes = iolist_to_binary(lists:seq(0, 255)), + Record = avro_record:new(Schema, [{"a", Bytes}]), + Json = avro_json_encoder:encode_value(Record), + Lkup = fun(_) -> Schema end, + Opts = avro:make_decoder_options([{is_wrapped, false}]), + Decoded = avro_json_decoder:decode_value(Json, Schema, Lkup, Opts), + ?assertEqual([{<<"a">>, Bytes}], Decoded), + ok. + parse_record_value_test() -> %% This test also tests parsing other types inside the record TestRecord = get_test_record(), @@ -337,11 +351,13 @@ parse_map_value_test() -> parse_fixed_value_test() -> Type = avro_fixed:type("FooBar", 2), - Json = <<"\\u0001\\u007f">>, + RawJson = <<"{\"a\":\"\\u0001\\u007f\"}">>, + #{<<"a">> := Bytes} = jsone:decode(RawJson), + ?assertEqual([1,127], unicode:characters_to_list(Bytes, utf8)), ExpectedValue = avro_fixed:new(Type, <<1,127>>), - ?assertEqual(ExpectedValue, parse_value(Json, Type, none)), + ?assertEqual(ExpectedValue, parse_value(Bytes, Type, none)), ?assertEqual(<<1,127>>, - parse(Json, Type, none, + parse(Bytes, Type, none, avro:make_decoder_options([{is_wrapped, false}]))). parse_value_with_lkup_fun_test() -> diff --git a/test/data/interop.avsc b/test/data/interop.avsc index 8cfbba2..7be2c72 100644 --- a/test/data/interop.avsc +++ b/test/data/interop.avsc @@ -6,7 +6,7 @@ {"name": "boolField", "type": "boolean"}, {"name": "floatField", "type": "float"}, {"name": "doubleField", "type": "double"}, - {"name": "bytesField", "type": "bytes"}, + {"name": "bytesField", "type": "bytes", "default": "\u0000"}, {"name": "nullField", "type": "null"}, {"name": "arrayField", "type": {"type": "array", "items": "double"}}, {"name": "mapField", "type":