From 22e3c7fe4423d7c5f317d95f84de524253e0aed3 Mon Sep 17 00:00:00 2001 From: avivpxi <42111576+avivpxi@users.noreply.github.com> Date: Thu, 21 Apr 2022 13:46:42 +0300 Subject: [PATCH] refine api and docs according to internal reviews (#4) refine api and docs according to internal reviews --- README.md | 170 +++++++++++--------------------- benchmark_test.go | 5 +- cache.go | 14 ++- example_test.go | 5 +- unmarshal.go | 7 +- unmarshal_from_json_map.go | 7 +- unmarshal_from_json_map_test.go | 3 +- unmarshal_test.go | 3 +- 8 files changed, 76 insertions(+), 138 deletions(-) diff --git a/README.md b/README.md index f26cdd2..a2afcea 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,20 @@ # Marshmallow -Package marshmallow provides a simple API to perform flexible and performant JSON unmarshalling. Unlike other packages, -marshmallow supports unmarshalling of some known and some unknown fields with zero performance overhead nor extra coding -needed. While unmarshalling, marshmallow allows fully retaining the original data and access it via a typed struct and a -dynamic map. - -- [Install](#install) -- [Usage](#usage) - * [Where Does Marshmallow Shine](#where-does-marshmallow-shine) -- [Alternatives and Performance Benchmark](#alternatives-and-performance-benchmark) -- [API](#api) - * [Unmarshal](#unmarshal) - * [UnmarshalFromJSONMap](#unmarshalfromjsonmap) - * [API Options](#api-options) - * [Caching](#caching) + + +Marshmallow package provides a simple API to perform flexible and performant JSON unmarshalling in Go. +Unlike other packages, marshmallow supports unmarshalling of some known and some unknown fields with zero performance +overhead nor extra coding needed. While unmarshalling, marshmallow allows fully retaining the original data and access +it via a typed struct and a dynamic map. 
+ +## Contents + +- [Marshmallow](#marshmallow) + * [Install](#install) + * [Usage](#usage) + * [Performance Benchmark And Alternatives](#performance-benchmark-and-alternatives) + * [When Should I Use Marshmallow](#when-should-i-use-marshmallow) + * [API](#api) ## Install @@ -29,11 +30,10 @@ package main import ( "fmt" "github.com/perimeterx/marshmallow" - "sync" ) func main() { - marshmallow.EnableCache(&sync.Map{}) // this is used to boost performance, read more below + marshmallow.EnableCache() // this is used to boost performance, read more below v := struct { Foo string `json:"foo"` Boo []int `json:"boo"` @@ -44,7 +44,35 @@ func main() { } ``` -#### Where Does Marshmallow Shine +## Performance Benchmark And Alternatives + +Marshmallow performs best when dealing with mixed data - when some fields are known and some are unknown. +More info [below](#when-should-i-use-marshmallow). +Other solutions are available for this kind of use case, each solution is explained and documented in the link below. +The full benchmark test can be found +[here](https://github.com/PerimeterX/marshmallow/blob/8c5bba9e6dc0033f4324eca554737089a99f6e5e/benchmark_test.go). 
+ +|Benchmark|(1)|(2)|(3)|(4)| +|--|--|--|--|--| +|[unmarshall twice](https://github.com/PerimeterX/marshmallow/blob/8c5bba9e6dc0033f4324eca554737089a99f6e5e/benchmark_test.go#L40)|228693|5164 ns/op|1640 B/op|51 allocs/op| +|[raw map](https://github.com/PerimeterX/marshmallow/blob/8c5bba9e6dc0033f4324eca554737089a99f6e5e/benchmark_test.go#L66)|232236|5116 ns/op|2296 B/op|53 allocs/op| +|[go codec](https://github.com/PerimeterX/marshmallow/blob/8c5bba9e6dc0033f4324eca554737089a99f6e5e/benchmark_test.go#L121)|388442|3077 ns/op|2512 B/op|37 allocs/op| +|[marshmallow](https://github.com/PerimeterX/marshmallow/blob/8c5bba9e6dc0033f4324eca554737089a99f6e5e/benchmark_test.go#L16)|626168|1853 ns/op|608 B/op|18 allocs/op| +|[marshmallow without populating struct](https://github.com/PerimeterX/marshmallow/blob/8c5bba9e6dc0033f4324eca554737089a99f6e5e/benchmark_test.go#L162)|678616|1751 ns/op|608 B/op|18 allocs/op| + +![marshmallow performance comparison](https://raw.githubusercontent.com/PerimeterX/marshmallow/e45088ca20d4ea5be4143d418d12da63a68d6dfd/performance-chart.svg) + +**Marshmallow provides the best performance (up to X3 faster) while not requiring any extra coding.** +In fact, marshmallow performs as fast as a normal `json.Unmarshal` call, however, such a call causes loss of data for all +the fields that did not match the given struct. With marshmallow you never lose any data. 
+ +|Benchmark|(1)|(2)|(3)|(4)| +|--|--|--|--|--| +|[marshmallow](https://github.com/PerimeterX/marshmallow/blob/8c5bba9e6dc0033f4324eca554737089a99f6e5e/benchmark_test.go#L16)|626168|1853 ns/op|608 B/op|18 allocs/op| +|[native library](https://github.com/PerimeterX/marshmallow/blob/8c5bba9e6dc0033f4324eca554737089a99f6e5e/benchmark_test.go#L143)|652106|1845 ns/op|304 B/op|11 allocs/op| +|[marshmallow without populating struct](https://github.com/PerimeterX/marshmallow/blob/8c5bba9e6dc0033f4324eca554737089a99f6e5e/benchmark_test.go#L162)|678616|1751 ns/op|608 B/op|18 allocs/op| + +## When Should I Use Marshmallow Marshmallow is best suited for use cases where you are interested in all the input data, but you have predetermined information only about a subset of it. For instance, if you plan to reference two specific fields from the data, then @@ -119,102 +147,16 @@ func isAllowedToDrive(data []byte) (bool, error) { } ``` -There can be two main reasons to have an interest in all the data. First is when you eventually plan to write or pipe -the input data, and you don't want to lose any of it. Second is if you plan to perform any kind of dynamic read of -the data - this includes iterating it, reading calculated or configured field names, and others. - -## Alternatives and Performance Benchmark - -[Full Benchmark](https://github.com/PerimeterX/marshmallow/blob/8c5bba9e6dc0033f4324eca554737089a99f6e5e/benchmark_test.go) - -Other solutions available for this kind of use case, each solution is explained and documented in the link below. 
- -|Benchmark|(1)|(2)|(3)|(4)| -|--|--|--|--|--| -|[unmarshall twice](https://github.com/PerimeterX/marshmallow/blob/8c5bba9e6dc0033f4324eca554737089a99f6e5e/benchmark_test.go#L40)|228693|5164 ns/op|1640 B/op|51 allocs/op| -|[raw map](https://github.com/PerimeterX/marshmallow/blob/8c5bba9e6dc0033f4324eca554737089a99f6e5e/benchmark_test.go#L66)|232236|5116 ns/op|2296 B/op|53 allocs/op| -|[go codec](https://github.com/PerimeterX/marshmallow/blob/8c5bba9e6dc0033f4324eca554737089a99f6e5e/benchmark_test.go#L121)|388442|3077 ns/op|2512 B/op|37 allocs/op| -|[marshmallow](https://github.com/PerimeterX/marshmallow/blob/8c5bba9e6dc0033f4324eca554737089a99f6e5e/benchmark_test.go#L16)|626168|1853 ns/op|608 B/op|18 allocs/op| -|[marshmallow without populating struct](https://github.com/PerimeterX/marshmallow/blob/8c5bba9e6dc0033f4324eca554737089a99f6e5e/benchmark_test.go#L162)|678616|1751 ns/op|608 B/op|18 allocs/op| - -**Marshmallow provides the best performance (up to X3 faster) while not requiring any extra coding.** -In fact, marshmallow performs as fast as normal `json.Unmarshal` call, however, it populates both the map and the -struct. 
- -|Benchmark|(1)|(2)|(3)|(4)| -|--|--|--|--|--| -|[marshmallow](https://github.com/PerimeterX/marshmallow/blob/8c5bba9e6dc0033f4324eca554737089a99f6e5e/benchmark_test.go#L16)|626168|1853 ns/op|608 B/op|18 allocs/op| -|[native library](https://github.com/PerimeterX/marshmallow/blob/8c5bba9e6dc0033f4324eca554737089a99f6e5e/benchmark_test.go#L143)|652106|1845 ns/op|304 B/op|11 allocs/op| -|[marshmallow without populating struct](https://github.com/PerimeterX/marshmallow/blob/8c5bba9e6dc0033f4324eca554737089a99f6e5e/benchmark_test.go#L162)|678616|1751 ns/op|608 B/op|18 allocs/op| - ## API -Marshmallow exposes two main API functions - -#### Unmarshal - -`marshmallow.Unmarshal(data []byte, v interface{}, options ...UnmarshalOption) (map[string]interface{}, error)` - -Unmarshal parses the JSON-encoded object in data and stores the values in the struct pointed to by v and in the returned -map. If v is nil or not a pointer to a struct, Unmarshal returns an ErrInvalidValue. If data is not a valid JSON or not -a JSON object Unmarshal returns an ErrInvalidInput. - -Unmarshal follows the rules of json.Unmarshal with the following exceptions: - -- All input fields are stored in the resulting map, including fields that do not exist in the struct pointed by v. -- Unmarshal only operates on JSON object inputs. It will reject all other types of input by returning ErrInvalidInput. -- Unmarshal only operates on struct values. It will reject all other types of v by returning ErrInvalidValue. -- Unmarshal supports three types of Mode values. Each mode is documented below. - -#### UnmarshalFromJSONMap - -`marshmallow.UnmarshalFromJSONMap(data map[string]interface{}, v interface{}, options ...UnmarshalOption) (map[string]interface{}, error)` - -UnmarshalFromJSONMap parses the JSON map data and stores the values in the struct pointed to by v and in the returned -map. If v is nil or not a pointer to a struct, UnmarshalFromJSONMap returns an ErrInvalidValue. 
- -UnmarshalFromJSONMap follows the rules of json.Unmarshal with the following exceptions: - -- All input fields are stored in the resulting map, including fields that do not exist in the struct pointed by v. -- UnmarshalFromJSONMap receive a JSON map instead of raw bytes. The given input map is assumed to be a JSON map, meaning - it should only contain the following types: `bool`, `string`, `float64`, `[]interface`, and `map[string]interface{}`. - Other types will cause decoding to return unexpected results. -- UnmarshalFromJSONMap only operates on struct values. It will reject all other types of v by returning ErrInvalidValue. -- UnmarshalFromJSONMap supports three types of Mode values. Each mode is documented below. - -**UnmarshalerFromJSONMap** is the interface implemented by types that can unmarshal a JSON description of themselves. In -case you want to implement custom unmarshalling, json.Unmarshaler only supports receiving the data as []byte. However, -while unmarshalling from JSON map, the data is not available as a raw []byte and converting to it will significantly -hurt performance. Thus, if you wish to implement a custom unmarshalling on a type that is being unmarshalled from a JSON -map, you need to implement UnmarshalerFromJSONMap interface. - -#### API Options - -- `marshmallow.WithMode(mode Mode)` sets the unmarshalling mode: - - **ModeFailOnFirstError** is the default mode. It makes unmarshalling terminate immediately on any kind of error. - This error will then be returned. - - **ModeAllowMultipleErrors** mode makes unmarshalling keep decoding even if errors are encountered. In case of such - error, the erroneous value will be omitted from the result. Eventually, all errors will all be returned, alongside - the partial result. - - **ModeFailOverToOriginalValue** mode makes unmarshalling keep decoding even if errors are encountered. 
In case of - such error, the original external value be placed in the result data, even though it does not meet the schematic - requirements. Eventually, all errors will be returned, alongside the full result. Note that the result map - will contain values that do not match the struct schema. -- `marshmallow.WithSkipPopulateStruct(skipPopulateStruct bool)` sets the skipPopulateStruct option. Skipping populate - struct is set to false by default. If you do not intend to use the struct value once unmarshalling is finished, set - this option to true to boost performance. This would mean the struct fields will not be set with values, but rather it - will only be used as the target schema when populating the result map. - -#### Caching - -`marshmallow.EnableCache` enables unmarshalling cache. It allows reuse of refection information about types needed to -perform the unmarshalling. A use of such cache can boost up unmarshalling by x1.4. Check out -[benchmark_test.go](benchmark_test.go) for an example. - -`EnableCache` is not thread safe! Do not use it while performing unmarshalling, or it will cause an unsafe race condition. -Typically, `EnableCache` should be called once when the process boots. - -Caching is disabled by default. The use of this function allows enabling it and controlling the behavior of the cache. -Typically, the use of `sync.Map` should be good enough. The caching mechanism stores a single `map` per struct type. If -you plan to unmarshal a huge amount of distinct struct it may get to consume a lot of resources, in which case you have -the control to choose the caching implementation you like and its setup. +Marshmallow exposes two main API functions - +[Unmarshal](https://github.com/PerimeterX/marshmallow/blob/0e0218ab860be8a4b5f57f5ff239f281c250c5da/unmarshal.go#L27) +and +[UnmarshalFromJSONMap](https://github.com/PerimeterX/marshmallow/blob/0e0218ab860be8a4b5f57f5ff239f281c250c5da/unmarshal_from_json_map.go#L37). 
+Each of them can operate in three possible [modes](https://github.com/PerimeterX/marshmallow/blob/0e0218ab860be8a4b5f57f5ff239f281c250c5da/options.go#L30), +and allow setting [skipPopulateStruct](https://github.com/PerimeterX/marshmallow/blob/0e0218ab860be8a4b5f57f5ff239f281c250c5da/options.go#L41) mode. + +Marshmallow also supports caching of reflection information using +[EnableCache](https://github.com/PerimeterX/marshmallow/blob/d3500aa5b0f330942b178b155da933c035dd3906/cache.go#L40) +and +[EnableCustomCache](https://github.com/PerimeterX/marshmallow/blob/d3500aa5b0f330942b178b155da933c035dd3906/cache.go#L35). diff --git a/benchmark_test.go b/benchmark_test.go index 71f067f..a69239f 100644 --- a/benchmark_test.go +++ b/benchmark_test.go @@ -7,14 +7,13 @@ package marshmallow import ( "encoding/json" "github.com/ugorji/go/codec" - "sync" "testing" ) // Unmarshal using marshmallow. // This will not require any explicit coding and provide the best performance. func BenchmarkMarshmallow(b *testing.B) { - EnableCache(&sync.Map{}) + EnableCache() var v benchmarkParent var result map[string]interface{} var err error @@ -160,7 +159,7 @@ func BenchmarkJSON(b *testing.B) { // but not interested in the resulting struct. // This will further boost performance. func BenchmarkMarshmallowWithSkipPopulateStruct(b *testing.B) { - EnableCache(&sync.Map{}) + EnableCache() var v benchmarkParent var result map[string]interface{} var err error diff --git a/cache.go b/cache.go index 0ed5e9b..a67cea6 100644 --- a/cache.go +++ b/cache.go @@ -6,6 +6,7 @@ package marshmallow import ( "reflect" + "sync" ) // Cache allows unmarshalling to use a cached version of refection information about types. @@ -19,22 +20,27 @@ type Cache interface { Store(key, value interface{}) } -// EnableCache enables unmarshalling cache. It allows reuse of refection information about types needed +// EnableCustomCache enables unmarshalling cache. 
It allows reuse of reflection information about types needed // to perform the unmarshalling. A use of such cache can boost up unmarshalling by x1.4. // Check out benchmark_test.go for an example. // -// EnableCache is not thread safe! Do not use it while performing unmarshalling, or it will -// cause an unsafe race condition. Typically, EnableCache should be called once when the process boots. +// EnableCustomCache is not thread safe! Do not use it while performing unmarshalling, or it will +// cause an unsafe race condition. Typically, EnableCustomCache should be called once when the process boots. // // Caching is disabled by default. The use of this function allows enabling it and controlling the // behavior of the cache. Typically, the use of sync.Map should be good enough. The caching mechanism // stores a single map per struct type. If you plan to unmarshal a huge amount of distinct // struct it may get to consume a lot of resources, in which case you have the control to choose // the caching implementation you like and its setup. -func EnableCache(c Cache) { +func EnableCustomCache(c Cache) { cache = c } +// EnableCache enables unmarshalling cache with default implementation. More info at EnableCustomCache. +func EnableCache() { + EnableCustomCache(&sync.Map{}) +} + var cache Cache func cacheLookup(t reflect.Type) map[string]reflectionInfo { diff --git a/example_test.go b/example_test.go index 637c548..99808e4 100644 --- a/example_test.go +++ b/example_test.go @@ -3,7 +3,6 @@ package marshmallow_test import ( "fmt" "github.com/perimeterx/marshmallow" - "sync" ) type exampleStruct struct { @@ -13,7 +12,7 @@ type exampleStruct struct { func ExampleUnmarshal() { // enable marshmallow cache to boost up performance by reusing field type information. 
- marshmallow.EnableCache(&sync.Map{}) + marshmallow.EnableCache() // unmarshal with mode marshmallow.ModeFailOnFirstError and valid value // this will finish unmarshalling and return a nil err @@ -65,7 +64,7 @@ func ExampleUnmarshal() { func ExampleUnmarshalFromJSONMap() { // enable marshmallow cache to boost up performance by reusing field type information. - marshmallow.EnableCache(&sync.Map{}) + marshmallow.EnableCache() // unmarshal with mode marshmallow.ModeFailOnFirstError and valid value // this will finish unmarshalling and return a nil err diff --git a/unmarshal.go b/unmarshal.go index 3ccb784..4dba14c 100644 --- a/unmarshal.go +++ b/unmarshal.go @@ -78,7 +78,7 @@ func (d *decoder) populateStruct(structInstance interface{}, result map[string]i if exists { value, isValidType := d.valueByReflectType(refInfo.t, false) if isValidType { - if doPopulate { + if value != nil && doPopulate { field := structValue.Field(refInfo.i) assignValue(field, value) } @@ -128,10 +128,7 @@ func (d *decoder) valueByReflectType(t reflect.Type, isPtr bool) (interface{}, b if converter := primitiveConverters[kind]; converter != nil { v := d.lexer.Interface() if v == nil { - if isPtr || kind == reflect.Interface { - return v, true - } - return reflect.Zero(t).Interface(), true + return nil, true } converted, ok := converter(v) if !ok { diff --git a/unmarshal_from_json_map.go b/unmarshal_from_json_map.go index bb11c73..f7335ce 100644 --- a/unmarshal_from_json_map.go +++ b/unmarshal_from_json_map.go @@ -76,7 +76,7 @@ func (m *mapDecoder) populateStruct(path []string, data map[string]interface{}, if exists { value, isValidType := m.valueByReflectType(append(path, key), inputValue, refInfo.t, false) if isValidType { - if doPopulate { + if value != nil && doPopulate { field := structValue.Field(refInfo.i) assignValue(field, value) } @@ -118,10 +118,7 @@ func (m *mapDecoder) valueByReflectType(path []string, v interface{}, t reflect. 
kind := t.Kind() if converter := primitiveConverters[kind]; converter != nil { if v == nil { - if isPtr || kind == reflect.Interface { - return v, true - } - return reflect.Zero(t).Interface(), true + return nil, true } converted, ok := converter(v) if !ok { diff --git a/unmarshal_from_json_map_test.go b/unmarshal_from_json_map_test.go index b17bbc9..e575e6f 100644 --- a/unmarshal_from_json_map_test.go +++ b/unmarshal_from_json_map_test.go @@ -8,12 +8,11 @@ import ( "github.com/go-test/deep" "reflect" "strings" - "sync" "testing" ) func TestUnmarshalFromJSONMapInputVariations(t *testing.T) { - EnableCache(&sync.Map{}) + EnableCache() tests := []struct { name string mode Mode diff --git a/unmarshal_test.go b/unmarshal_test.go index 2c5d177..e01dfce 100644 --- a/unmarshal_test.go +++ b/unmarshal_test.go @@ -12,12 +12,11 @@ import ( "github.com/mailru/easyjson/jlexer" "reflect" "strings" - "sync" "testing" ) func TestUnmarshalInputVariations(t *testing.T) { - EnableCache(&sync.Map{}) + EnableCache() tests := []struct { name string mode Mode