Skip to content

Commit

Permalink
sql: tree.ParseJSON should not ignore trailing data
Browse files Browse the repository at this point in the history
`tree.ParseJSON` uses `Decoder.More()` method to determine
if the input contains trailing data.  The implementation
made incorrect assumptions as to the reason why `Decoder.More()`
allows input to contain `]` or `}` characters, even when
JSON object has been consumed.

This PR fixes and comments those faulty assumptions, and
fixes multiple existing tests that seemed to rely on those
faulty assumptions.

In particular, prior to this PR, the following input would be allowed
(i.e. extra end of object character `}`):
```
[email protected]:26257/movr> select '{"longKey1":"longValue1"}}'::jsonb;
            jsonb
------------------------------
  {"longKey1": "longValue1"}
(1 row)
```

But so would the following be allowed:
```
[email protected]:26257/movr> select '{"longKey1":"longValue1"}} should this data be ignored?'::jsonb;
            jsonb
------------------------------
  {"longKey1": "longValue1"}
(1 row)
```

So, the issue is: if above conversion was executed to insert JSONB
into the database, we would silently truncate (corrupt) JSON input,
and instead of returning an error, we would return success.

This behavior is wrong, and this PR fixes it so that an error is
returned:
```
select '{"longKey1":"longValue1"}} should this data be ignored?'::jsonb;
ERROR: could not parse JSON: trailing characters after JSON document
SQLSTATE: 22P02
```

Release note (bug fix): Do not silently truncate trailing characters
when attemption to convert corrupt JSON string input into JSONb.
  • Loading branch information
Yevgeniy Miretskiy committed Oct 13, 2022
1 parent b955fd6 commit f2d5001
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 11 deletions.
18 changes: 9 additions & 9 deletions pkg/sql/logictest/testdata/logic_test/inverted_index
Original file line number Diff line number Diff line change
Expand Up @@ -588,11 +588,11 @@ SELECT * from update_test where j @> '"shortValue"';
2 "shortValue"

query IT
SELECT * from update_test where j @> '{"longKey1":"longValue1"}}';
SELECT * from update_test where j @> '{"longKey1":"longValue1"}';
----

query IT
SELECT * from update_test where j @> '{"longKey2":"longValue2"}}';
SELECT * from update_test where j @> '{"longKey2":"longValue2"}';
----

statement ok
Expand Down Expand Up @@ -778,20 +778,20 @@ INSERT INTO f VALUES
(8, '{"a": {"d": 2}}'),
(9, '{"a": {"b": [1, 2]}}'),
(10, '{"a": {"b": {"c": 1}}}'),
(11, '{"a": {"b": {"c": 1, "d": 2}}}}'),
(12, '{"a": {"b": {"d": 2}}}}'),
(11, '{"a": {"b": {"c": 1, "d": 2}}}'),
(12, '{"a": {"b": {"d": 2}}}'),
(13, '{"a": {"b": {"c": [1, 2]}}}'),
(14, '{"a": {"b": {"c": [1, 2, 3]}}}'),
(15, '{"a": []}'),
(16, '{"a": {}}}'),
(16, '{"a": {}}'),
(17, '{"a": {"b": "c"}}'),
(18, '{"a": {"b": ["c", "d", "e"]}}'),
(19, '{"a": ["b", "c", "d", "e"]}'),
(20, '{"a": ["b", "e", "c", "d"]}'),
(21, '{"z": {"a": "b", "c": "d"}}'),
(22, '{"z": {"a": "b", "c": "d", "e": "f"}}'),
(23, '{"a": "b", "x": ["c", "d", "e"]}}'),
(24, '{"a": "b", "c": [{"d": 1}, {"e": 2}]}}'),
(23, '{"a": "b", "x": ["c", "d", "e"]}'),
(24, '{"a": "b", "c": [{"d": 1}, {"e": 2}]}'),
(25, '{"a": {"b": "c", "d": "e"}}'),
(26, '{"a": {"b": "c"}, "d": "e"}'),
(27, '[1, 2, {"b": "c"}]'),
Expand Down Expand Up @@ -1443,11 +1443,11 @@ INSERT INTO cb2 VALUES
(9, '{"a": {"b": [1, 2]}}'),
(10, '{"a": {"b": {"c": [1, 2, 3]}}}'),
(11, '{"a": {"b": {"c": {"d": "e"}}}}'),
(12, '{"a": {"b": {"d": 2}}}}'),
(12, '{"a": {"b": {"d": 2}}}'),
(13, '{"a": [{"b": {"c": [1, 2]}}]}'),
(14, '{"b": {"c": [1, 2]}}'),
(15, '{"a": []}'),
(16, '{"a": {}}}'),
(16, '{"a": {}}'),
(17, '{"a": [[2]]}'),
(18, '[[2]]'),
(19, '[1, 2]'),
Expand Down
16 changes: 15 additions & 1 deletion pkg/util/json/json.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"bytes"
"encoding/json"
"fmt"
"io"
"math/big"
"reflect"
"sort"
Expand Down Expand Up @@ -845,6 +846,10 @@ func ParseJSON(s string) (JSON, error) {
// This goes in two phases - first it parses the string into raw interface{}s
// using the Go encoding/json package, then it transforms that into a JSON.
// This could be faster if we wrote a parser to go directly into the JSON.
// Note: a better way to unmarshal a single JSON value would be to use
// json.Unmarshal; however, we cannot do that because we want to get numbers
// as strings; Alas, json.Unmarshal does not support that, and so, we have
// to use json.Decoder.
var result interface{}
decoder := json.NewDecoder(strings.NewReader(s))
// We want arbitrary size/precision decimals, so we call UseNumber() to tell
Expand All @@ -858,7 +863,16 @@ func ParseJSON(s string) (JSON, error) {
err = pgerror.WithCandidateCode(err, pgcode.InvalidTextRepresentation)
return nil, err
}
if decoder.More() {

// Check to see if input has more data.
// Note: using decoder.More() is wrong since it allows `]` and '}'
// characters (i.e. '{}]this is BAD' will be parsed fine, and More will return
// false -- thus allowing invalid JSON data to be parsed correctly). The
// decoder is meant to be used in the streaming applications; not in a one-off
// Unmarshal mode we are using here. Therefore, to check if input has
// trailing characters, we have to attempt to decode the next value.
var more interface{}
if err := decoder.Decode(&more); err != io.EOF {
return nil, errTrailingCharacters
}
return MakeJSON(result)
Expand Down
4 changes: 3 additions & 1 deletion pkg/util/json/json_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,8 @@ func TestJSONErrors(t *testing.T) {
{`true false`, `trailing characters after JSON document`},
{`trues`, `trailing characters after JSON document`},
{`1 2 3`, `trailing characters after JSON document`},
{`[1, 2, 3]]`, `trailing characters after JSON document`},
{`[1, 2, 3]} do not ignore`, `trailing characters after JSON document`},
// Here the decoder just grabs the 0 and leaves the 1. JSON numbers can't have
// leading 0s.
{`01`, `trailing characters after JSON document`},
Expand Down Expand Up @@ -1391,7 +1393,7 @@ func TestEncodeJSONInvertedIndex(t *testing.T) {
nil)}},
{`[["a"]]`, [][]byte{bytes.Join([][]byte{jsonPrefix, arrayPrefix, arrayPrefix, terminator, encoding.EncodeStringAscending(nil, "a")},
nil)}},
{`{"a":["b","c"]}]`, [][]byte{bytes.Join([][]byte{jsonPrefix, aEncoding, objectSeparator, arrayPrefix, terminator, encoding.EncodeStringAscending(nil, "b")}, nil),
{`{"a":["b","c"]}`, [][]byte{bytes.Join([][]byte{jsonPrefix, aEncoding, objectSeparator, arrayPrefix, terminator, encoding.EncodeStringAscending(nil, "b")}, nil),
bytes.Join([][]byte{jsonPrefix, aEncoding, objectSeparator, arrayPrefix, terminator, encoding.EncodeStringAscending(nil, "c")}, nil)}},
{`[]`, [][]byte{bytes.Join([][]byte{jsonPrefix, terminator, emptyArrayEncoding},
nil)}},
Expand Down

0 comments on commit f2d5001

Please sign in to comment.