Skip to content

Commit

Permalink
Make JSON UTF-8 boundary check inclusive of the largest possible UTF-…
Browse files Browse the repository at this point in the history
…8 character.

UTF-8 acceptable codepoints are 0x0000-0x10FFFF. Previously we treated this as an inclusive-exclusive bound, but UTF-8 does include its topmost character (0x10FFFF).

PiperOrigin-RevId: 572111435
  • Loading branch information
protobuf-github-bot authored and copybara-github committed Oct 10, 2023
1 parent d0d52fe commit e80b8ec
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/google/protobuf/json/internal/lexer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ absl::StatusOr<size_t> JsonLexer::ParseUnicodeEscape(char out_utf8[4]) {
out_utf8[1] = ((rune >> 6) & 0x3f) | 0x80;
out_utf8[2] = ((rune >> 0) & 0x3f) | 0x80;
return 3;
} else if (rune < 0x10ffff) {
} else if (rune <= 0x10ffff) {
out_utf8[0] = ((rune >> 18) & 0x07) | 0xF0;
out_utf8[1] = ((rune >> 12) & 0x3f) | 0x80;
out_utf8[2] = ((rune >> 6) & 0x3f) | 0x80;
Expand Down
9 changes: 9 additions & 0 deletions src/google/protobuf/json/internal/lexer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,15 @@ TEST(LexerTest, SimpleString) {
});
}

// Exercises \u escapes whose code points sit at the top of each UTF-8 length
// class (plus U+0001 at the low end): U+0001 -> 1 byte, U+07FF -> 2 bytes,
// U+FFFF -> 3 bytes, and the surrogate pair \uDBFF\uDFFF (U+10FFFF, the
// largest code point) -> 4 bytes.
TEST(LexerTest, UTFBoundaries) {
  // Static storage so the capture-less lambda below can refer to them.
  static constexpr char kEscapedJson[] =
      R"json("\u0001\u07FF\uFFFF\uDBFF\uDFFF")json";
  // Split per code point; adjacent literals are concatenated after escape
  // processing, so the bytes are the same as a single literal.
  static constexpr char kExpectedUtf8[] =
      "\x01"               // U+0001
      "\xdf\xbf"           // U+07FF
      "\xef\xbf\xbf"       // U+FFFF
      "\xf4\x8f\xbf\xbf";  // U+10FFFF

  Do(kEscapedJson, [](io::ZeroCopyInputStream* stream) {
    EXPECT_THAT(Value::Parse(stream),
                IsOkAndHolds(ValueIs<std::string>(kExpectedUtf8)));
  });
}

TEST(NonStandard, SingleQuoteString) {
DoLegacy(R"json('My String')json", [=](const Value& value) {
EXPECT_THAT(value, ValueIs<std::string>("My String"));
Expand Down

0 comments on commit e80b8ec

Please sign in to comment.