Skip to content

Commit

Permalink
Merge pull request protocolbuffers#601 from anandolee/master
Browse files Browse the repository at this point in the history
ignore UTF-8 BOM
  • Loading branch information
anandolee committed Jul 17, 2015
2 parents 2f4fb64 + b2d2cf8 commit 7648852
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 0 deletions.
26 changes: 26 additions & 0 deletions src/google/protobuf/compiler/parser_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,32 @@ TEST_F(ParserTest, WarnIfSyntaxIdentifierOmmitted) {

typedef ParserTest ParseMessageTest;

// Verifies that a UTF-8 byte order mark (0xEF 0xBB 0xBF) at the very start of
// the input is skipped and the rest of the file parses as usual.
TEST_F(ParseMessageTest, IgnoreBOM) {
  // The BOM lives in its own literal: the hex escapes cannot absorb any
  // following characters, and no byte of the proto text has to be overwritten
  // (patching three bytes of a placeholder prefix silently corrupts the
  // "message" keyword whenever the prefix is shorter than three characters).
  const char input[] =
      "\xEF\xBB\xBF"
      "message TestMessage {\n"
      " required int32 foo = 1;\n"
      "}\n";
  ExpectParsesTo(input,
                 "message_type {"
                 " name: \"TestMessage\""
                 " field { name:\"foo\" label:LABEL_REQUIRED type:TYPE_INT32 number:1 }"
                 "}");
}

// Verifies that an input starting with 0xEF that is not followed by the rest
// of the UTF-8 BOM is reported as an error.
TEST_F(ParseMessageTest, BOMError) {
  char proto_text[] =
      " message TestMessage {\n"
      " required int32 foo = 1;\n"
      "}\n";
  // Overwrite the leading placeholder with only the first byte of the BOM.
  proto_text[0] = static_cast<char>(0xEF);

  const char* const expected_errors =
      "0:1: Proto file starts with 0xEF but not UTF-8 BOM. "
      "Only UTF-8 is accepted for proto file.\n"
      "0:0: Expected top-level statement (e.g. \"message\").\n";
  ExpectHasErrors(proto_text, expected_errors);
}

TEST_F(ParseMessageTest, SimpleMessage) {
ExpectParsesTo(
"message TestMessage {\n"
Expand Down
9 changes: 9 additions & 0 deletions src/google/protobuf/io/tokenizer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -762,6 +762,15 @@ bool Tokenizer::NextWithComments(string* prev_trailing_comments,
next_leading_comments);

if (current_.type == TYPE_START) {
// Ignore the Unicode byte order mark (BOM) if it appears at the beginning
// of the file. Only the UTF-8 BOM (0xEF 0xBB 0xBF) is accepted.
if (TryConsume((char)0xEF)) {
if (!TryConsume((char)0xBB) || !TryConsume((char)0xBF)) {
AddError("Proto file starts with 0xEF but not UTF-8 BOM. "
"Only UTF-8 is accepted for proto file.");
return false;
}
}
collector.DetachFromPrev();
} else {
// A comment appearing on the same line must be attached to the previous
Expand Down

0 comments on commit 7648852

Please sign in to comment.