From 43349e0fd2fb4cd1ec62a57579c2bdd4110ae063 Mon Sep 17 00:00:00 2001 From: Shylock Hg <33566796+Shylock-Hg@users.noreply.github.com> Date: Wed, 30 Nov 2022 10:35:17 +0800 Subject: [PATCH 1/2] Extend white space in unicode. --- src/parser/scanner.lex | 7 +++++-- src/parser/test/ScannerTest.cpp | 5 +++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/parser/scanner.lex b/src/parser/scanner.lex index 2e9f794df92..828a5c7b949 100644 --- a/src/parser/scanner.lex +++ b/src/parser/scanner.lex @@ -35,7 +35,10 @@ static constexpr size_t MAX_STRING = 4096; %x LB_STR %x COMMENT -blanks ([ \t\n]+) +black_without_newline ([ \t\r\xa0]) +blank ({black_without_newline}|[\n]) + +blanks ({blank}+) NOT_IN (NOT{blanks}IN) NOT_CONTAINS (NOT{blanks}CONTAINS) @@ -514,7 +517,7 @@ LABEL_FULL_WIDTH {CN_EN_FULL_WIDTH}{CN_EN_NUM_FULL_WIDTH}* throw GraphParser::syntax_error(*yylloc, "Don't allow DOT in label:"); } -[ \r\t] { } +{black_without_newline} { } \n { yylineno++; yylloc->lines(yyleng); diff --git a/src/parser/test/ScannerTest.cpp b/src/parser/test/ScannerTest.cpp index bd05a318dd3..f1b0675166e 100644 --- a/src/parser/test/ScannerTest.cpp +++ b/src/parser/test/ScannerTest.cpp @@ -532,6 +532,11 @@ TEST(Scanner, Basic) { CHECK_SEMANTIC_VALUE("label", TokenType::LABEL, "label"), CHECK_SEMANTIC_VALUE("label123", TokenType::LABEL, "label123"), + // A bare \xA0 byte (Latin-1 NBSP; trailing byte of UTF-8 U+00A0) is white space too + CHECK_SEMANTIC_VALUE("\xA0" + "abc", + TokenType::LABEL, + "abc"), CHECK_SEMANTIC_VALUE("123", TokenType::INTEGER, 123), CHECK_SEMANTIC_VALUE("0x123", TokenType::INTEGER, 0x123), From 8e3e733e72f8f5b96a90fa6a4c0d3e66fd9d53b3 Mon Sep 17 00:00:00 2001 From: Shylock Hg <33566796+Shylock-Hg@users.noreply.github.com> Date: Wed, 30 Nov 2022 18:08:14 +0800 Subject: [PATCH 2/2] Fix typo. 
--- src/parser/scanner.lex | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/parser/scanner.lex b/src/parser/scanner.lex index 828a5c7b949..a6fedde4a68 100644 --- a/src/parser/scanner.lex +++ b/src/parser/scanner.lex @@ -35,8 +35,8 @@ static constexpr size_t MAX_STRING = 4096; %x LB_STR %x COMMENT -black_without_newline ([ \t\r\xa0]) -blank ({black_without_newline}|[\n]) +blank_without_newline ([ \t\r\xa0]) +blank ({blank_without_newline}|[\n]) blanks ({blank}+) @@ -517,7 +517,7 @@ LABEL_FULL_WIDTH {CN_EN_FULL_WIDTH}{CN_EN_NUM_FULL_WIDTH}* throw GraphParser::syntax_error(*yylloc, "Don't allow DOT in label:"); } -{black_without_newline} { } +{blank_without_newline} { } \n { yylineno++; yylloc->lines(yyleng);