From 0bb15bcd1cdb5359afc178e29d3745153e4668c9 Mon Sep 17 00:00:00 2001 From: Quinton Miller Date: Tue, 4 Apr 2023 22:46:02 +0800 Subject: [PATCH 1/3] Don't use symbols in `Crystal::Lexer#check_macro_opening_keyword` --- src/compiler/crystal/syntax/lexer.cr | 70 +++++++++++----------------- 1 file changed, 28 insertions(+), 42 deletions(-) diff --git a/src/compiler/crystal/syntax/lexer.cr b/src/compiler/crystal/syntax/lexer.cr index 92e4f1377f01..656addefb028 100644 --- a/src/compiler/crystal/syntax/lexer.cr +++ b/src/compiler/crystal/syntax/lexer.cr @@ -1966,7 +1966,7 @@ module Crystal elsif !delimiter_state && whitespace && (keyword = lookahead { check_macro_opening_keyword(beginning_of_line) }) char = current_char - nest += 1 unless keyword == :abstract_def + nest += 1 unless keyword == {Keyword::ABSTRACT, Keyword::DEF} whitespace = true beginning_of_line = false next @@ -2044,7 +2044,7 @@ module Crystal end end - def check_macro_opening_keyword(beginning_of_line) + def check_macro_opening_keyword(beginning_of_line) : Keyword | {Keyword, Keyword}? case char = current_char when 'a' case next_char @@ -2052,79 +2052,65 @@ module Crystal if char_sequence?('s', 't', 'r', 'a', 'c', 't') && next_char.whitespace? case next_char when 'd' - char_sequence?('e', 'f') && peek_not_ident_part_or_end_next_char && :abstract_def + {Keyword::ABSTRACT, Keyword::DEF} if char_sequence?('e', 'f') && peek_not_ident_part_or_end_next_char when 'c' - char_sequence?('l', 'a', 's', 's') && peek_not_ident_part_or_end_next_char && :abstract_class + {Keyword::ABSTRACT, Keyword::CLASS} if char_sequence?('l', 'a', 's', 's') && peek_not_ident_part_or_end_next_char when 's' - char_sequence?('t', 'r', 'u', 'c', 't') && peek_not_ident_part_or_end_next_char && :abstract_struct - else - false + {Keyword::ABSTRACT, Keyword::STRUCT} if char_sequence?('t', 'r', 'u', 'c', 't') && peek_not_ident_part_or_end_next_char end end when 'n' - char_sequence?('n', 'o', 't', 'a', 't', 'i', 'o', 'n') && peek_not_ident_part_or_end_next_char && :annotation - else - false + Keyword::ANNOTATION if char_sequence?('n', 'o', 't', 'a', 't', 'i', 'o', 'n') && peek_not_ident_part_or_end_next_char end when 'b' - char_sequence?('e', 'g', 'i', 'n') && peek_not_ident_part_or_end_next_char && :begin + Keyword::BEGIN if char_sequence?('e', 'g', 'i', 'n') && peek_not_ident_part_or_end_next_char when 'c' case next_char when 'a' - char_sequence?('s', 'e') && peek_not_ident_part_or_end_next_char && :case + Keyword::CASE if char_sequence?('s', 'e') && peek_not_ident_part_or_end_next_char when 'l' - char_sequence?('a', 's', 's') && peek_not_ident_part_or_end_next_char && :class - else - false + Keyword::CLASS if char_sequence?('a', 's', 's') && peek_not_ident_part_or_end_next_char end when 'd' case next_char when 'o' - peek_not_ident_part_or_end_next_char && :do + Keyword::DO if peek_not_ident_part_or_end_next_char when 'e' - next_char == 'f' && peek_not_ident_part_or_end_next_char && :def - else - false + Keyword::DEF if next_char == 'f' && peek_not_ident_part_or_end_next_char end when 'f' - char_sequence?('u', 'n') && peek_not_ident_part_or_end_next_char && :fun + Keyword::FUN if char_sequence?('u', 'n') && peek_not_ident_part_or_end_next_char when 'i' - beginning_of_line && next_char == 'f' && (char = next_char) && (!ident_part_or_end?(char) && :if) + Keyword::IF if beginning_of_line && next_char == 'f' && (char = next_char) && !ident_part_or_end?(char) when 'l' - char_sequence?('i', 'b') && peek_not_ident_part_or_end_next_char && :lib + Keyword::LIB if char_sequence?('i', 'b') && peek_not_ident_part_or_end_next_char when 'm' case next_char when 'a' - char_sequence?('c', 'r', 'o') && peek_not_ident_part_or_end_next_char && :macro + Keyword::MACRO if char_sequence?('c', 'r', 'o') && peek_not_ident_part_or_end_next_char when 'o' - char_sequence?('d', 'u', 'l', 'e') && peek_not_ident_part_or_end_next_char && :module - else - false + Keyword::MODULE if char_sequence?('d', 'u', 'l', 'e') && peek_not_ident_part_or_end_next_char end when 's' case next_char when 'e' - char_sequence?('l', 'e', 'c', 't') && !ident_part_or_end?(peek_next_char) && next_char && :select + Keyword::SELECT if char_sequence?('l', 'e', 'c', 't') && !ident_part_or_end?(peek_next_char) && next_char when 't' - char_sequence?('r', 'u', 'c', 't') && !ident_part_or_end?(peek_next_char) && next_char && :struct - else - false + Keyword::STRUCT if char_sequence?('r', 'u', 'c', 't') && !ident_part_or_end?(peek_next_char) && next_char end when 'u' - next_char == 'n' && case next_char - when 'i' - char_sequence?('o', 'n') && peek_not_ident_part_or_end_next_char && :union - when 'l' - beginning_of_line && char_sequence?('e', 's', 's') && peek_not_ident_part_or_end_next_char && :unless - when 't' - beginning_of_line && char_sequence?('i', 'l') && peek_not_ident_part_or_end_next_char && :until - else - false + if next_char == 'n' + case next_char + when 'i' + Keyword::UNION if char_sequence?('o', 'n') && peek_not_ident_part_or_end_next_char + when 'l' + Keyword::UNLESS if beginning_of_line && char_sequence?('e', 's', 's') && peek_not_ident_part_or_end_next_char + when 't' + Keyword::UNTIL if beginning_of_line && char_sequence?('i', 'l') && peek_not_ident_part_or_end_next_char + end end when 'w' - beginning_of_line && char_sequence?('h', 'i', 'l', 'e') && peek_not_ident_part_or_end_next_char && :while - else - false + Keyword::WHILE if beginning_of_line && char_sequence?('h', 'i', 'l', 'e') && peek_not_ident_part_or_end_next_char end end From 2b38856580be6a23195e6b1be02700f97cb1e9bf Mon Sep 17 00:00:00 2001 From: Quinton Miller Date: Tue, 4 Apr 2023 23:25:36 +0800 Subject: [PATCH 2/3] simplify return value --- src/compiler/crystal/syntax/lexer.cr | 46 ++++++++++++++-------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/src/compiler/crystal/syntax/lexer.cr b/src/compiler/crystal/syntax/lexer.cr index 656addefb028..b11de074b0be 100644 --- a/src/compiler/crystal/syntax/lexer.cr +++ b/src/compiler/crystal/syntax/lexer.cr @@ -1963,10 +1963,10 @@ module Crystal char = current_char whitespace = true beginning_of_line = false - elsif !delimiter_state && whitespace && (keyword = lookahead { check_macro_opening_keyword(beginning_of_line) }) + elsif !delimiter_state && whitespace && (keyword = lookahead { macro_starts_with_keyword?(beginning_of_line) }) char = current_char - nest += 1 unless keyword == {Keyword::ABSTRACT, Keyword::DEF} + nest += 1 unless keyword[:abstract_def] whitespace = true beginning_of_line = false next @@ -2044,7 +2044,7 @@ module Crystal end end - def check_macro_opening_keyword(beginning_of_line) : Keyword | {Keyword, Keyword}? + def macro_starts_with_keyword?(beginning_of_line) : {abstract_def: Bool}? case char = current_char when 'a' case next_char @@ -2052,65 +2052,65 @@ module Crystal if char_sequence?('s', 't', 'r', 'a', 'c', 't') && next_char.whitespace? case next_char when 'd' - {Keyword::ABSTRACT, Keyword::DEF} if char_sequence?('e', 'f') && peek_not_ident_part_or_end_next_char + {abstract_def: true} if char_sequence?('e', 'f') && peek_not_ident_part_or_end_next_char when 'c' - {Keyword::ABSTRACT, Keyword::CLASS} if char_sequence?('l', 'a', 's', 's') && peek_not_ident_part_or_end_next_char + {abstract_def: false} if char_sequence?('l', 'a', 's', 's') && peek_not_ident_part_or_end_next_char when 's' - {Keyword::ABSTRACT, Keyword::STRUCT} if char_sequence?('t', 'r', 'u', 'c', 't') && peek_not_ident_part_or_end_next_char + {abstract_def: false} if char_sequence?('t', 'r', 'u', 'c', 't') && peek_not_ident_part_or_end_next_char end end when 'n' - Keyword::ANNOTATION if char_sequence?('n', 'o', 't', 'a', 't', 'i', 'o', 'n') && peek_not_ident_part_or_end_next_char + {abstract_def: false} if char_sequence?('n', 'o', 't', 'a', 't', 'i', 'o', 'n') && peek_not_ident_part_or_end_next_char end when 'b' - Keyword::BEGIN if char_sequence?('e', 'g', 'i', 'n') && peek_not_ident_part_or_end_next_char + {abstract_def: false} if char_sequence?('e', 'g', 'i', 'n') && peek_not_ident_part_or_end_next_char when 'c' case next_char when 'a' - Keyword::CASE if char_sequence?('s', 'e') && peek_not_ident_part_or_end_next_char + {abstract_def: false} if char_sequence?('s', 'e') && peek_not_ident_part_or_end_next_char when 'l' - Keyword::CLASS if char_sequence?('a', 's', 's') && peek_not_ident_part_or_end_next_char + {abstract_def: false} if char_sequence?('a', 's', 's') && peek_not_ident_part_or_end_next_char end when 'd' case next_char when 'o' - Keyword::DO if peek_not_ident_part_or_end_next_char + {abstract_def: false} if peek_not_ident_part_or_end_next_char when 'e' - Keyword::DEF if next_char == 'f' && peek_not_ident_part_or_end_next_char + {abstract_def: false} if next_char == 'f' && peek_not_ident_part_or_end_next_char end when 'f' - Keyword::FUN if char_sequence?('u', 'n') && peek_not_ident_part_or_end_next_char + {abstract_def: false} if char_sequence?('u', 'n') && peek_not_ident_part_or_end_next_char when 'i' - Keyword::IF if beginning_of_line && next_char == 'f' && (char = next_char) && !ident_part_or_end?(char) + {abstract_def: false} if beginning_of_line && next_char == 'f' && (char = next_char) && !ident_part_or_end?(char) when 'l' - Keyword::LIB if char_sequence?('i', 'b') && peek_not_ident_part_or_end_next_char + {abstract_def: false} if char_sequence?('i', 'b') && peek_not_ident_part_or_end_next_char when 'm' case next_char when 'a' - Keyword::MACRO if char_sequence?('c', 'r', 'o') && peek_not_ident_part_or_end_next_char + {abstract_def: false} if char_sequence?('c', 'r', 'o') && peek_not_ident_part_or_end_next_char when 'o' - Keyword::MODULE if char_sequence?('d', 'u', 'l', 'e') && peek_not_ident_part_or_end_next_char + {abstract_def: false} if char_sequence?('d', 'u', 'l', 'e') && peek_not_ident_part_or_end_next_char end when 's' case next_char when 'e' - Keyword::SELECT if char_sequence?('l', 'e', 'c', 't') && !ident_part_or_end?(peek_next_char) && next_char + {abstract_def: false} if char_sequence?('l', 'e', 'c', 't') && !ident_part_or_end?(peek_next_char) && next_char when 't' - Keyword::STRUCT if char_sequence?('r', 'u', 'c', 't') && !ident_part_or_end?(peek_next_char) && next_char + {abstract_def: false} if char_sequence?('r', 'u', 'c', 't') && !ident_part_or_end?(peek_next_char) && next_char end when 'u' if next_char == 'n' case next_char when 'i' - Keyword::UNION if char_sequence?('o', 'n') && peek_not_ident_part_or_end_next_char + {abstract_def: false} if char_sequence?('o', 'n') && peek_not_ident_part_or_end_next_char when 'l' - Keyword::UNLESS if beginning_of_line && char_sequence?('e', 's', 's') && peek_not_ident_part_or_end_next_char + {abstract_def: false} if beginning_of_line && char_sequence?('e', 's', 's') && peek_not_ident_part_or_end_next_char when 't' - Keyword::UNTIL if beginning_of_line && char_sequence?('i', 'l') && peek_not_ident_part_or_end_next_char + {abstract_def: false} if beginning_of_line && char_sequence?('i', 'l') && peek_not_ident_part_or_end_next_char end end when 'w' - Keyword::WHILE if beginning_of_line && char_sequence?('h', 'i', 'l', 'e') && peek_not_ident_part_or_end_next_char + {abstract_def: false} if beginning_of_line && char_sequence?('h', 'i', 'l', 'e') && peek_not_ident_part_or_end_next_char end end From 810defb7c895341d265b553d112cab7ff2d18e9f Mon Sep 17 00:00:00 2001 From: Quinton Miller Date: Tue, 6 Jun 2023 01:12:32 +0800 Subject: [PATCH 3/3] use enum --- src/compiler/crystal/syntax/lexer.cr | 49 +++++++++++++++------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/src/compiler/crystal/syntax/lexer.cr b/src/compiler/crystal/syntax/lexer.cr index 2c1b0ea1474d..4ea100eb6282 100644 --- a/src/compiler/crystal/syntax/lexer.cr +++ b/src/compiler/crystal/syntax/lexer.cr @@ -1928,7 +1928,7 @@ module Crystal elsif !delimiter_state && whitespace && (keyword = lookahead { macro_starts_with_keyword?(beginning_of_line) }) char = current_char - nest += 1 unless keyword[:abstract_def] + nest += 1 unless keyword.abstract_def? whitespace = true beginning_of_line = false next @@ -2006,7 +2006,12 @@ module Crystal end end - def macro_starts_with_keyword?(beginning_of_line) : {abstract_def: Bool}? + enum MacroKeywordState + AbstractDef + Other + end + + def macro_starts_with_keyword?(beginning_of_line) : MacroKeywordState? case char = current_char when 'a' case next_char @@ -2014,65 +2019,65 @@ module Crystal if char_sequence?('s', 't', 'r', 'a', 'c', 't') && next_char.whitespace? case next_char when 'd' - {abstract_def: true} if char_sequence?('e', 'f') && peek_not_ident_part_or_end_next_char + MacroKeywordState::AbstractDef if char_sequence?('e', 'f') && peek_not_ident_part_or_end_next_char when 'c' - {abstract_def: false} if char_sequence?('l', 'a', 's', 's') && peek_not_ident_part_or_end_next_char + MacroKeywordState::Other if char_sequence?('l', 'a', 's', 's') && peek_not_ident_part_or_end_next_char when 's' - {abstract_def: false} if char_sequence?('t', 'r', 'u', 'c', 't') && peek_not_ident_part_or_end_next_char + MacroKeywordState::Other if char_sequence?('t', 'r', 'u', 'c', 't') && peek_not_ident_part_or_end_next_char end end when 'n' - {abstract_def: false} if char_sequence?('n', 'o', 't', 'a', 't', 'i', 'o', 'n') && peek_not_ident_part_or_end_next_char + MacroKeywordState::Other if char_sequence?('n', 'o', 't', 'a', 't', 'i', 'o', 'n') && peek_not_ident_part_or_end_next_char end when 'b' - {abstract_def: false} if char_sequence?('e', 'g', 'i', 'n') && peek_not_ident_part_or_end_next_char + MacroKeywordState::Other if char_sequence?('e', 'g', 'i', 'n') && peek_not_ident_part_or_end_next_char when 'c' case next_char when 'a' - {abstract_def: false} if char_sequence?('s', 'e') && peek_not_ident_part_or_end_next_char + MacroKeywordState::Other if char_sequence?('s', 'e') && peek_not_ident_part_or_end_next_char when 'l' - {abstract_def: false} if char_sequence?('a', 's', 's') && peek_not_ident_part_or_end_next_char + MacroKeywordState::Other if char_sequence?('a', 's', 's') && peek_not_ident_part_or_end_next_char end when 'd' case next_char when 'o' - {abstract_def: false} if peek_not_ident_part_or_end_next_char + MacroKeywordState::Other if peek_not_ident_part_or_end_next_char when 'e' - {abstract_def: false} if next_char == 'f' && peek_not_ident_part_or_end_next_char + MacroKeywordState::Other if next_char == 'f' && peek_not_ident_part_or_end_next_char end when 'f' - {abstract_def: false} if char_sequence?('u', 'n') && peek_not_ident_part_or_end_next_char + MacroKeywordState::Other if char_sequence?('u', 'n') && peek_not_ident_part_or_end_next_char when 'i' - {abstract_def: false} if beginning_of_line && next_char == 'f' && peek_not_ident_part_or_end_next_char + MacroKeywordState::Other if beginning_of_line && next_char == 'f' && peek_not_ident_part_or_end_next_char when 'l' - {abstract_def: false} if char_sequence?('i', 'b') && peek_not_ident_part_or_end_next_char + MacroKeywordState::Other if char_sequence?('i', 'b') && peek_not_ident_part_or_end_next_char when 'm' case next_char when 'a' - {abstract_def: false} if char_sequence?('c', 'r', 'o') && peek_not_ident_part_or_end_next_char + MacroKeywordState::Other if char_sequence?('c', 'r', 'o') && peek_not_ident_part_or_end_next_char when 'o' - {abstract_def: false} if char_sequence?('d', 'u', 'l', 'e') && peek_not_ident_part_or_end_next_char + MacroKeywordState::Other if char_sequence?('d', 'u', 'l', 'e') && peek_not_ident_part_or_end_next_char end when 's' case next_char when 'e' - {abstract_def: false} if char_sequence?('l', 'e', 'c', 't') && !ident_part_or_end?(peek_next_char) && next_char + MacroKeywordState::Other if char_sequence?('l', 'e', 'c', 't') && !ident_part_or_end?(peek_next_char) && next_char when 't' - {abstract_def: false} if char_sequence?('r', 'u', 'c', 't') && !ident_part_or_end?(peek_next_char) && next_char + MacroKeywordState::Other if char_sequence?('r', 'u', 'c', 't') && !ident_part_or_end?(peek_next_char) && next_char end when 'u' if next_char == 'n' case next_char when 'i' - {abstract_def: false} if char_sequence?('o', 'n') && peek_not_ident_part_or_end_next_char + MacroKeywordState::Other if char_sequence?('o', 'n') && peek_not_ident_part_or_end_next_char when 'l' - {abstract_def: false} if beginning_of_line && char_sequence?('e', 's', 's') && peek_not_ident_part_or_end_next_char + MacroKeywordState::Other if beginning_of_line && char_sequence?('e', 's', 's') && peek_not_ident_part_or_end_next_char when 't' - {abstract_def: false} if beginning_of_line && char_sequence?('i', 'l') && peek_not_ident_part_or_end_next_char + MacroKeywordState::Other if beginning_of_line && char_sequence?('i', 'l') && peek_not_ident_part_or_end_next_char end end when 'w' - {abstract_def: false} if beginning_of_line && char_sequence?('h', 'i', 'l', 'e') && peek_not_ident_part_or_end_next_char + MacroKeywordState::Other if beginning_of_line && char_sequence?('h', 'i', 'l', 'e') && peek_not_ident_part_or_end_next_char end end