Skip to content

Commit

Permalink
* lexer.rl: extract extend_string_escaped to a separate method (#893)
Browse files Browse the repository at this point in the history
  • Loading branch information
iliabylich authored Dec 15, 2022
1 parent fbc2d7b commit da1a8db
Showing 1 changed file with 59 additions and 53 deletions.
112 changes: 59 additions & 53 deletions lib/parser/lexer.rl
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,64 @@ class Parser::Lexer
!literal.regexp?
end

# String escaping

def extend_string_escaped
  lit = literal
  # The character that immediately follows the backslash.
  char = @source_buffer.slice(@escape_s).chr
  newline = char == "\n".freeze

  # Pick the text to append first; the literal is extended exactly once,
  # at the bottom, with the span of the current token.
  text =
    if lit.munge_escape?(char)
      # The literal uses this character as an opening or closing delimiter,
      # so the sequence escapes that very character and is normally written
      # without the backslash.
      #
      # Regular expressions are the exception: an escaped delimiter that is
      # also a regexp metacharacter keeps its escaped form. The backslash
      # cannot be a closing delimiter and an escape symbol at the same time,
      # but it is always munged, so an escaped backslash in a regexp also
      # lands here and keeps its escaped form.
      if lit.regexp? && REGEXP_META_CHARACTERS.match(char)
        tok
      else
        char
      end
    elsif lit.squiggly_heredoc? && newline
      # In a squiggly heredoc such as
      #   <<~-HERE
      #     1\
      #     2
      #   HERE
      # the trailing '\' continues the line while the body is still
      # dedented, giving "12\n". The raw sequence (\\\n) is passed through
      # untouched here and handled manually later in Lexer::Dedenter.
      tok
    elsif lit.supports_line_continuation_via_slash? && newline
      # Heredocs, regexps and a few other literal types treat \\\n as a
      # line continuation, so
      #   "a\
      #   b"
      # must be parsed as "ab".
      tok.gsub("\\\n".freeze, ''.freeze)
    elsif lit.regexp? && @version >= 31 && %w[c C m M].include?(char)
      # Ruby >= 3.1 escapes \c- and \m chars; so far this is the only
      # escape sequence supported by regexes, hence the dedicated branch.
      @escape
    elsif lit.regexp?
      # Regexps keep escape sequences in their escaped form, but escaped
      # newlines are dropped (in cases like "\\C-\\\n\\M-x").
      tok.gsub("\\\n".freeze, ''.freeze)
    else
      # An ordinary escape sequence: use the value in @escape when one is
      # set, otherwise fall back to the raw token text.
      @escape || tok
    end

  lit.extend_string(text, @ts, @te)
end

# Mapping of strings to parser tokens.

PUNCTUATION = {
Expand Down Expand Up @@ -909,59 +967,7 @@ class Parser::Lexer
}

action extend_string_escaped {
  # Delegate to the Lexer#extend_string_escaped instance method, which holds
  # the escape-handling logic. The action must not also run that logic
  # inline: doing both would extend the current literal twice for a single
  # escape sequence.
  extend_string_escaped
}

# Extend a string with a newline or an EOF character.
Expand Down

0 comments on commit da1a8db

Please sign in to comment.