Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve string literals handling #155

Merged
merged 2 commits into from
Aug 7, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 1 addition & 11 deletions lib/ripper_ruby_parser/sexp_handlers/string_literals.rb
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,7 @@ def process_symbols(exp)

# Builds a :str node from the leading elements of +exp+.
#
# Removes four elements from the front of +exp+; the second, third and fourth
# are used as the string content, its position and its delimiter. The content
# is unescaped according to the delimiter, passed through +fix_encoding+, and
# wrapped in s(:str, ...) which is handed to +with_position+ together with
# the position. The result of +with_position+ is returned.
#
# NOTE(review): the three consecutive assignments to +string+ look like the
# removed and added lines of a diff hunk shown together; only the last
# assignment determines the value used below — confirm against the merged
# file.
def process_at_tstring_content(exp)
_, content, pos, delim = exp.shift 4
string = handle_string_unescaping(content, delim)
string = handle_string_encoding(string, delim)
string = fix_encoding handle_string_unescaping(content, delim)
with_position(pos, s(:str, string))
end

Expand Down Expand Up @@ -252,15 +251,6 @@ def handle_string_unescaping(content, delim)
content
end
end

# Applies +fix_encoding+ to +string+ when +delim+ case-matches (===) the
# interpolating heredoc pattern, the interpolating word-list pattern, or any
# entry of INTERPOLATING_STRINGS. For every other delimiter the string is
# returned untouched.
def handle_string_encoding(string, delim)
  interpolating = [INTERPOLATING_HEREDOC, INTERPOLATING_WORD_LIST, *INTERPOLATING_STRINGS]
  # `===` keeps the matching semantics of a `when` clause, so the entries may
  # be plain strings or regular expressions.
  return string unless interpolating.any? { |pattern| pattern === delim }

  fix_encoding(string)
end
end
end
end
21 changes: 21 additions & 0 deletions test/ripper_ruby_parser/sexp_handlers/string_literals_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,23 @@
.must_be_parsed_as s(:str, "bar\rbaz\r\n")
end

# Each parsed source below begins with an `# encoding: ascii-8bit` magic
# comment line, followed by a single string literal on the next line.
describe "when an encoding comment is used" do
# Source literal: a double-quoted string containing the \0 escape.
it "creates UTF-8 strings regardless" do
_("# encoding: ascii-8bit\n\"\\0\"")
.must_be_parsed_as s(:str, "\u0000")
end

# Source literal: a double-quoted string containing the \u00a4 escape.
it "uses UTF8 if multi-byte escapes are used" do
_("# encoding: ascii-8bit\n\"\\u00a4\"")
.must_be_parsed_as s(:str, "\u00a4")
end

# Source literal: a single-quoted string holding a backslash followed by a
# multi-byte character; the expected value keeps the backslash.
it "keeps unicode encoding for escape multi-byte characters" do
_("# encoding: ascii-8bit\n'\\あ'")
.must_be_parsed_as s(:str, "\\あ")
end
end

describe "with double-quoted strings with escape sequences" do
it "works for strings with escape sequences" do
_('"\\n"')
Expand Down Expand Up @@ -295,6 +312,10 @@
_('"2\302\275"').must_be_parsed_as s(:str, "2½")
end

it "converts hex escapes to unicode if possible" do
  # The nine hex escapes are the UTF-8 bytes of the three expected characters.
  _('"\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E"')
    .must_be_parsed_as s(:str, "日本語")
end

it "does not convert to unicode if result is not valid" do
_('"2\x82\302\275"')
.must_be_parsed_as s(:str,
Expand Down
5 changes: 5 additions & 0 deletions test/samples/ascii.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,8 @@
# frozen_string_literal: true

%Q[foo\n\0\nbar]
"\u00a4"
"あ"
"\あ"
%q{\あ}
'\あ'