Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert "Restrict identifier grammar" #11687

Merged
1 commit merged on Jan 3, 2022
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion scripts/generate_unicode_data.cr
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ alternate_ranges = alternate_ranges(downcase_one_ranges)
casefold_ranges = case_ranges entries, &.casefold

all_strides = {} of String => Array(Stride)
categories = %w(Lu Ll Lt Lm Lo Mn Mc Me Nd Nl No Pc Zs Zl Zp Cc Cf Cs Co Cn)
categories = %w(Lu Ll Lt Lm Lo Mn Mc Me Nd Nl No Zs Zl Zp Cc Cf Cs Co Cn)

categories.each do |category|
all_strides[category] = strides entries, category, &.general_category
Expand Down
6 changes: 3 additions & 3 deletions spec/compiler/crystal/tools/format_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ describe Crystal::Command::FormatCommand do
format_command.run
format_command.status_code.should eq(1)
stdout.to_s.empty?.should be_true
stderr.to_s.should contain("file 'STDIN' is not a valid Crystal source file: Unexpected byte 0xFE at position 0, malformed UTF-8")
stderr.to_s.should contain("file 'STDIN' is not a valid Crystal source file: Unexpected byte 0xff at position 1, malformed UTF-8")
end

it "formats stdin (bug)" do
Expand Down Expand Up @@ -162,7 +162,7 @@ describe Crystal::Command::FormatCommand do
format_command.status_code.should eq(1)
stdout.to_s.should contain("Format #{Path[".", "format.cr"]}")
stderr.to_s.should contain("syntax error in '#{Path[".", "syntax_error.cr"]}:1:3': unexpected token: EOF")
stderr.to_s.should contain("file '#{Path[".", "invalid_byte_sequence_error.cr"]}' is not a valid Crystal source file: Unexpected byte 0xFE at position 0, malformed UTF-8")
stderr.to_s.should contain("file '#{Path[".", "invalid_byte_sequence_error.cr"]}' is not a valid Crystal source file: Unexpected byte 0xff at position 1, malformed UTF-8")

File.read(File.join(path, "format.cr")).should eq("if true\n 1\nend\n")
end
Expand Down Expand Up @@ -226,7 +226,7 @@ describe Crystal::Command::FormatCommand do
stderr.to_s.should_not contain("not_format.cr")
stderr.to_s.should contain("formatting '#{Path[".", "format.cr"]}' produced changes")
stderr.to_s.should contain("syntax error in '#{Path[".", "syntax_error.cr"]}:1:3': unexpected token: EOF")
stderr.to_s.should contain("file '#{Path[".", "invalid_byte_sequence_error.cr"]}' is not a valid Crystal source file: Unexpected byte 0xFE at position 0, malformed UTF-8")
stderr.to_s.should contain("file '#{Path[".", "invalid_byte_sequence_error.cr"]}' is not a valid Crystal source file: Unexpected byte 0xff at position 1, malformed UTF-8")
end
end
end
Expand Down
17 changes: 2 additions & 15 deletions spec/compiler/lexer/lexer_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -150,21 +150,8 @@ describe "Lexer" do
:pointerof, :sizeof, :instance_sizeof, :offsetof, :as, :as?, :typeof, :for, :in,
:with, :self, :super, :private, :protected, :asm, :uninitialized, :nil?,
:annotation, :verbatim]
it_lexes_idents ["ident", "something", "with_underscores", "_start_underscore", "with_1", "foo?", "bar!", "fooBar"]
it_lexes_idents [
"ä", # L
"a\u0300", # Mn
"aः", # Mc
"a٠", # Nd
"a_", # Pc
"aⅧ", # Nl
]

assert_syntax_error "\u200B", "unknown token: '\\u200B'"
assert_syntax_error "ident\u200B", "unknown token: '\\u200B'"
assert_syntax_error ":\u200B", %(unexpected token: ":")
assert_syntax_error ":ident\u200B", "unknown token: '\\u200B'"

it_lexes_idents ["ident", "something", "with_underscores", "with_1", "foo?", "bar!", "fooBar",
"❨╯°□°❩╯︵┻━┻"]
it_lexes_idents ["def?", "if?", "else?", "elsif?", "end?", "true?", "false?", "class?", "while?",
"do?", "yield?", "return?", "unless?", "next?", "break?", "begin?"]
it_lexes_idents ["def!", "if!", "else!", "elsif!", "end!", "true!", "false!", "class!", "while!",
Expand Down
13 changes: 0 additions & 13 deletions spec/compiler/parser/parser_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -156,19 +156,6 @@ module Crystal
it_parses "a = 1", Assign.new("a".var, 1.int32)
it_parses "a = b = 2", Assign.new("a".var, Assign.new("b".var, 2.int32))

# check control characters: They're allowed inside literals, but not in identifiers.
['\u200B', '\u202A', '\u202B', '\u202C', '\u202D', '\u202E', '\u2066', '\u2067', '\u2068', '\u2069'].each do |char|
it_parses %('#{char}'), CharLiteral.new(char)
assert_syntax_error %(ident#{char}), "unknown token: #{char.inspect}"
it_parses %("#{char}"), StringLiteral.new("#{char}")
it_parses %(%w(#{char})), ArrayLiteral.new([StringLiteral.new "#{char}"] of ASTNode, of: Path.new("String", global: true))
assert_syntax_error %(:#{char}), %(unexpected token: ":")
it_parses %(:"#{char}"), SymbolLiteral.new "#{char}"
it_parses %(%i(#{char})), ArrayLiteral.new([SymbolLiteral.new "#{char}"] of ASTNode, of: Path.new("Symbol", global: true))
it_parses %(##{char}), Nop.new
it_parses %(macro foo\n##{char}\nend), Macro.new("foo", body: MacroLiteral.new("##{char}\n"))
end

it_parses "a, b = 1, 2", MultiAssign.new(["a".var, "b".var] of ASTNode, [1.int32, 2.int32] of ASTNode)
it_parses "a, b = 1", MultiAssign.new(["a".var, "b".var] of ASTNode, [1.int32] of ASTNode)
it_parses "_, _ = 1, 2", MultiAssign.new([Underscore.new, Underscore.new] of ASTNode, [1.int32, 2.int32] of ASTNode)
Expand Down
14 changes: 3 additions & 11 deletions src/compiler/crystal/syntax/lexer.cr
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,6 @@ module Crystal

def initialize(string, string_pool : StringPool? = nil)
@reader = Char::Reader.new(string)

if error = @reader.error
::raise InvalidByteSequenceError.new("Unexpected byte 0x#{error.to_s(16, upcase: true)} at position #{@reader.pos}, malformed UTF-8")
end

@token = Token.new
@temp_token = Token.new
@line_number = 1
Expand Down Expand Up @@ -2758,7 +2753,7 @@ module Crystal
def next_char_no_column_increment
char = @reader.next_char
if error = @reader.error
::raise InvalidByteSequenceError.new("Unexpected byte 0x#{error.to_s(16, upcase: true)} at position #{@reader.pos}, malformed UTF-8")
::raise InvalidByteSequenceError.new("Unexpected byte 0x#{error.to_s(16)} at position #{@reader.pos}, malformed UTF-8")
end
char
end
Expand Down Expand Up @@ -2860,14 +2855,11 @@ module Crystal
end

def self.ident_start?(char)
char.letter? || char == '_'
char.ascii_letter? || char == '_' || char.ord > 0x9F
end

def self.ident_part?(char)
ident_start?(char) ||
Unicode.mark_nonspacing?(char) || Unicode.mark_spacing_combining?(char) ||
Unicode.number_digit?(char) || Unicode.number_letter?(char) ||
Unicode.punctuation_connector?(char)
ident_start?(char) || char.ascii_number?
end

def self.ident?(name)
Expand Down
14 changes: 0 additions & 14 deletions src/unicode/data.cr
Original file line number Diff line number Diff line change
Expand Up @@ -1922,20 +1922,6 @@ module Unicode
end
end

@@category_Pc : Array({Int32, Int32, Int32})?

private def self.category_Pc
@@category_Pc ||= begin
data = Array({Int32, Int32, Int32}).new(5)
put(data, 95, 8255, 8160)
put(data, 8256, 8276, 20)
put(data, 65075, 65076, 1)
put(data, 65101, 65103, 1)
put(data, 65343, 65343, 1)
data
end
end

@@category_Zs : Array({Int32, Int32, Int32})?

private def self.category_Zs
Expand Down
25 changes: 0 additions & 25 deletions src/unicode/unicode.cr
Original file line number Diff line number Diff line change
Expand Up @@ -197,16 +197,6 @@ module Unicode
in_any_category?(char.ord, category_Nd, category_Nl, category_No)
end

# :nodoc:
def self.number_digit?(char : Char) : Bool
in_any_category?(char.ord, category_Nd)
end

# :nodoc:
def self.number_letter?(char : Char) : Bool
in_any_category?(char.ord, category_Nl)
end

# :nodoc:
def self.control?(char : Char) : Bool
in_any_category?(char.ord, category_Cs, category_Co, category_Cn, category_Cf, category_Cc)
Expand All @@ -217,26 +207,11 @@ module Unicode
in_any_category?(char.ord, category_Zs, category_Zl, category_Zp)
end

# :nodoc:
def self.punctuation_connector?(char : Char) : Bool
in_any_category?(char.ord, category_Pc)
end

# :nodoc:
def self.mark?(char : Char) : Bool
in_any_category?(char.ord, category_Mn, category_Me, category_Mc)
end

# :nodoc:
def self.mark_nonspacing?(char : Char) : Bool
in_any_category?(char.ord, category_Mn)
end

# :nodoc:
def self.mark_spacing_combining?(char : Char) : Bool
in_any_category?(char.ord, category_Mc)
end

private def self.search_ranges(haystack, needle)
value = haystack.bsearch { |low, high, delta| needle <= high }
if value && value[0] <= needle <= value[1]
Expand Down