Skip to content

Commit

Permalink
Support Unicode characters in AsciiStringLiterals up to 0.7.0
Browse files Browse the repository at this point in the history
  • Loading branch information
OmarTawfik committed Feb 12, 2024
1 parent 29ae313 commit 4651241
Show file tree
Hide file tree
Showing 20 changed files with 311 additions and 233 deletions.
5 changes: 5 additions & 0 deletions .changeset/eighty-moons-talk.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@nomicfoundation/slang": patch
---

Support Unicode characters in `AsciiStringLiterals` up to `0.7.0`
156 changes: 84 additions & 72 deletions crates/solidity/inputs/language/src/definition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3700,26 +3700,24 @@ codegen_language_macros::compile!(Language(
Token(
name = HexStringLiteral,
definitions = [
TokenDefinition(scanner = Fragment(SingleQuotedHexStringLiteral)),
TokenDefinition(scanner = Fragment(DoubleQuotedHexStringLiteral))
TokenDefinition(
// Surrounded by single quotes:
scanner = Sequence([
Atom("hex'"),
Optional(Fragment(HexStringContents)),
Atom("'")
])
),
TokenDefinition(
// Surrounded by double quotes:
scanner = Sequence([
Atom("hex\""),
Optional(Fragment(HexStringContents)),
Atom("\"")
])
)
]
),
// Hex string literal delimited by single quotes: the atom `hex'`, an optional
// `HexStringContents` fragment, then a closing `'`.
Fragment(
name = SingleQuotedHexStringLiteral,
scanner = Sequence([
// The `hex` prefix and the opening quote are matched as one atom.
Atom("hex'"),
// Contents are optional, so the empty literal `hex''` is accepted.
Optional(Fragment(HexStringContents)),
Atom("'")
])
),
// Hex string literal delimited by double quotes: the atom `hex"`, an optional
// `HexStringContents` fragment, then a closing `"`.
Fragment(
name = DoubleQuotedHexStringLiteral,
scanner = Sequence([
// The `hex` prefix and the opening quote are matched as one atom.
Atom("hex\""),
// Contents are optional, so the empty literal `hex""` is accepted.
Optional(Fragment(HexStringContents)),
Atom("\"")
])
),
Fragment(
name = HexStringContents,
scanner = Sequence([
Expand All @@ -3744,36 +3742,58 @@ codegen_language_macros::compile!(Language(
Token(
name = AsciiStringLiteral,
definitions = [
TokenDefinition(scanner = Fragment(SingleQuotedAsciiStringLiteral)),
TokenDefinition(scanner = Fragment(DoubleQuotedAsciiStringLiteral))
TokenDefinition(
// Surrounded by single quotes (and allowing unicode):
enabled = Range(from = "0.4.12", till = "0.7.0"),
scanner = Sequence([
Atom("'"),
ZeroOrMore(Choice([
Fragment(EscapeSequence),
Not(['\'', '\\', '\r', '\n'])
])),
Atom("'")
])
),
TokenDefinition(
// Surrounded by single quotes (without allowing unicode):
scanner = Sequence([
Atom("'"),
ZeroOrMore(Choice([
Fragment(EscapeSequence),
Range(inclusive_start = ' ', inclusive_end = '&'),
Range(inclusive_start = '(', inclusive_end = '['),
Range(inclusive_start = ']', inclusive_end = '~')
])),
Atom("'")
])
),
TokenDefinition(
// Surrounded by double quotes (and allowing unicode):
enabled = Range(from = "0.4.12", till = "0.7.0"),
scanner = Sequence([
Atom("\""),
ZeroOrMore(Choice([
Fragment(EscapeSequence),
Not(['"', '\\', '\r', '\n'])
])),
Atom("\"")
])
),
TokenDefinition(
// Surrounded by double quotes (without allowing unicode):
scanner = Sequence([
Atom("\""),
ZeroOrMore(Choice([
Fragment(EscapeSequence),
Range(inclusive_start = ' ', inclusive_end = '!'),
Range(inclusive_start = '#', inclusive_end = '['),
Range(inclusive_start = ']', inclusive_end = '~')
])),
Atom("\"")
])
)
]
),
// ASCII string literal delimited by single quotes. The body is zero or more
// items, each either an `EscapeSequence` fragment or one character from the
// ranges listed below.
Fragment(
name = SingleQuotedAsciiStringLiteral,
scanner = Sequence([
Atom("'"),
ZeroOrMore(Choice([
Fragment(EscapeSequence),
// The three ranges together span ' ' (0x20) through '~' (0x7E), leaving out
// only '\'' (0x27) and '\\' (0x5C): the delimiter and the escape introducer.
Range(inclusive_start = ' ', inclusive_end = '&'),
Range(inclusive_start = '(', inclusive_end = '['),
Range(inclusive_start = ']', inclusive_end = '~')
])),
Atom("'")
])
),
// ASCII string literal delimited by double quotes. The body is zero or more
// items, each either an `EscapeSequence` fragment or one character from the
// ranges listed below.
Fragment(
name = DoubleQuotedAsciiStringLiteral,
scanner = Sequence([
Atom("\""),
ZeroOrMore(Choice([
Fragment(EscapeSequence),
// The three ranges together span ' ' (0x20) through '~' (0x7E), leaving out
// only '"' (0x22) and '\\' (0x5C): the delimiter and the escape introducer.
Range(inclusive_start = ' ', inclusive_end = '!'),
Range(inclusive_start = '#', inclusive_end = '['),
Range(inclusive_start = ']', inclusive_end = '~')
])),
Atom("\"")
])
),
Repeated(
name = UnicodeStringLiterals,
reference = UnicodeStringLiteral,
Expand All @@ -3783,39 +3803,31 @@ codegen_language_macros::compile!(Language(
name = UnicodeStringLiteral,
definitions = [
TokenDefinition(
// Surrounded by single quotes:
enabled = From("0.7.0"),
scanner = Fragment(SingleQuotedUnicodeStringLiteral)
scanner = Sequence([
Atom("unicode'"),
ZeroOrMore(Choice([
Fragment(EscapeSequence),
Not(['\'', '\\', '\r', '\n'])
])),
Atom("'")
])
),
TokenDefinition(
// Surrounded by double quotes:
enabled = From("0.7.0"),
scanner = Fragment(DoubleQuotedUnicodeStringLiteral)
scanner = Sequence([
Atom("unicode\""),
ZeroOrMore(Choice([
Fragment(EscapeSequence),
Not(['"', '\\', '\r', '\n'])
])),
Atom("\"")
])
)
]
),
// Unicode string literal delimited by single quotes: the atom `unicode'`,
// a body of zero or more items, then a closing `'`.
Fragment(
name = SingleQuotedUnicodeStringLiteral,
// Only enabled starting at language version 0.7.0.
enabled = From("0.7.0"),
scanner = Sequence([
Atom("unicode'"),
ZeroOrMore(Choice([
Fragment(EscapeSequence),
// Any single character other than the closing quote, a backslash,
// carriage return, or line feed.
Not(['\'', '\\', '\r', '\n'])
])),
Atom("'")
])
),
// Unicode string literal delimited by double quotes: the atom `unicode"`,
// a body of zero or more items, then a closing `"`.
Fragment(
name = DoubleQuotedUnicodeStringLiteral,
// Only enabled starting at language version 0.7.0.
enabled = From("0.7.0"),
scanner = Sequence([
Atom("unicode\""),
ZeroOrMore(Choice([
Fragment(EscapeSequence),
// Any single character other than the closing quote, a backslash,
// carriage return, or line feed.
Not(['"', '\\', '\r', '\n'])
])),
Atom("\"")
])
),
Fragment(
name = EscapeSequence,
scanner = Sequence([
Expand Down
Loading

0 comments on commit 4651241

Please sign in to comment.