Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve error position reported by the lexer #186

Merged
merged 1 commit into from
Oct 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 32 additions & 17 deletions lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ func (l *Lexer) consumeQuotedContent(q string, raw, unicode bool, name string) s
for l.peekOk(i) {
if l.slice(i, i+len(q)) == q {
if len(content) == 0 && name == "identifier" {
l.panicf("invalid empty identifier")
l.panicfAtPosition(token.Pos(l.pos), token.Pos(l.pos+i+len(q)), "invalid empty identifier")
}
l.skipN(i + len(q))
return string(content)
Expand All @@ -428,7 +428,7 @@ func (l *Lexer) consumeQuotedContent(q string, raw, unicode bool, name string) s
if c == '\\' {
i++
if !l.peekOk(i) {
l.panicf("invalid escape sequence: \\<eof>")
l.panicfAtPosition(token.Pos(l.pos+i-1), token.Pos(l.pos+i), "invalid escape sequence: \\<eof>")
}

c := l.peek(i)
Expand Down Expand Up @@ -457,65 +457,69 @@ func (l *Lexer) consumeQuotedContent(q string, raw, unicode bool, name string) s
case '\\', '?', '"', '\'', '`':
content = append(content, c)
case 'x', 'X':
if !(l.peekOk(i+1) && char.IsHexDigit(l.peek(i)) && char.IsHexDigit(l.peek(i+1))) {
l.panicf("invalid escape sequence: hex escape sequence must be follwed by 2 hex digits")
for j := 0; j < 2; j++ {
if !(l.peekOk(i+j) && char.IsHexDigit(l.peek(i+j))) {
l.panicfAtPosition(token.Pos(l.pos+i-2), token.Pos(l.pos+i+j+1), "invalid escape sequence: hex escape sequence must be follwed by 2 hex digits")
}
}
u, err := strconv.ParseUint(l.slice(i, i+2), 16, 8)
if err != nil {
l.panicf("invalid escape sequence: %v", err)
l.panicfAtPosition(token.Pos(l.pos+i-2), token.Pos(l.pos+i+2), "invalid escape sequence: %v", err)
}
content = append(content, byte(u))
i += 2
case 'u', 'U':
if !unicode {
l.panicf("invalid escape sequence: \\%c is not allowed in %s", c, name)
l.panicfAtPosition(token.Pos(l.pos+i-2), token.Pos(l.pos+i), "invalid escape sequence: \\%c is not allowed in %s", c, name)
}
size := 4
if c == 'U' {
size = 8
}
for j := 0; j < size; j++ {
if !(l.peekOk(i+j) && char.IsHexDigit(l.peek(i+j))) {
l.panicf("invalid escape sequence: \\%c must be followed by %d hex digits", c, size)
l.panicfAtPosition(token.Pos(l.pos+i-2), token.Pos(l.pos+i+j+1), "invalid escape sequence: \\%c must be followed by %d hex digits", c, size)
}
}
u, err := strconv.ParseUint(l.slice(i, i+size), 16, 32)
if err != nil {
l.panicf("invalid escape sequence: %v", err)
l.panicfAtPosition(token.Pos(l.pos+i-2), token.Pos(l.pos+i+size), "invalid escape sequence: %v", err)
}
if 0xD800 <= u && u <= 0xDFFF || 0x10FFFF < u {
l.panicf("invalid escape sequence: invalid code point: U+%04X", u)
l.panicfAtPosition(token.Pos(l.pos+i-2), token.Pos(l.pos+i+size), "invalid escape sequence: invalid code point: U+%04X", u)
}
var buf [utf8.MaxRune]byte
n := utf8.EncodeRune(buf[:], rune(u))
content = append(content, buf[:n]...)
i += size
case '0', '1', '2', '3':
if !(l.peekOk(i+1) && char.IsOctalDigit(l.peek(i)) && char.IsOctalDigit(l.peek(i+1))) {
l.panicf("invalid escape sequence: octal escape sequence must be follwed by 3 octal digits")
for j := 0; j < 2; j++ {
if !(l.peekOk(i+j) && char.IsOctalDigit(l.peek(i+j))) {
l.panicfAtPosition(token.Pos(l.pos+i-2), token.Pos(l.pos+i+j+1), "invalid escape sequence: octal escape sequence must be follwed by 3 octal digits")
}
}
u, err := strconv.ParseUint(l.slice(i-1, i+2), 8, 8)
if err != nil {
l.panicf("invalid escape sequence: %v", err)
l.panicfAtPosition(token.Pos(l.pos+i-2), token.Pos(l.pos+i+2), "invalid escape sequence: %v", err)
}
content = append(content, byte(u))
i += 2
default:
l.panicf("invalid escape sequence: \\%c", c)
l.panicfAtPosition(token.Pos(l.pos+i-2), token.Pos(l.pos+i), "invalid escape sequence: \\%c", c)
}

continue
}

if c == '\n' && len(q) != 3 {
l.panicf("unclosed %s: newline appears in non triple-quoted", name)
l.panicfAtPosition(token.Pos(l.pos), token.Pos(l.pos+i+1), "unclosed %s: newline appears in non triple-quoted", name)
}

content = append(content, c)
i++
}

panic(l.errorf("unclosed %s", name))
panic(l.errorfAtPosition(token.Pos(l.pos), token.Pos(l.pos+i), "unclosed %s", name))
}

func (l *Lexer) skipSpaces() {
Expand Down Expand Up @@ -543,6 +547,7 @@ func (l *Lexer) skipComment() {
}

func (l *Lexer) skipCommentUntil(end string, mustEnd bool) {
pos := token.Pos(l.pos)
for !l.eof() {
if l.slice(0, len(end)) == end {
l.skipN(len(end))
Expand All @@ -551,8 +556,7 @@ func (l *Lexer) skipCommentUntil(end string, mustEnd bool) {
l.skip()
}
if mustEnd {
// TODO: improve error position
l.panicf("unclosed comment")
l.panicfAtPosition(pos, token.Pos(l.pos), "unclosed comment")
}
}

Expand Down Expand Up @@ -596,6 +600,17 @@ func (l *Lexer) errorf(msg string, param ...interface{}) *Error {
}
}

// errorfAtPosition builds an *Error for an explicit source span instead of the
// lexer's current position. The message is msg formatted with param via
// fmt.Sprintf, and the position is whatever l.Position returns for (pos, end).
func (l *Lexer) errorfAtPosition(pos, end token.Pos, msg string, param ...interface{}) *Error {
	lexErr := new(Error)
	lexErr.Message = fmt.Sprintf(msg, param...)
	lexErr.Position = l.Position(pos, end)
	return lexErr
}

// panicf formats msg with param into an *Error through l.errorf and panics
// with that error value.
func (l *Lexer) panicf(msg string, param ...interface{}) {
	lexErr := l.errorf(msg, param...)
	panic(lexErr)
}

// panicfAtPosition is the panicking counterpart of errorfAtPosition: it builds
// an *Error for the span (pos, end) with msg formatted by param and panics
// with that error value.
func (l *Lexer) panicfAtPosition(pos, end token.Pos, msg string, param ...interface{}) {
	lexErr := l.errorfAtPosition(pos, end, msg, param...)
	panic(lexErr)
}
42 changes: 23 additions & 19 deletions lexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,26 +143,27 @@ var lexerTestCases = []struct {
var lexerWrongTestCase = []struct {
source string
pos Pos
end Pos
message string
}{
{"\b", 0, "illegal input character: '\\b'"},
{`"foo`, 0, "unclosed string literal"},
{`R"foo`, 1, "unclosed raw string literal"},
{"'foo\n", 0, "unclosed string literal: newline appears in non triple-quoted"},
{"R'foo\n", 1, "unclosed raw string literal: newline appears in non triple-quoted"},
{"R'foo\\", 1, "invalid escape sequence: \\<eof>"},
{`"\400"`, 0, "invalid escape sequence: \\4"},
{`"\3xx"`, 0, "invalid escape sequence: octal escape sequence must be follwed by 3 octal digits"},
{`"\xZZ"`, 0, "invalid escape sequence: hex escape sequence must be follwed by 2 hex digits"},
{`"\XZZ"`, 0, "invalid escape sequence: hex escape sequence must be follwed by 2 hex digits"},
{`B"\u0031"`, 1, "invalid escape sequence: \\u is not allowed in bytes literal"},
{`B"\U00000031"`, 1, "invalid escape sequence: \\U is not allowed in bytes literal"},
{`B"\U00000031"`, 1, "invalid escape sequence: \\U is not allowed in bytes literal"},
{`"\UFFFFFFFF"`, 0, "invalid escape sequence: invalid code point: U+FFFFFFFF"},
{"``", 0, "invalid empty identifier"},
{"1from", 1, "number literal cannot follow identifier without any spaces"},
{`'''0`, 0, "unclosed triple-quoted string literal"},
{`/*`, 2, "unclosed comment"},
{"\b", 0, 0, "illegal input character: '\\b'"},
{`"foo`, 0, 4, "unclosed string literal"},
{`R"foo`, 1, 5, "unclosed raw string literal"},
{"'foo\n", 0, 5, "unclosed string literal: newline appears in non triple-quoted"},
{"R'foo\n", 1, 6, "unclosed raw string literal: newline appears in non triple-quoted"},
{"R'foo\\", 5, 6, "invalid escape sequence: \\<eof>"},
{`"\400"`, 1, 3, "invalid escape sequence: \\4"},
{`"\3xx"`, 1, 4, "invalid escape sequence: octal escape sequence must be follwed by 3 octal digits"},
{`"\xZZ"`, 1, 4, "invalid escape sequence: hex escape sequence must be follwed by 2 hex digits"},
{`"\XZZ"`, 1, 4, "invalid escape sequence: hex escape sequence must be follwed by 2 hex digits"},
{`B"\u0031"`, 2, 4, "invalid escape sequence: \\u is not allowed in bytes literal"},
{`B"\U00000031"`, 2, 4, "invalid escape sequence: \\U is not allowed in bytes literal"},
{`B"\U00000031"`, 2, 4, "invalid escape sequence: \\U is not allowed in bytes literal"},
{`"\UFFFFFFFF"`, 1, 11, "invalid escape sequence: invalid code point: U+FFFFFFFF"},
{"``", 0, 2, "invalid empty identifier"},
{"1from", 1, 1, "number literal cannot follow identifier without any spaces"},
{`'''0`, 0, 4, "unclosed triple-quoted string literal"},
{`/*`, 0, 2, "unclosed comment"},
}

func testLexer(t *testing.T, source string, tokens []*Token) {
Expand Down Expand Up @@ -240,7 +241,10 @@ func TestLexerWrong(t *testing.T) {
t.Errorf("expected error message: %q, but: %q", tc.message, e.Message)
}
if e.Position.Pos != tc.pos {
t.Errorf("expected error position: %v, but: %v", tc.pos, e.Position.Pos)
t.Errorf("expected error position (pos): %v, but: %v", tc.pos, e.Position.Pos)
}
if e.Position.End != tc.end {
t.Errorf("expected error position (end): %v, but: %v", tc.end, e.Position.End)
}
} else {
t.Errorf("unexpected error: %v", err)
Expand Down
Loading