From 39f4313e00c057da903043fd0390807c27c98008 Mon Sep 17 00:00:00 2001 From: WebFreak001 Date: Thu, 9 Feb 2023 23:16:42 +0100 Subject: [PATCH 1/2] attach token location to error tokens Makes invalid strings error at proper location instead of at 0,0 See confusion in https://forum.dlang.org/post/mcaldhecyexsolqoctsy@forum.dlang.org --- src/dparse/lexer.d | 90 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 74 insertions(+), 16 deletions(-) diff --git a/src/dparse/lexer.d b/src/dparse/lexer.d index 01607596..ae9217f1 100644 --- a/src/dparse/lexer.d +++ b/src/dparse/lexer.d @@ -1208,8 +1208,7 @@ private pure nothrow @safe: { if (range.index >= range.bytes.length) { - error("Error: unterminated string literal"); - token = Token(tok!""); + error(token, "Error: unterminated string literal"); return; } version (X86_64) @@ -1254,8 +1253,7 @@ private pure nothrow @safe: { if (range.index >= range.bytes.length) { - error("Error: unterminated string literal"); - token = Token(tok!""); + error(token, "Error: unterminated string literal"); return; } version (X86_64) @@ -1280,8 +1278,7 @@ private pure nothrow @safe: range.popFront(); if (range.index >= range.bytes.length) { - error("Error: unterminated string literal"); - token = Token(tok!""); + error(token, "Error: unterminated string literal"); return; } range.popFront(); @@ -1289,8 +1286,7 @@ private pure nothrow @safe: { if (range.index >= range.bytes.length) { - error("Error: unterminated string literal"); - token = Token(tok!""); + error(token, "Error: unterminated string literal"); return; } else if (range.bytes[range.index] == '"') @@ -1388,8 +1384,7 @@ private pure nothrow @safe: } else { - error("Error: `\"` expected to end delimited string literal"); - token = Token(tok!""); + error(token, "Error: `\"` expected to end delimited string literal"); return; } } @@ -1464,6 +1459,13 @@ private pure nothrow @safe: } advance(_front); + + if (range.index >= range.bytes.length) + { + error(token, "Error: unterminated token string literal"); + return; + } + while (depth > 0 && !empty) { auto t = front(); @@ -1503,8 +1505,7 @@ private pure nothrow @safe: { if (range.index >= range.bytes.length) { - error("Error: unterminated hex string literal"); - token = Token(tok!""); + error(token, "Error: unterminated hex string literal"); return; } else if (isWhitespace()) @@ -1520,8 +1521,7 @@ private pure nothrow @safe: range.popFront(); break loop; default: - error("Error: invalid character in hex string"); - token = Token(tok!""); + error(token, "Error: invalid character in hex string"); return; } } @@ -1706,8 +1706,7 @@ private pure nothrow @safe: else { err: - error("Error: Expected `'` to end character literal"); - token = Token(tok!""); + error(token, "Error: Expected `'` to end character literal"); } } @@ -1848,6 +1847,12 @@ private pure nothrow @safe: auto mark = range.mark(); }; + void error(ref Token token, string message) + { + token.type = tok!""; + error(message); + } + void error(string message) { _messages ~= Message(range.line, range.column, message, true); @@ -2449,3 +2454,56 @@ unittest immutable t2 = e2.tok; immutable t3 = e3.tok; } + +/// empty '' is invalid syntax, but should still get parsed properly, with an +/// error token and proper location info +unittest +{ + import std.conv : to; + import std.exception : enforce; + + static immutable src = `module foo.bar; + +void main() { + x = ''; +} +`; + + LexerConfig cf; + StringCache ca = StringCache(16); + + const tokens = getTokensForParser(src, cf, &ca); + + int i; + assert(tokens[i++].type == tok!"module"); + assert(tokens[i++].type == tok!"identifier"); + assert(tokens[i++].type == tok!"."); + assert(tokens[i++].type == tok!"identifier"); + assert(tokens[i++].type == tok!";"); + assert(tokens[i++].type == tok!"void"); + assert(tokens[i++].type == tok!"identifier"); + assert(tokens[i++].type == tok!"("); + assert(tokens[i++].type == tok!")"); + assert(tokens[i++].type == tok!"{"); + assert(tokens[i++].type == tok!"identifier"); + assert(tokens[i++].type == tok!"="); + assert(tokens[i].type == tok!""); + assert(tokens[i].line == tokens[i - 1].line); + assert(tokens[i].column == tokens[i - 1].column + 2); + i++; + assert(tokens[i++].type == tok!";"); + assert(tokens[i++].type == tok!"}"); + + void checkInvalidTrailingString(const Token[] tokens) + { + assert(tokens.length == 3); + assert(tokens[2].index != 0); + assert(tokens[2].column >= 4); + assert(tokens[2].type == tok!""); + } + + checkInvalidTrailingString(getTokensForParser(`x = "foo`, cf, &ca)); + checkInvalidTrailingString(getTokensForParser(`x = r"foo`, cf, &ca)); + checkInvalidTrailingString(getTokensForParser("x = `foo", cf, &ca)); + checkInvalidTrailingString(getTokensForParser("x = q{foo", cf, &ca)); +} From 8cb897ff82d90527bf536991c5817eccebd08b1b Mon Sep 17 00:00:00 2001 From: WebFreak001 Date: Thu, 9 Feb 2023 23:24:10 +0100 Subject: [PATCH 2/2] more coverage, fix range error in q"str --- src/dparse/lexer.d | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/dparse/lexer.d b/src/dparse/lexer.d index ae9217f1..30ed88c9 100644 --- a/src/dparse/lexer.d +++ b/src/dparse/lexer.d @@ -1401,7 +1401,7 @@ private pure nothrow @safe: { Token ident; lexIdentifier(ident); - if (isNewline()) + if (!(range.index >= range.bytes.length) && isNewline()) popFrontWhitespaceAware(); else error("Newline expected"); @@ -1426,14 +1426,15 @@ private pure nothrow @safe: range.popFront(); } } + IdType type; if (!(range.index >= range.bytes.length) && range.bytes[range.index] == '"') { + type = tok!"stringLiteral"; + lexStringSuffix(type); range.popFront(); } else error("`\"` expected"); - IdType type = tok!"stringLiteral"; - lexStringSuffix(type); token = Token(type, cache.intern(range.slice(mark)), line, column, index); } @@ -2504,6 +2505,9 @@ void main() { checkInvalidTrailingString(getTokensForParser(`x = "foo`, cf, &ca)); checkInvalidTrailingString(getTokensForParser(`x = r"foo`, cf, &ca)); + checkInvalidTrailingString(getTokensForParser(`x = x"00`, cf, &ca)); checkInvalidTrailingString(getTokensForParser("x = `foo", cf, &ca)); checkInvalidTrailingString(getTokensForParser("x = q{foo", cf, &ca)); + checkInvalidTrailingString(getTokensForParser(`x = q"foo`, cf, &ca)); + checkInvalidTrailingString(getTokensForParser("x = '", cf, &ca)); }