From fcddb8461002002c927782f26ae4dd96ed3efe30 Mon Sep 17 00:00:00 2001 From: Lars Willighagen Date: Sun, 5 Jan 2025 20:45:24 +0100 Subject: [PATCH] fix(parser): fix parsing of multiline strings --- src/parser/base.js | 97 ++++++++++++++++++++++++------------- src/parser/kdl.js | 5 +- src/parser/tokens.js | 2 + test/kdl/value_multiple.kdl | 7 +++ test/spec.js | 8 +-- test/suite.json | 4 ++ 6 files changed, 82 insertions(+), 41 deletions(-) create mode 100644 test/kdl/value_multiple.kdl diff --git a/src/parser/base.js b/src/parser/base.js index a249dc2..16eadbb 100644 --- a/src/parser/base.js +++ b/src/parser/base.js @@ -18,7 +18,8 @@ const escapes = { '\\/': '/', '\\"': '"', '\\b': '\x08', - '\\f': '\x0C' + '\\f': '\x0C', + '\\s': '\x20' } /** @@ -185,47 +186,66 @@ class BaseParser extends EmbeddedActionsParser { * @return {string} */ this.RULE('multilineString', () => { + this.CONSUME(Tokens.MultiLineOpenQuote) + const lines = [] - this.CONSUME(Tokens.MultiLineOpenQuote) - let previousToken = this.CONSUME(Tokens.NewLine) - this.MANY(() => { + this.AT_LEAST_ONE(() => { + const newline = this.CONSUME(Tokens.NewLine) const prefix = this.OPTION(() => this.CONSUME(Tokens.WhiteSpace)) - let line = '' - this.MANY2(() => { - line += this.OR([ - { ALT: () => this.CONSUME1(Tokens.WhiteSpace).image }, - { ALT: () => this.CONSUME(Tokens.Unicode).image }, - { ALT: () => escapes[this.CONSUME(Tokens.Escape).image] }, - { ALT: () => this.SUBRULE(this.unicodeEscape) }, + + this.MANY1(() => { + this.OR([ + { ALT: () => { line += this.CONSUME(Tokens.MultiLineSingleQuote).image } }, + { ALT: () => { line += this.CONSUME(Tokens.Unicode).image } }, + { ALT: () => { line += escapes[this.CONSUME(Tokens.Escape).image] } }, + { ALT: () => { line += this.SUBRULE(this.unicodeEscape) } }, + { ALT: () => { this.CONSUME(Tokens.WhiteSpaceEscape) } }, + { ALT: () => { line += this.CONSUME1(Tokens.WhiteSpace).image } }, { - ALT: () => { - this.CONSUME(Tokens.WhiteSpaceEscape) - return '' - } + GATE: () => this.LA(1).tokenType === Tokens.MultiLineCloseQuote, + ALT: () => {} } ]) }) - lines.push({ previousToken, prefix, line }) - previousToken = this.CONSUME1(Tokens.NewLine) + + lines.push({ line, prefix, newline }) }) - const prefix = this.OPTION1(() => this.CONSUME2(Tokens.WhiteSpace).image) ?? '' this.CONSUME(Tokens.MultiLineCloseQuote) - if (prefix.length > 0) { + return this.ACTION(() => { + const whitespacePattern = new RegExp('^(' + Tokens.WhiteSpace.PATTERN.source + ')$') + const end = lines.pop() + + if (end.line !== '') { + const error = new MismatchedTokenException('Multiline string cannot be dedented', end.prefix, end.newline) + this.SAVE_ERROR(error) + } + + const prefix = end.prefix ? end.prefix.image : '' for (let i = 0; i < lines.length; i++) { - if (lines[i].prefix && lines[i].prefix.image.startsWith(prefix)) { - lines[i] = lines[i].prefix.image.slice(prefix.length) + lines[i].line - } else { - const error = new MismatchedTokenException('Multiline string cannot be dedented', lines[i].prefix, lines[i].previousToken) - throw this.SAVE_ERROR(error) + if (lines[i].line.length === 0) { + lines[i] = '' + continue + } + + if (prefix) { + if (lines[i].prefix && lines[i].prefix.image.startsWith(prefix)) { + lines[i] = lines[i].prefix.image.slice(prefix.length) + lines[i].line + continue + } else { + const error = new MismatchedTokenException('Multiline string cannot be dedented', lines[i].prefix, lines[i].newline) + this.SAVE_ERROR(error) + } } + + lines[i] = lines[i].prefix ? lines[i].prefix.image + lines[i].line : lines[i].line } - } - return lines.join('\n') + return lines.join('\n') + }) }) /** @@ -280,13 +300,13 @@ class BaseParser extends EmbeddedActionsParser { token.startColumn + start + lines[0].length ) const error = new MismatchedTokenException('Multiline string cannot be dedented', errorToken) - throw this.SAVE_ERROR(error) + this.SAVE_ERROR(error) } const prefix = lines[lines.length - 1] const whitespacePattern = new RegExp('^(' + Tokens.WhiteSpace.PATTERN.source + ')$') - if (!whitespacePattern.test(prefix)) { + if (prefix.length > 0 && !prefix.match(whitespacePattern)) { const errorToken = createTokenInstance( Tokens.Unicode, prefix, @@ -298,26 +318,33 @@ class BaseParser extends EmbeddedActionsParser { prefix.length ) const error = new MismatchedTokenException('Multiline string cannot be dedented', errorToken) - throw this.SAVE_ERROR(error) + this.SAVE_ERROR(error) } - if (prefix.length > 0) { - for (let i = 1; i < lines.length - 1; i++) { + for (let i = 1; i < lines.length - 1; i++) { + if (lines[i].match(whitespacePattern) || lines[i].length === 0) { + lines[i] = '' + continue + } + + if (prefix) { if (lines[i].startsWith(prefix)) { lines[i] = lines[i].slice(prefix.length) } else { + // TODO newline length + const startOffset = token.startOffset + start + lines.slice(0, i).join().length + i const errorToken = createTokenInstance( Tokens.Unicode, lines[i], - token.startOffset + start + lines.slice(0, i) + i, - token.startOffset + start + lines.slice(0, i + 1) + i, + startOffset, + startOffset + lines[i].length, token.startLine + i, token.startLine + i, 0, lines[i].length ) - const error = new MismatchedTokenException('Multiline string cannot be dedented', ) - throw this.SAVE_ERROR(error) + const error = new MismatchedTokenException('Multiline string cannot be dedented', errorToken) + this.SAVE_ERROR(error) } } } diff --git a/src/parser/kdl.js b/src/parser/kdl.js index 4344deb..26db889 100644 --- a/src/parser/kdl.js +++ b/src/parser/kdl.js @@ -48,13 +48,14 @@ const tokens = { Tokens.CloseQuote ], multilineString: [ + Tokens.MultiLineCloseQuote, + Tokens.MultiLineSingleQuote, Tokens.NewLine, Tokens.WhiteSpace, Tokens.Unicode, Tokens.Escape, Tokens.UnicodeEscape, - Tokens.WhiteSpaceEscape, - Tokens.MultiLineCloseQuote + Tokens.WhiteSpaceEscape ] } } diff --git a/src/parser/tokens.js b/src/parser/tokens.js index 9108ede..baec24e 100644 --- a/src/parser/tokens.js +++ b/src/parser/tokens.js @@ -97,6 +97,7 @@ const CloseQuote = createToken({ name: 'CloseQuote', pattern: /"/, pop_mode: tru const MultiLineOpenQuote = createToken({ name: 'MultiLineOpenQuote', pattern: /"""/, push_mode: 'multilineString' }) const MultiLineCloseQuote = createToken({ name: 'MultiLineCloseQuote', pattern: /"""/, pop_mode: true }) +const MultiLineSingleQuote = createToken({ name: 'MultiLineSingleQuote', pattern: /"/ }) // Query language const LeftBracket = createToken({ name: 'LeftBracket', pattern: /\[/ }) @@ -151,6 +152,7 @@ module.exports = { CloseQuote, MultiLineOpenQuote, MultiLineCloseQuote, + MultiLineSingleQuote, LeftBracket, RightBracket, GreaterThan, diff --git a/test/kdl/value_multiple.kdl b/test/kdl/value_multiple.kdl new file mode 100644 index 0000000..b645a0e --- /dev/null +++ b/test/kdl/value_multiple.kdl @@ -0,0 +1,7 @@ +node "foo" "bar" + +node """ +foo +""" """ +bar +""" diff --git a/test/spec.js b/test/spec.js index 023b3c9..d3d82df 100644 --- a/test/spec.js +++ b/test/spec.js @@ -42,8 +42,8 @@ describe('Custom tests', function () { }) } else { it('fails to parse', function () { - const actual = parse(input) - assert.strictEqual(actual.output, undefined) + const { errors } = parse(input) + assert.notDeepStrictEqual(errors, []) }) } }) @@ -119,8 +119,8 @@ describe('Official test suite', function () { } } else { it('fails to parse', function () { - assert.deepStrictEqual(parse(input).output, undefined) - assert.notDeepStrictEqual(parse(input).errors, []) + const { errors } = parse(input) + assert.notDeepStrictEqual(errors, []) }) } }) diff --git a/test/suite.json b/test/suite.json index 4e5441b..fa3ed5b 100644 --- a/test/suite.json +++ b/test/suite.json @@ -74,5 +74,9 @@ ], "multiline_string_raw": [ { "name": "node", "values": ["{\n \"newline\": \"\\n\"\n}"] } + ], + "value_multiple": [ + { "name": "node", "values": ["foo", "bar"] }, + { "name": "node", "values": ["foo", "bar"] } ] }