Skip to content

Commit

Permalink
fix(parser): fix parsing of multiline strings
Browse files Browse the repository at this point in the history
  • Loading branch information
larsgw committed Jan 8, 2025
1 parent 48b4307 commit fcddb84
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 41 deletions.
97 changes: 62 additions & 35 deletions src/parser/base.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ const escapes = {
'\\/': '/',
'\\"': '"',
'\\b': '\x08',
'\\f': '\x0C'
'\\f': '\x0C',
'\\s': '\x20'
}

/**
Expand Down Expand Up @@ -185,47 +186,66 @@ class BaseParser extends EmbeddedActionsParser {
* @return {string}
*/
this.RULE('multilineString', () => {
this.CONSUME(Tokens.MultiLineOpenQuote)

const lines = []

this.CONSUME(Tokens.MultiLineOpenQuote)
let previousToken = this.CONSUME(Tokens.NewLine)
this.MANY(() => {
this.AT_LEAST_ONE(() => {
const newline = this.CONSUME(Tokens.NewLine)
const prefix = this.OPTION(() => this.CONSUME(Tokens.WhiteSpace))

let line = ''
this.MANY2(() => {
line += this.OR([
{ ALT: () => this.CONSUME1(Tokens.WhiteSpace).image },
{ ALT: () => this.CONSUME(Tokens.Unicode).image },
{ ALT: () => escapes[this.CONSUME(Tokens.Escape).image] },
{ ALT: () => this.SUBRULE(this.unicodeEscape) },

this.MANY1(() => {
this.OR([
{ ALT: () => { line += this.CONSUME(Tokens.MultiLineSingleQuote).image } },
{ ALT: () => { line += this.CONSUME(Tokens.Unicode).image } },
{ ALT: () => { line += escapes[this.CONSUME(Tokens.Escape).image] } },
{ ALT: () => { line += this.SUBRULE(this.unicodeEscape) } },
{ ALT: () => { this.CONSUME(Tokens.WhiteSpaceEscape) } },
{ ALT: () => { line += this.CONSUME1(Tokens.WhiteSpace).image } },
{
ALT: () => {
this.CONSUME(Tokens.WhiteSpaceEscape)
return ''
}
GATE: () => this.LA(1).tokenType === Tokens.MultiLineCloseQuote,
ALT: () => {}
}
])
})
lines.push({ previousToken, prefix, line })
previousToken = this.CONSUME1(Tokens.NewLine)

lines.push({ line, prefix, newline })
})

const prefix = this.OPTION1(() => this.CONSUME2(Tokens.WhiteSpace).image) ?? ''
this.CONSUME(Tokens.MultiLineCloseQuote)

if (prefix.length > 0) {
return this.ACTION(() => {
const whitespacePattern = new RegExp('^(' + Tokens.WhiteSpace.PATTERN.source + ')$')
const end = lines.pop()

if (end.line !== '') {
const error = new MismatchedTokenException('Multiline string cannot be dedented', end.prefix, end.newline)
this.SAVE_ERROR(error)
}

const prefix = end.prefix ? end.prefix.image : ''
for (let i = 0; i < lines.length; i++) {
if (lines[i].prefix && lines[i].prefix.image.startsWith(prefix)) {
lines[i] = lines[i].prefix.image.slice(prefix.length) + lines[i].line
} else {
const error = new MismatchedTokenException('Multiline string cannot be dedented', lines[i].prefix, lines[i].previousToken)
throw this.SAVE_ERROR(error)
if (lines[i].line.length === 0) {
lines[i] = ''
continue
}

if (prefix) {
if (lines[i].prefix && lines[i].prefix.image.startsWith(prefix)) {
lines[i] = lines[i].prefix.image.slice(prefix.length) + lines[i].line
continue
} else {
const error = new MismatchedTokenException('Multiline string cannot be dedented', lines[i].prefix, lines[i].newline)
this.SAVE_ERROR(error)
}
}

lines[i] = lines[i].prefix ? lines[i].prefix.image + lines[i].line : lines[i].line
}
}

return lines.join('\n')
return lines.join('\n')
})
})

/**
Expand Down Expand Up @@ -280,13 +300,13 @@ class BaseParser extends EmbeddedActionsParser {
token.startColumn + start + lines[0].length
)
const error = new MismatchedTokenException('Multiline string cannot be dedented', errorToken)
throw this.SAVE_ERROR(error)
this.SAVE_ERROR(error)
}

const prefix = lines[lines.length - 1]
const whitespacePattern = new RegExp('^(' + Tokens.WhiteSpace.PATTERN.source + ')$')

if (!whitespacePattern.test(prefix)) {
if (prefix.length > 0 && !prefix.match(whitespacePattern)) {
const errorToken = createTokenInstance(
Tokens.Unicode,
prefix,
Expand All @@ -298,26 +318,33 @@ class BaseParser extends EmbeddedActionsParser {
prefix.length
)
const error = new MismatchedTokenException('Multiline string cannot be dedented', errorToken)
throw this.SAVE_ERROR(error)
this.SAVE_ERROR(error)
}

if (prefix.length > 0) {
for (let i = 1; i < lines.length - 1; i++) {
for (let i = 1; i < lines.length - 1; i++) {
if (lines[i].match(whitespacePattern) || lines[i].length === 0) {
lines[i] = ''
continue
}

if (prefix) {
if (lines[i].startsWith(prefix)) {
lines[i] = lines[i].slice(prefix.length)
} else {
// TODO newline length
const startOffset = token.startOffset + start + lines.slice(0, i).join().length + i
const errorToken = createTokenInstance(
Tokens.Unicode,
lines[i],
token.startOffset + start + lines.slice(0, i) + i,
token.startOffset + start + lines.slice(0, i + 1) + i,
startOffset,
startOffset + lines[i].length,
token.startLine + i,
token.startLine + i,
0,
lines[i].length
)
const error = new MismatchedTokenException('Multiline string cannot be dedented', )
throw this.SAVE_ERROR(error)
const error = new MismatchedTokenException('Multiline string cannot be dedented', errorToken)
this.SAVE_ERROR(error)
}
}
}
Expand Down
5 changes: 3 additions & 2 deletions src/parser/kdl.js
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,14 @@ const tokens = {
Tokens.CloseQuote
],
multilineString: [
Tokens.MultiLineCloseQuote,
Tokens.MultiLineSingleQuote,
Tokens.NewLine,
Tokens.WhiteSpace,
Tokens.Unicode,
Tokens.Escape,
Tokens.UnicodeEscape,
Tokens.WhiteSpaceEscape,
Tokens.MultiLineCloseQuote
Tokens.WhiteSpaceEscape
]
}
}
Expand Down
2 changes: 2 additions & 0 deletions src/parser/tokens.js
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ const CloseQuote = createToken({ name: 'CloseQuote', pattern: /"/, pop_mode: tru

const MultiLineOpenQuote = createToken({ name: 'MultiLineOpenQuote', pattern: /"""/, push_mode: 'multilineString' })
const MultiLineCloseQuote = createToken({ name: 'MultiLineCloseQuote', pattern: /"""/, pop_mode: true })
const MultiLineSingleQuote = createToken({ name: 'MultiLineSingleQuote', pattern: /"/ })

// Query language
const LeftBracket = createToken({ name: 'LeftBracket', pattern: /\[/ })
Expand Down Expand Up @@ -151,6 +152,7 @@ module.exports = {
CloseQuote,
MultiLineOpenQuote,
MultiLineCloseQuote,
MultiLineSingleQuote,
LeftBracket,
RightBracket,
GreaterThan,
Expand Down
7 changes: 7 additions & 0 deletions test/kdl/value_multiple.kdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
node "foo" "bar"

node """
foo
""" """
bar
"""
8 changes: 4 additions & 4 deletions test/spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ describe('Custom tests', function () {
})
} else {
it('fails to parse', function () {
const actual = parse(input)
assert.strictEqual(actual.output, undefined)
const { errors } = parse(input)
assert.notDeepStrictEqual(errors, [])
})
}
})
Expand Down Expand Up @@ -119,8 +119,8 @@ describe('Official test suite', function () {
}
} else {
it('fails to parse', function () {
assert.deepStrictEqual(parse(input).output, undefined)
assert.notDeepStrictEqual(parse(input).errors, [])
const { errors } = parse(input)
assert.notDeepStrictEqual(errors, [])
})
}
})
Expand Down
4 changes: 4 additions & 0 deletions test/suite.json
Original file line number Diff line number Diff line change
Expand Up @@ -74,5 +74,9 @@
],
"multiline_string_raw": [
{ "name": "node", "values": ["{\n \"newline\": \"\\n\"\n}"] }
],
"value_multiple": [
{ "name": "node", "values": ["foo", "bar"] },
{ "name": "node", "values": ["foo", "bar"] }
]
}

0 comments on commit fcddb84

Please sign in to comment.