Skip to content

Commit

Permalink
Lua 5.4: relax UTF-8 escapes
Browse files: browse the repository at this point in the history
  • Loading branch information
fstirlitz committed May 18, 2021
1 parent ce18371 commit 003e1b4
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 8 deletions.
29 changes: 23 additions & 6 deletions luaparse.js
Original file line number | Diff line number | Diff line change
Expand Up @@ -106,16 +106,32 @@
highMask | 0x80 | ((codepoint >> 6) & 0x3f),
highMask | 0x80 | ( codepoint & 0x3f)
);
} else /* istanbul ignore else */ if (codepoint < 0x110000) {
} else if (codepoint < 0x200000) {
return String.fromCharCode(
highMask | 0xf0 | (codepoint >> 18) ,
highMask | 0x80 | ((codepoint >> 12) & 0x3f),
highMask | 0x80 | ((codepoint >> 6) & 0x3f),
highMask | 0x80 | ( codepoint & 0x3f)
);
} else if (codepoint < 0x4000000) {
return String.fromCharCode(
highMask | 0xf8 | (codepoint >> 24) ,
highMask | 0x80 | ((codepoint >> 18) & 0x3f),
highMask | 0x80 | ((codepoint >> 12) & 0x3f),
highMask | 0x80 | ((codepoint >> 6) & 0x3f),
highMask | 0x80 | ( codepoint & 0x3f)
);
} else if (codepoint <= 0x7fffffff) {
return String.fromCharCode(
highMask | 0xfc | (codepoint >> 30) ,
highMask | 0x80 | ((codepoint >> 24) & 0x3f),
highMask | 0x80 | ((codepoint >> 18) & 0x3f),
highMask | 0x80 | ((codepoint >> 12) & 0x3f),
highMask | 0x80 | ((codepoint >> 6) & 0x3f),
highMask | 0x80 | ( codepoint & 0x3f)
);
} else {
// TODO: Lua 5.4 allows up to six-byte sequences, as in UTF-8:1993
return null;
throw new Error('Should not happen');
}
}

Expand Down Expand Up @@ -1160,7 +1176,7 @@

while (isHexDigit(input.charCodeAt(index))) {
++index;
if (index - escStart > 6)
if (index - escStart > (features.relaxedUTF8 ? 8 : 6))
raise(null, errors.tooLargeCodepoint, '\\' + input.slice(sequenceStart, index));
}

Expand All @@ -1175,7 +1191,7 @@
var codepoint = parseInt(input.slice(escStart, index - 1) || '0', 16);
var frag = '\\' + input.slice(sequenceStart, index);

if (codepoint > 0x10ffff) {
if (codepoint > (features.relaxedUTF8 ? 0x7fffffff : 0x10ffff)) {
raise(null, errors.tooLargeCodepoint, frag);
}

Expand Down Expand Up @@ -2716,7 +2732,8 @@
integerDivision: true,
relaxedBreak: true,
noLabelShadowing: true,
attributes: { 'const': true, 'close': true }
attributes: { 'const': true, 'close': true },
relaxedUTF8: true
},
'LuaJIT': {
// XXX: LuaJIT language features may depend on compilation options; may need to
Expand Down
10 changes: 8 additions & 2 deletions test/runner.js
Original file line number | Diff line number | Diff line change
Expand Up @@ -524,6 +524,12 @@
, "'\ud83d\udca9'"
, '[[\ud83d\udca9]]'
]
, [ '"\\u{db80}"'
, '"\\xed\\xae\\x80"'
]
, [ '"\\u{7fffffff}"'
, '"\\xfd\\xbf\\xbf\\xbf\\xbf\\xbf"'
]
, [ '"\\\\a"'
, '[==[\\a]==]'
]
Expand Down Expand Up @@ -551,11 +557,11 @@

var list = testcases[i];
var left = luaparse.parse('return ' + list[0],
{ "luaVersion": "5.3" }).body[0].arguments[0];
{ "luaVersion": "5.4" }).body[0].arguments[0];

for (var j = 1; j < list.length; ++j) {
var right = luaparse.parse('return ' + list[j],
{ "luaVersion": "5.3" }).body[0].arguments[0];
{ "luaVersion": "5.4" }).body[0].arguments[0];

this.equal(left.value, right.value, symbolicControlChars(left.raw) + ' == ' + symbolicControlChars(right.raw));
left = right;
Expand Down

0 comments on commit 003e1b4

Please sign in to comment.