Skip to content

Commit

Permalink
fix #1599: U+30FB and U+FF65 in ES5 vs. ES6+
Browse files Browse the repository at this point in the history
  • Loading branch information
evanw committed Sep 14, 2021
1 parent 5347239 commit 5c10033
Show file tree
Hide file tree
Showing 6 changed files with 68 additions and 31 deletions.
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,18 @@
# Changelog

## Unreleased

* Fix U+30FB and U+FF65 in identifier names in ES5 vs. ES6+ ([#1599](https://github.com/evanw/esbuild/issues/1599))

The ES6 specification caused two code points that were previously valid in identifier names in ES5 to no longer be valid in identifier names in ES6+. The two code points are:

* `U+30FB` i.e. `KATAKANA MIDDLE DOT` i.e. `・`
* `U+FF65` i.e. `HALFWIDTH KATAKANA MIDDLE DOT` i.e. `･`

This means that using ES6+ parsing rules will fail to parse some valid ES5 code, and generating valid ES5 code may fail to be parsed using ES6+ parsing rules. For example, esbuild would previously fail to parse `x.y・` even though it's valid ES5 code (since it's not valid ES6+ code) and esbuild could generate `{y・:x}` when minifying even though it's not valid ES6+ code (since it's valid ES5 code). This problem is the result of my incorrect assumption that ES6 is a superset of ES5.

As of this release, esbuild will now parse a superset of ES5 and ES6+ and will now quote an identifier name when possible unless it is a valid identifier name in both ES5 and ES6+. In other words, the union of the ES5 and ES6+ rules is used for parsing and the intersection of the ES5 and ES6+ rules is used for printing.

## 0.12.27

* Update JavaScript syntax feature compatibility tables ([#1594](https://github.com/evanw/esbuild/issues/1594))
Expand Down
26 changes: 13 additions & 13 deletions internal/js_lexer/js_lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -577,17 +577,17 @@ func IsIdentifier(text string) bool {
return true
}

func IsIdentifierES5(text string) bool {
func IsIdentifierES5AndESNext(text string) bool {
if len(text) == 0 {
return false
}
for i, codePoint := range text {
if i == 0 {
if !IsIdentifierStartES5(codePoint) {
if !IsIdentifierStartES5AndESNext(codePoint) {
return false
}
} else {
if !IsIdentifierContinueES5(codePoint) {
if !IsIdentifierContinueES5AndESNext(codePoint) {
return false
}
}
Expand Down Expand Up @@ -652,8 +652,8 @@ func IsIdentifierUTF16(text []uint16) bool {
return true
}

// This does "IsIdentifierES5(UTF16ToString(text))" without any allocations
func IsIdentifierES5UTF16(text []uint16) bool {
// This does "IsIdentifierES5AndESNext(UTF16ToString(text))" without any allocations
func IsIdentifierES5AndESNextUTF16(text []uint16) bool {
n := len(text)
if n == 0 {
return false
Expand All @@ -668,11 +668,11 @@ func IsIdentifierES5UTF16(text []uint16) bool {
}
}
if isStart {
if !IsIdentifierStartES5(r1) {
if !IsIdentifierStartES5AndESNext(r1) {
return false
}
} else {
if !IsIdentifierContinueES5(r1) {
if !IsIdentifierContinueES5AndESNext(r1) {
return false
}
}
Expand All @@ -695,7 +695,7 @@ func IsIdentifierStart(codePoint rune) bool {
return false
}

return unicode.Is(idStart, codePoint)
return unicode.Is(idStartES5OrESNext, codePoint)
}

func IsIdentifierContinue(codePoint rune) bool {
Expand All @@ -718,10 +718,10 @@ func IsIdentifierContinue(codePoint rune) bool {
return true
}

return unicode.Is(idContinue, codePoint)
return unicode.Is(idContinueES5OrESNext, codePoint)
}

func IsIdentifierStartES5(codePoint rune) bool {
func IsIdentifierStartES5AndESNext(codePoint rune) bool {
switch codePoint {
case '_', '$',
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
Expand All @@ -736,10 +736,10 @@ func IsIdentifierStartES5(codePoint rune) bool {
return false
}

return unicode.Is(idStartES5, codePoint)
return unicode.Is(idStartES5AndESNext, codePoint)
}

func IsIdentifierContinueES5(codePoint rune) bool {
func IsIdentifierContinueES5AndESNext(codePoint rune) bool {
switch codePoint {
case '_', '$', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
Expand All @@ -759,7 +759,7 @@ func IsIdentifierContinueES5(codePoint rune) bool {
return true
}

return unicode.Is(idContinueES5, codePoint)
return unicode.Is(idContinueES5AndESNext, codePoint)
}

// See the "White Space Code Points" table in the ECMAScript standard
Expand Down
18 changes: 9 additions & 9 deletions internal/js_lexer/unicode.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package js_lexer

import "unicode"

var idStartES5 = &unicode.RangeTable{
var idStartES5AndESNext = &unicode.RangeTable{
LatinOffset: 117,
R16: []unicode.Range16{
{Lo: 0x41, Hi: 0x5a, Stride: 1},
Expand Down Expand Up @@ -266,7 +266,7 @@ var idStartES5 = &unicode.RangeTable{
},
}

var idContinueES5 = &unicode.RangeTable{
var idContinueES5AndESNext = &unicode.RangeTable{
LatinOffset: 128,
R16: []unicode.Range16{
{Lo: 0x30, Hi: 0x39, Stride: 1},
Expand Down Expand Up @@ -578,7 +578,8 @@ var idContinueES5 = &unicode.RangeTable{
{Lo: 0x3041, Hi: 0x3094, Stride: 1},
{Lo: 0x3099, Hi: 0x309a, Stride: 1},
{Lo: 0x309d, Hi: 0x309e, Stride: 1},
{Lo: 0x30a1, Hi: 0x30fe, Stride: 1},
{Lo: 0x30a1, Hi: 0x30fa, Stride: 1},
{Lo: 0x30fc, Hi: 0x30fe, Stride: 1},
{Lo: 0x3105, Hi: 0x312c, Stride: 1},
{Lo: 0x3131, Hi: 0x318e, Stride: 1},
{Lo: 0x31a0, Hi: 0x31b7, Stride: 1},
Expand Down Expand Up @@ -610,15 +611,15 @@ var idContinueES5 = &unicode.RangeTable{
{Lo: 0xff21, Hi: 0xff3a, Stride: 1},
{Lo: 0xff3f, Hi: 0xff3f, Stride: 1},
{Lo: 0xff41, Hi: 0xff5a, Stride: 1},
{Lo: 0xff65, Hi: 0xffbe, Stride: 1},
{Lo: 0xff66, Hi: 0xffbe, Stride: 1},
{Lo: 0xffc2, Hi: 0xffc7, Stride: 1},
{Lo: 0xffca, Hi: 0xffcf, Stride: 1},
{Lo: 0xffd2, Hi: 0xffd7, Stride: 1},
{Lo: 0xffda, Hi: 0xffdc, Stride: 1},
},
}

var idStart = &unicode.RangeTable{
var idStartES5OrESNext = &unicode.RangeTable{
LatinOffset: 117,
R16: []unicode.Range16{
{Lo: 0x41, Hi: 0x5a, Stride: 1},
Expand Down Expand Up @@ -1248,7 +1249,7 @@ var idStart = &unicode.RangeTable{
},
}

var idContinue = &unicode.RangeTable{
var idContinueES5OrESNext = &unicode.RangeTable{
LatinOffset: 129,
R16: []unicode.Range16{
{Lo: 0x30, Hi: 0x39, Stride: 1},
Expand Down Expand Up @@ -1600,8 +1601,7 @@ var idContinue = &unicode.RangeTable{
{Lo: 0x3038, Hi: 0x303c, Stride: 1},
{Lo: 0x3041, Hi: 0x3096, Stride: 1},
{Lo: 0x3099, Hi: 0x309f, Stride: 1},
{Lo: 0x30a1, Hi: 0x30fa, Stride: 1},
{Lo: 0x30fc, Hi: 0x30ff, Stride: 1},
{Lo: 0x30a1, Hi: 0x30ff, Stride: 1},
{Lo: 0x3105, Hi: 0x312f, Stride: 1},
{Lo: 0x3131, Hi: 0x318e, Stride: 1},
{Lo: 0x31a0, Hi: 0x31bf, Stride: 1},
Expand Down Expand Up @@ -1678,7 +1678,7 @@ var idContinue = &unicode.RangeTable{
{Lo: 0xff21, Hi: 0xff3a, Stride: 1},
{Lo: 0xff3f, Hi: 0xff3f, Stride: 1},
{Lo: 0xff41, Hi: 0xff5a, Stride: 1},
{Lo: 0xff66, Hi: 0xffbe, Stride: 1},
{Lo: 0xff65, Hi: 0xffbe, Stride: 1},
{Lo: 0xffc2, Hi: 0xffc7, Stride: 1},
{Lo: 0xffca, Hi: 0xffcf, Stride: 1},
{Lo: 0xffd2, Hi: 0xffd7, Stride: 1},
Expand Down
14 changes: 14 additions & 0 deletions internal/js_parser/js_parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -574,6 +574,20 @@ func TestRegExp(t *testing.T) {
`)
}

// TestUnicodeIdentifierNames checks how property names are printed when they
// contain code points whose identifier status differs between ES5 and ES6+.
// Such names are expected to be printed in quoted/bracketed form, while a name
// made only of code points valid under both rule sets stays unquoted.
func TestUnicodeIdentifierNames(t *testing.T) {
	// There are two code points that are valid in identifiers in ES5 but not in ES6+:
	//
	//   U+30FB KATAKANA MIDDLE DOT
	//   U+FF65 HALFWIDTH KATAKANA MIDDLE DOT
	//
	// Both are spelled with Go "\u" escapes below because the two glyphs are
	// nearly indistinguishable and are easily conflated by Unicode
	// normalization. Go decodes the escape inside the string literal, so the
	// JavaScript input still contains the raw code point rather than a
	// JavaScript escape sequence.

	// Object literal keys: both middle dots must come out quoted.
	expectPrinted(t, "x = {x\u30fb: 0}", "x = { \"x\u30fb\": 0 };\n")
	expectPrinted(t, "x = {x\uff65: 0}", "x = { \"x\uff65\": 0 };\n")
	// Control case: a non-ASCII name that is printed without quotes.
	expectPrinted(t, "x = {xπ: 0}", "x = { xπ: 0 };\n")

	// Property accesses: both middle dots must come out in bracket form.
	expectPrinted(t, "x = y.x\u30fb", "x = y[\"x\u30fb\"];\n")
	expectPrinted(t, "x = y.x\uff65", "x = y[\"x\uff65\"];\n")
	// Control case: a non-ASCII name that keeps the dot form.
	expectPrinted(t, "x = y.xπ", "x = y.xπ;\n")
}

func TestIdentifierEscapes(t *testing.T) {
expectPrinted(t, "var _\\u0076\\u0061\\u0072", "var _var;\n")
expectParseError(t, "var \\u0076\\u0061\\u0072", "<stdin>: error: Expected identifier but found \"\\\\u0076\\\\u0061\\\\u0072\"\n")
Expand Down
6 changes: 3 additions & 3 deletions internal/js_printer/js_printer.go
Original file line number Diff line number Diff line change
Expand Up @@ -510,19 +510,19 @@ func (p *printer) printClauseAlias(alias string) {
// JavaScript language target that we support.

func CanEscapeIdentifier(name string, unsupportedJSFeatures compat.JSFeature, asciiOnly bool) bool {
return js_lexer.IsIdentifierES5(name) && (!asciiOnly ||
return js_lexer.IsIdentifierES5AndESNext(name) && (!asciiOnly ||
!unsupportedJSFeatures.Has(compat.UnicodeEscapes) ||
!js_lexer.ContainsNonBMPCodePoint(name))
}

func (p *printer) canPrintIdentifier(name string) bool {
return js_lexer.IsIdentifierES5(name) && (!p.options.ASCIIOnly ||
return js_lexer.IsIdentifierES5AndESNext(name) && (!p.options.ASCIIOnly ||
!p.options.UnsupportedFeatures.Has(compat.UnicodeEscapes) ||
!js_lexer.ContainsNonBMPCodePoint(name))
}

func (p *printer) canPrintIdentifierUTF16(name []uint16) bool {
return js_lexer.IsIdentifierES5UTF16(name) && (!p.options.ASCIIOnly ||
return js_lexer.IsIdentifierES5AndESNextUTF16(name) && (!p.options.ASCIIOnly ||
!p.options.UnsupportedFeatures.Has(compat.UnicodeEscapes) ||
!js_lexer.ContainsNonBMPCodePointUTF16(name))
}
Expand Down
22 changes: 16 additions & 6 deletions scripts/gen-unicode-table.js
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,20 @@ const idContinueES5 = idStartES5.concat(
// is presumed to be the Unicode set, collection 10646.
//
// UnicodeIDStart: any Unicode code point with the Unicode property “ID_Start”
const idStart = require('@unicode/unicode-13.0.0/Binary_Property/ID_Start/code-points');
const idStartESNext = require('@unicode/unicode-13.0.0/Binary_Property/ID_Start/code-points');
const idStartESNextSet = new Set(idStartESNext);

// UnicodeIDContinue: any Unicode code point with the Unicode property “ID_Continue”
const idContinue = require('@unicode/unicode-13.0.0/Binary_Property/ID_Continue/code-points');
const idContinueESNext = require('@unicode/unicode-13.0.0/Binary_Property/ID_Continue/code-points');
const idContinueESNextSet = new Set(idContinueESNext);

// These identifiers are valid in both ES5 and ES6+ (i.e. an intersection of both)
const idStartES5AndESNext = idStartES5.filter(n => idStartESNextSet.has(n));
const idContinueES5AndESNext = idContinueES5.filter(n => idContinueESNextSet.has(n));

// These identifiers are valid in either ES5 or ES6+ (i.e. a union of both)
const idStartES5OrESNext = [...new Set(idStartES5.concat(idStartESNext))].sort((a, b) => a - b);
const idContinueES5OrESNext = [...new Set(idContinueES5.concat(idContinueESNext))].sort((a, b) => a - b);

function generateRangeTable(codePoints) {
let lines = [];
Expand Down Expand Up @@ -105,11 +115,11 @@ package js_lexer
import "unicode"
var idStartES5 = ${generateRangeTable(idStartES5)}
var idStartES5AndESNext = ${generateRangeTable(idStartES5AndESNext)}
var idContinueES5 = ${generateRangeTable(idContinueES5)}
var idContinueES5AndESNext = ${generateRangeTable(idContinueES5AndESNext)}
var idStart = ${generateRangeTable(idStart)}
var idStartES5OrESNext = ${generateRangeTable(idStartES5OrESNext)}
var idContinue = ${generateRangeTable(idContinue)}
var idContinueES5OrESNext = ${generateRangeTable(idContinueES5OrESNext)}
`);

0 comments on commit 5c10033

Please sign in to comment.