Skip to content

Commit

Permalink
utf16 bug in outside-file line/column accounting
Browse files Browse the repository at this point in the history
  • Loading branch information
evanw committed Mar 17, 2021
1 parent 8a21240 commit 7320a0e
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 49 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@
Using newer JavaScript syntax with an older target environment (e.g. `chrome10`) can cause a build error if esbuild doesn't support transforming that syntax such that it is compatible with that target environment. Previously the error message was generic but with this release, the target environment is called outp explicitly in the error message. This is helpful if esbuild is being wrapped by some other tool since the other tool can obscure what target environment is actually being passed to esbuild.
* Fix an issue with Unicode and source maps
This release fixes a bug where non-ASCII content that ended up in an output file but that was not part of an input file could throw off source mappings. An example of this would be passing a string containing non-ASCII characters to the `globalName` setting with the `minify` setting active and the `charset` setting set to `utf8`. The conditions for this bug are fairly specific and unlikely to be hit, so it's unsurprising that this issue hasn't been discovered earlier. It's also unlikely that this issue affected real-world code.
The underlying cause is that while the meaning of column numbers in source maps is undefined in the specification, in practice most tools treat it as the number of UTF-16 code units from the start of the line. The bug happened because column increments for outside-of-file characters were incorrectly counted using byte offsets instead of UTF-16 code unit counts.
## 0.9.2
* Fix export name annotations in CommonJS output for node ([#960](https://github.com/evanw/esbuild/issues/960))
Expand Down
59 changes: 16 additions & 43 deletions internal/bundler/linker.go
Original file line number Diff line number Diff line change
Expand Up @@ -3379,11 +3379,6 @@ type stmtList struct {
entryPointTail []js_ast.Stmt
}

type lineColumnOffset struct {
lines int
columns int
}

type compileResultJS struct {
js_printer.PrintResult

Expand All @@ -3396,7 +3391,7 @@ type compileResultJS struct {

// This is the line and column offset since the previous JavaScript string
// or the start of the file if this is the first JavaScript string.
generatedOffset lineColumnOffset
generatedOffset sourcemap.LineColumnOffset
}

func (c *linkerContext) generateCodeForFileInChunkJS(
Expand Down Expand Up @@ -3795,7 +3790,7 @@ func (repr *chunkReprJS) generate(c *linkerContext, chunk *chunkInfo) func(gener
waitGroup.Wait()

j := js_printer.Joiner{}
prevOffset := lineColumnOffset{}
prevOffset := sourcemap.LineColumnOffset{}

// Optionally strip whitespace
indent := ""
Expand All @@ -3814,7 +3809,7 @@ func (repr *chunkReprJS) generate(c *linkerContext, chunk *chunkInfo) func(gener
// Start with the hashbang if there is one
if repr.ast.Hashbang != "" {
hashbang := repr.ast.Hashbang + "\n"
prevOffset.advanceString(hashbang)
prevOffset.AdvanceString(hashbang)
j.AddString(hashbang)
newlineBeforeComment = true
isExecutable = true
Expand All @@ -3823,15 +3818,15 @@ func (repr *chunkReprJS) generate(c *linkerContext, chunk *chunkInfo) func(gener
// Add the top-level directive if present
if repr.ast.Directive != "" {
quoted := string(js_printer.QuoteForJSON(repr.ast.Directive, c.options.ASCIIOnly)) + ";" + newline
prevOffset.advanceString(quoted)
prevOffset.AdvanceString(quoted)
j.AddString(quoted)
newlineBeforeComment = true
}
}

if len(c.options.JSBanner) > 0 {
prevOffset.advanceString(c.options.JSBanner)
prevOffset.advanceString("\n")
prevOffset.AdvanceString(c.options.JSBanner)
prevOffset.AdvanceString("\n")
j.AddString(c.options.JSBanner)
j.AddString("\n")
}
Expand All @@ -3848,15 +3843,15 @@ func (repr *chunkReprJS) generate(c *linkerContext, chunk *chunkInfo) func(gener
} else {
text += "(()" + space + "=>" + space + "{" + newline
}
prevOffset.advanceString(text)
prevOffset.AdvanceString(text)
j.AddString(text)
newlineBeforeComment = false
}

// Put the cross-chunk prefix inside the IIFE
if len(crossChunkPrefix) > 0 {
newlineBeforeComment = true
prevOffset.advanceBytes(crossChunkPrefix)
prevOffset.AdvanceBytes(crossChunkPrefix)
j.AddBytes(crossChunkPrefix)
}

Expand Down Expand Up @@ -3953,7 +3948,7 @@ func (repr *chunkReprJS) generate(c *linkerContext, chunk *chunkInfo) func(gener
// Add a comment with the file path before the file contents
if c.options.Mode == config.ModeBundle && !c.options.RemoveWhitespace && prevComment != compileResult.sourceIndex && len(compileResult.JS) > 0 {
if newlineBeforeComment {
prevOffset.advanceString("\n")
prevOffset.AdvanceString("\n")
j.AddString("\n")
}

Expand All @@ -3968,14 +3963,14 @@ func (repr *chunkReprJS) generate(c *linkerContext, chunk *chunkInfo) func(gener
path = strings.ReplaceAll(path, "\u2029", "\\u2029")

text := fmt.Sprintf("%s// %s\n", indent, path)
prevOffset.advanceString(text)
prevOffset.AdvanceString(text)
j.AddString(text)
prevComment = compileResult.sourceIndex
}

// Don't include the runtime in source maps
if isRuntime {
prevOffset.advanceString(string(compileResult.JS))
prevOffset.AdvanceString(string(compileResult.JS))
j.AddBytes(compileResult.JS)
} else {
// Save the offset to the start of the stored JavaScript
Expand All @@ -3984,9 +3979,9 @@ func (repr *chunkReprJS) generate(c *linkerContext, chunk *chunkInfo) func(gener

// Ignore empty source map chunks
if compileResult.SourceMapChunk.ShouldIgnore {
prevOffset.advanceBytes(compileResult.JS)
prevOffset.AdvanceBytes(compileResult.JS)
} else {
prevOffset = lineColumnOffset{}
prevOffset = sourcemap.LineColumnOffset{}

// Include this file in the source map
if c.options.SourceMap != config.SourceMapNone {
Expand Down Expand Up @@ -4422,28 +4417,6 @@ func (repr *chunkReprCSS) generate(c *linkerContext, chunk *chunkInfo) func(gene
}
}

func (offset *lineColumnOffset) advanceBytes(bytes []byte) {
for i, n := 0, len(bytes); i < n; i++ {
if bytes[i] == '\n' {
offset.lines++
offset.columns = 0
} else {
offset.columns++
}
}
}

func (offset *lineColumnOffset) advanceString(text string) {
for i, n := 0, len(text); i < n; i++ {
if text[i] == '\n' {
offset.lines++
offset.columns = 0
} else {
offset.columns++
}
}
}

func preventBindingsFromBeingRenamed(binding js_ast.Binding, symbols js_ast.SymbolMap) {
switch b := binding.Data.(type) {
case *js_ast.BMissing:
Expand Down Expand Up @@ -4658,10 +4631,10 @@ func (c *linkerContext) generateSourceMapForChunk(
// here.
startState := js_printer.SourceMapState{
SourceIndex: sourcesIndex,
GeneratedLine: offset.lines,
GeneratedColumn: offset.columns,
GeneratedLine: offset.Lines,
GeneratedColumn: offset.Columns,
}
if offset.lines == 0 {
if offset.Lines == 0 {
startState.GeneratedColumn += prevColumnOffset
}

Expand Down
8 changes: 3 additions & 5 deletions internal/js_printer/js_printer.go
Original file line number Diff line number Diff line change
Expand Up @@ -662,11 +662,9 @@ func GenerateLineOffsetTables(contents string, approximateLineCount int32) []Lin
switch c {
case '\r', '\n', '\u2028', '\u2029':
// Handle Windows-specific "\r\n" newlines
if c == '\r' {
if i+1 < len(contents) && contents[i+1] == '\n' {
column++
continue
}
if c == '\r' && i+1 < len(contents) && contents[i+1] == '\n' {
column++
continue
}

lineOffsetTables = append(lineOffsetTables, LineOffsetTable{
Expand Down
60 changes: 60 additions & 0 deletions internal/sourcemap/sourcemap.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package sourcemap

import (
"bytes"
"unicode/utf8"
)

type Mapping struct {
Expand Down Expand Up @@ -177,3 +178,62 @@ func DecodeVLQUTF16(encoded []uint16) (int, int, bool) {
}
return value, current, true
}

type LineColumnOffset struct {
Lines int
Columns int
}

func (offset *LineColumnOffset) AdvanceBytes(bytes []byte) {
columns := offset.Columns
for len(bytes) > 0 {
c, width := utf8.DecodeRune(bytes)
bytes = bytes[width:]
switch c {
case '\r', '\n', '\u2028', '\u2029':
// Handle Windows-specific "\r\n" newlines
if c == '\r' && len(bytes) > 0 && bytes[0] == '\n' {
columns++
continue
}

offset.Lines++
columns = 0

default:
// Mozilla's "source-map" library counts columns using UTF-16 code units
if c <= 0xFFFF {
columns++
} else {
columns += 2
}
}
}
offset.Columns = columns
}

func (offset *LineColumnOffset) AdvanceString(text string) {
columns := offset.Columns
for i, c := range text {
switch c {
case '\r', '\n', '\u2028', '\u2029':
// Handle Windows-specific "\r\n" newlines
if c == '\r' && i+1 < len(text) && text[i+1] == '\n' {
columns++
continue
}

offset.Lines++
columns = 0

default:
// Mozilla's "source-map" library counts columns using UTF-16 code units
if c <= 0xFFFF {
columns++
} else {
columns += 2
}
}
}
offset.Columns = columns
}
7 changes: 6 additions & 1 deletion scripts/verify-source-map.js
Original file line number Diff line number Diff line change
Expand Up @@ -450,7 +450,12 @@ async function main() {
crlf,
}),
check('unicode' + suffix, testCaseUnicode, toSearchUnicode, {
flags: flags.concat('--outfile=out.js', '--bundle'),
flags: flags.concat('--outfile=out.js', '--bundle', '--charset=utf8'),
entryPoints: ['entry.js'],
crlf,
}),
check('unicode-globalName' + suffix, testCaseUnicode, toSearchUnicode, {
flags: flags.concat('--outfile=out.js', '--bundle', '--global-name=πππ', '--charset=utf8'),
entryPoints: ['entry.js'],
crlf,
}),
Expand Down

0 comments on commit 7320a0e

Please sign in to comment.