diff --git a/CHANGELOG.md b/CHANGELOG.md index 0001c9225d1..5ec13c8d61f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -56,6 +56,12 @@ Using newer JavaScript syntax with an older target environment (e.g. `chrome10`) can cause a build error if esbuild doesn't support transforming that syntax such that it is compatible with that target environment. Previously the error message was generic but with this release, the target environment is called outp explicitly in the error message. This is helpful if esbuild is being wrapped by some other tool since the other tool can obscure what target environment is actually being passed to esbuild. +* Fix an issue with Unicode and source maps + + This release fixes a bug where non-ASCII content that ended up in an output file but that was not part of an input file could throw off source mappings. An example of this would be passing a string containing non-ASCII characters to the `globalName` setting with the `minify` setting active and the `charset` setting set to `utf8`. The conditions for this bug are fairly specific and unlikely to be hit, so it's unsurprising that this issue hasn't been discovered earlier. It's also unlikely that this issue affected real-world code. + + The underlying cause is that while the meaning of column numbers in source maps is undefined in the specification, in practice most tools treat it as the number of UTF-16 code units from the start of the line. The bug happened because column increments for outside-of-file characters were incorrectly counted using byte offsets instead of UTF-16 code unit counts. + ## 0.9.2 * Fix export name annotations in CommonJS output for node ([#960](https://github.com/evanw/esbuild/issues/960)) diff --git a/internal/bundler/linker.go b/internal/bundler/linker.go index f914ebeb167..db8ff42dcd7 100644 --- a/internal/bundler/linker.go +++ b/internal/bundler/linker.go @@ -3379,11 +3379,6 @@ type stmtList struct { entryPointTail []js_ast.Stmt } -type lineColumnOffset struct { - lines int - columns int -} - type compileResultJS struct { js_printer.PrintResult @@ -3396,7 +3391,7 @@ type compileResultJS struct { // This is the line and column offset since the previous JavaScript string // or the start of the file if this is the first JavaScript string. - generatedOffset lineColumnOffset + generatedOffset sourcemap.LineColumnOffset } func (c *linkerContext) generateCodeForFileInChunkJS( @@ -3795,7 +3790,7 @@ func (repr *chunkReprJS) generate(c *linkerContext, chunk *chunkInfo) func(gener waitGroup.Wait() j := js_printer.Joiner{} - prevOffset := lineColumnOffset{} + prevOffset := sourcemap.LineColumnOffset{} // Optionally strip whitespace indent := "" @@ -3814,7 +3809,7 @@ func (repr *chunkReprJS) generate(c *linkerContext, chunk *chunkInfo) func(gener // Start with the hashbang if there is one if repr.ast.Hashbang != "" { hashbang := repr.ast.Hashbang + "\n" - prevOffset.advanceString(hashbang) + prevOffset.AdvanceString(hashbang) j.AddString(hashbang) newlineBeforeComment = true isExecutable = true @@ -3823,15 +3818,15 @@ func (repr *chunkReprJS) generate(c *linkerContext, chunk *chunkInfo) func(gener // Add the top-level directive if present if repr.ast.Directive != "" { quoted := string(js_printer.QuoteForJSON(repr.ast.Directive, c.options.ASCIIOnly)) + ";" + newline - prevOffset.advanceString(quoted) + prevOffset.AdvanceString(quoted) j.AddString(quoted) newlineBeforeComment = true } } if len(c.options.JSBanner) > 0 { - prevOffset.advanceString(c.options.JSBanner) - prevOffset.advanceString("\n") + prevOffset.AdvanceString(c.options.JSBanner) + prevOffset.AdvanceString("\n") j.AddString(c.options.JSBanner) j.AddString("\n") } @@ -3848,7 +3843,7 @@ func (repr *chunkReprJS) generate(c *linkerContext, chunk *chunkInfo) func(gener } else { text += "(()" + space + "=>" + space + "{" + newline } - prevOffset.advanceString(text) + prevOffset.AdvanceString(text) j.AddString(text) newlineBeforeComment = false } @@ -3856,7 +3851,7 @@ func (repr *chunkReprJS) generate(c *linkerContext, chunk *chunkInfo) func(gener // Put the cross-chunk prefix inside the IIFE if len(crossChunkPrefix) > 0 { newlineBeforeComment = true - prevOffset.advanceBytes(crossChunkPrefix) + prevOffset.AdvanceBytes(crossChunkPrefix) j.AddBytes(crossChunkPrefix) } @@ -3953,7 +3948,7 @@ func (repr *chunkReprJS) generate(c *linkerContext, chunk *chunkInfo) func(gener // Add a comment with the file path before the file contents if c.options.Mode == config.ModeBundle && !c.options.RemoveWhitespace && prevComment != compileResult.sourceIndex && len(compileResult.JS) > 0 { if newlineBeforeComment { - prevOffset.advanceString("\n") + prevOffset.AdvanceString("\n") j.AddString("\n") } @@ -3968,14 +3963,14 @@ func (repr *chunkReprJS) generate(c *linkerContext, chunk *chunkInfo) func(gener path = strings.ReplaceAll(path, "\u2029", "\\u2029") text := fmt.Sprintf("%s// %s\n", indent, path) - prevOffset.advanceString(text) + prevOffset.AdvanceString(text) j.AddString(text) prevComment = compileResult.sourceIndex } // Don't include the runtime in source maps if isRuntime { - prevOffset.advanceString(string(compileResult.JS)) + prevOffset.AdvanceString(string(compileResult.JS)) j.AddBytes(compileResult.JS) } else { // Save the offset to the start of the stored JavaScript @@ -3984,9 +3979,9 @@ func (repr *chunkReprJS) generate(c *linkerContext, chunk *chunkInfo) func(gener // Ignore empty source map chunks if compileResult.SourceMapChunk.ShouldIgnore { - prevOffset.advanceBytes(compileResult.JS) + prevOffset.AdvanceBytes(compileResult.JS) } else { - prevOffset = lineColumnOffset{} + prevOffset = sourcemap.LineColumnOffset{} // Include this file in the source map if c.options.SourceMap != config.SourceMapNone { @@ -4422,28 +4417,6 @@ func (repr *chunkReprCSS) generate(c *linkerContext, chunk *chunkInfo) func(gene } } -func (offset *lineColumnOffset) advanceBytes(bytes []byte) { - for i, n := 0, len(bytes); i < n; i++ { - if bytes[i] == '\n' { - offset.lines++ - offset.columns = 0 - } else { - offset.columns++ - } - } -} - -func (offset *lineColumnOffset) advanceString(text string) { - for i, n := 0, len(text); i < n; i++ { - if text[i] == '\n' { - offset.lines++ - offset.columns = 0 - } else { - offset.columns++ - } - } -} - func preventBindingsFromBeingRenamed(binding js_ast.Binding, symbols js_ast.SymbolMap) { switch b := binding.Data.(type) { case *js_ast.BMissing: @@ -4658,10 +4631,10 @@ func (c *linkerContext) generateSourceMapForChunk( // here. startState := js_printer.SourceMapState{ SourceIndex: sourcesIndex, - GeneratedLine: offset.lines, - GeneratedColumn: offset.columns, + GeneratedLine: offset.Lines, + GeneratedColumn: offset.Columns, } - if offset.lines == 0 { + if offset.Lines == 0 { startState.GeneratedColumn += prevColumnOffset } diff --git a/internal/js_printer/js_printer.go b/internal/js_printer/js_printer.go index 77e5fa11874..3d8dbcdb29d 100644 --- a/internal/js_printer/js_printer.go +++ b/internal/js_printer/js_printer.go @@ -662,11 +662,9 @@ func GenerateLineOffsetTables(contents string, approximateLineCount int32) []Lin switch c { case '\r', '\n', '\u2028', '\u2029': // Handle Windows-specific "\r\n" newlines - if c == '\r' { - if i+1 < len(contents) && contents[i+1] == '\n' { - column++ - continue - } + if c == '\r' && i+1 < len(contents) && contents[i+1] == '\n' { + column++ + continue } lineOffsetTables = append(lineOffsetTables, LineOffsetTable{ diff --git a/internal/sourcemap/sourcemap.go b/internal/sourcemap/sourcemap.go index 87c1eb47a68..962be9a3b0a 100644 --- a/internal/sourcemap/sourcemap.go +++ b/internal/sourcemap/sourcemap.go @@ -2,6 +2,7 @@ package sourcemap import ( "bytes" + "unicode/utf8" ) type Mapping struct { @@ -177,3 +178,62 @@ func DecodeVLQUTF16(encoded []uint16) (int, int, bool) { } return value, current, true } + +type LineColumnOffset struct { + Lines int + Columns int +} + +func (offset *LineColumnOffset) AdvanceBytes(bytes []byte) { + columns := offset.Columns + for len(bytes) > 0 { + c, width := utf8.DecodeRune(bytes) + bytes = bytes[width:] + switch c { + case '\r', '\n', '\u2028', '\u2029': + // Handle Windows-specific "\r\n" newlines + if c == '\r' && len(bytes) > 0 && bytes[0] == '\n' { + columns++ + continue + } + + offset.Lines++ + columns = 0 + + default: + // Mozilla's "source-map" library counts columns using UTF-16 code units + if c <= 0xFFFF { + columns++ + } else { + columns += 2 + } + } + } + offset.Columns = columns +} + +func (offset *LineColumnOffset) AdvanceString(text string) { + columns := offset.Columns + for i, c := range text { + switch c { + case '\r', '\n', '\u2028', '\u2029': + // Handle Windows-specific "\r\n" newlines + if c == '\r' && i+1 < len(text) && text[i+1] == '\n' { + columns++ + continue + } + + offset.Lines++ + columns = 0 + + default: + // Mozilla's "source-map" library counts columns using UTF-16 code units + if c <= 0xFFFF { + columns++ + } else { + columns += 2 + } + } + } + offset.Columns = columns +} diff --git a/scripts/verify-source-map.js b/scripts/verify-source-map.js index ba0fee5143d..6b175145bdb 100644 --- a/scripts/verify-source-map.js +++ b/scripts/verify-source-map.js @@ -450,7 +450,12 @@ async function main() { crlf, }), check('unicode' + suffix, testCaseUnicode, toSearchUnicode, { - flags: flags.concat('--outfile=out.js', '--bundle'), + flags: flags.concat('--outfile=out.js', '--bundle', '--charset=utf8'), + entryPoints: ['entry.js'], + crlf, + }), + check('unicode-globalName' + suffix, testCaseUnicode, toSearchUnicode, { + flags: flags.concat('--outfile=out.js', '--bundle', '--global-name=πππ', '--charset=utf8'), entryPoints: ['entry.js'], crlf, }),