diff --git a/format/simplify.go b/format/simplify.go index f2fd4d6..1176464 100644 --- a/format/simplify.go +++ b/format/simplify.go @@ -53,22 +53,26 @@ func (s simplifier) Visit(node ast.Node) ast.Visitor { // can be simplified to: s[a:] // if s is "simple enough" (for now we only accept identifiers) // - // Note: This may not be correct because len may have been redeclared in another - // file belonging to the same package. However, this is extremely unlikely - // and so far (April 2016, after years of supporting this rewrite feature) + // Note: This may not be correct because len may have been redeclared in + // the same package. However, this is extremely unlikely and so far + // (April 2022, after years of supporting this rewrite feature) // has never come up, so let's keep it working as is (see also #15153). + // + // Also note that this code used to use go/ast's object tracking, + // which was removed in exchange for go/parser.Mode.SkipObjectResolution. + // False positives are extremely unlikely as described above, + // and go/ast's object tracking is incomplete in any case. if n.Max != nil { // - 3-index slices always require the 2nd and 3rd index break } - if s, _ := n.X.(*ast.Ident); s != nil && s.Obj != nil { - // the array/slice object is a single, resolved identifier + if s, _ := n.X.(*ast.Ident); s != nil { + // the array/slice object is a single identifier if call, _ := n.High.(*ast.CallExpr); call != nil && len(call.Args) == 1 && !call.Ellipsis.IsValid() { // the high expression is a function call with a single argument - if fun, _ := call.Fun.(*ast.Ident); fun != nil && fun.Name == "len" && fun.Obj == nil { - // the function called is "len" and it is not locally defined; and - // because we don't have dot imports, it must be the predefined len() - if arg, _ := call.Args[0].(*ast.Ident); arg != nil && arg.Obj == s.Obj { + if fun, _ := call.Fun.(*ast.Ident); fun != nil && fun.Name == "len" { + // the function called is "len" + if arg, _ := call.Args[0].(*ast.Ident); arg != nil && arg.Name == s.Name { // the len argument is the array/slice object n.High = nil } diff --git a/gofmt.go b/gofmt.go index 6390d83..e0210a8 100644 --- a/gofmt.go +++ b/gofmt.go @@ -25,6 +25,8 @@ import ( "strings" "sync" + // TODO: we can soon use os/exec thanks to + // https://go.dev/issue/43724 "golang.org/x/sync/semaphore" exec "golang.org/x/sys/execabs" @@ -100,7 +102,7 @@ func usage() { } func initParserMode() { - parserMode = parser.ParseComments + parserMode = parser.ParseComments | parser.SkipObjectResolution if *allErrors { parserMode |= parser.AllErrors } @@ -348,12 +350,9 @@ func processFile(filename string, info fs.FileInfo, in io.Reader, r *reporter, e } } if *doDiff { - data, err := diffWithReplaceTempFile(src, res, filename) - if err != nil { - return fmt.Errorf("computing diff: %s", err) - } - fmt.Fprintf(r, "diff -u %s %s\n", filepath.ToSlash(filename+".orig"), filepath.ToSlash(filename)) - r.Write(data) + newName := filepath.ToSlash(filename) + oldName := newName + ".orig" + r.Write(diff.Diff(oldName, src, newName, res)) } } @@ -412,7 +411,12 @@ func readFile(filename string, info fs.FileInfo, in io.Reader) ([]byte, error) { // stop to avoid corrupting it.) src := make([]byte, size+1) n, err := io.ReadFull(in, src) - if err != nil && err != io.ErrUnexpectedEOF { + switch err { + case nil, io.EOF, io.ErrUnexpectedEOF: + // io.ReadFull returns io.EOF (for an empty file) or io.ErrUnexpectedEOF + // (for a non-empty file) if the file was changed unexpectedly. Continue + // with comparing file sizes in those cases. + default: return nil, err } if n < size { @@ -589,43 +593,6 @@ func fileWeight(path string, info fs.FileInfo) int64 { return info.Size() } -func diffWithReplaceTempFile(b1, b2 []byte, filename string) ([]byte, error) { - data, err := diff.Diff("gofumpt", b1, b2) - if len(data) > 0 { - return replaceTempFilename(data, filename) - } - return data, err -} - -// replaceTempFilename replaces temporary filenames in diff with actual one. -// -// --- /tmp/gofumpt316145376 2017-02-03 19:13:00.280468375 -0500 -// +++ /tmp/gofumpt617882815 2017-02-03 19:13:00.280468375 -0500 -// ... -// -> -// --- path/to/file.go.orig 2017-02-03 19:13:00.280468375 -0500 -// +++ path/to/file.go 2017-02-03 19:13:00.280468375 -0500 -// ... -func replaceTempFilename(diff []byte, filename string) ([]byte, error) { - bs := bytes.SplitN(diff, []byte{'\n'}, 3) - if len(bs) < 3 { - return nil, fmt.Errorf("got unexpected diff for %s", filename) - } - // Preserve timestamps. - var t0, t1 []byte - if i := bytes.LastIndexByte(bs[0], '\t'); i != -1 { - t0 = bs[0][i:] - } - if i := bytes.LastIndexByte(bs[1], '\t'); i != -1 { - t1 = bs[1][i:] - } - // Always print filepath with slash separator. - f := filepath.ToSlash(filename) - bs[0] = []byte(fmt.Sprintf("--- %s%s", f+".orig", t0)) - bs[1] = []byte(fmt.Sprintf("+++ %s%s", f, t1)) - return bytes.Join(bs, []byte{'\n'}), nil -} - const chmodSupported = runtime.GOOS != "windows" // backupFile writes data to a new file named filename with permissions perm, diff --git a/internal/diff/diff.go b/internal/diff/diff.go index 6efdb82..47b2856 100644 --- a/internal/diff/diff.go +++ b/internal/diff/diff.go @@ -1,79 +1,261 @@ -// Copyright 2019 The Go Authors. All rights reserved. +// Copyright 2022 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// Package diff implements a Diff function that compare two inputs -// using the 'diff' tool. package diff import ( "bytes" - "io/ioutil" - "os" - "runtime" - - exec "golang.org/x/sys/execabs" + "fmt" + "sort" + "strings" ) -// Returns diff of two arrays of bytes in diff tool format. -func Diff(prefix string, b1, b2 []byte) ([]byte, error) { - f1, err := writeTempFile(prefix, b1) - if err != nil { - return nil, err +// A pair is a pair of values tracked for both the x and y side of a diff. +// It is typically a pair of line indexes. +type pair struct{ x, y int } + +// Diff returns an anchored diff of the two texts old and new +// in the “unified diff” format. If old and new are identical, +// Diff returns a nil slice (no output). +// +// Unix diff implementations typically look for a diff with +// the smallest number of lines inserted and removed, +// which can in the worst case take time quadratic in the +// number of lines in the texts. As a result, many implementations +// either can be made to run for a long time or cut off the search +// after a predetermined amount of work. +// +// In contrast, this implementation looks for a diff with the +// smallest number of “unique” lines inserted and removed, +// where unique means a line that appears just once in both old and new. +// We call this an “anchored diff” because the unique lines anchor +// the chosen matching regions. An anchored diff is usually clearer +// than a standard diff, because the algorithm does not try to +// reuse unrelated blank lines or closing braces. +// The algorithm also guarantees to run in O(n log n) time +// instead of the standard O(n²) time. +// +// Some systems call this approach a “patience diff,” named for +// the “patience sorting” algorithm, itself named for a solitaire card game. +// We avoid that name for two reasons. First, the name has been used +// for a few different variants of the algorithm, so it is imprecise. +// Second, the name is frequently interpreted as meaning that you have +// to wait longer (to be patient) for the diff, meaning that it is a slower algorithm, +// when in fact the algorithm is faster than the standard one. +func Diff(oldName string, old []byte, newName string, new []byte) []byte { + if bytes.Equal(old, new) { + return nil } - defer os.Remove(f1) + x := lines(old) + y := lines(new) + + // Print diff header. + var out bytes.Buffer + fmt.Fprintf(&out, "diff %s %s\n", oldName, newName) + fmt.Fprintf(&out, "--- %s\n", oldName) + fmt.Fprintf(&out, "+++ %s\n", newName) + + // Loop over matches to consider, + // expanding each match to include surrounding lines, + // and then printing diff chunks. + // To avoid setup/teardown cases outside the loop, + // tgs returns a leading {0,0} and trailing {len(x), len(y)} pair + // in the sequence of matches. + var ( + done pair // printed up to x[:done.x] and y[:done.y] + chunk pair // start lines of current chunk + count pair // number of lines from each side in current chunk + ctext []string // lines for current chunk + ) + for _, m := range tgs(x, y) { + if m.x < done.x { + // Already handled scanning forward from earlier match. + continue + } - f2, err := writeTempFile(prefix, b2) - if err != nil { - return nil, err + // Expand matching lines as far possible, + // establishing that x[start.x:end.x] == y[start.y:end.y]. + // Note that on the first (or last) iteration we may (or definitey do) + // have an empty match: start.x==end.x and start.y==end.y. + start := m + for start.x > done.x && start.y > done.y && x[start.x-1] == y[start.y-1] { + start.x-- + start.y-- + } + end := m + for end.x < len(x) && end.y < len(y) && x[end.x] == y[end.y] { + end.x++ + end.y++ + } + + // Emit the mismatched lines before start into this chunk. + // (No effect on first sentinel iteration, when start = {0,0}.) + for _, s := range x[done.x:start.x] { + ctext = append(ctext, "-"+s) + count.x++ + } + for _, s := range y[done.y:start.y] { + ctext = append(ctext, "+"+s) + count.y++ + } + + // If we're not at EOF and have too few common lines, + // the chunk includes all the common lines and continues. + const C = 3 // number of context lines + if (end.x < len(x) || end.y < len(y)) && + (end.x-start.x < C || (len(ctext) > 0 && end.x-start.x < 2*C)) { + for _, s := range x[start.x:end.x] { + ctext = append(ctext, " "+s) + count.x++ + count.y++ + } + done = end + continue + } + + // End chunk with common lines for context. + if len(ctext) > 0 { + n := end.x - start.x + if n > C { + n = C + } + for _, s := range x[start.x : start.x+n] { + ctext = append(ctext, " "+s) + count.x++ + count.y++ + } + done = pair{start.x + n, start.y + n} + + // Format and emit chunk. + // Convert line numbers to 1-indexed. + // Special case: empty file shows up as 0,0 not 1,0. + if count.x > 0 { + chunk.x++ + } + if count.y > 0 { + chunk.y++ + } + fmt.Fprintf(&out, "@@ -%d,%d +%d,%d @@\n", chunk.x, count.x, chunk.y, count.y) + for _, s := range ctext { + out.WriteString(s) + } + count.x = 0 + count.y = 0 + ctext = ctext[:0] + } + + // If we reached EOF, we're done. + if end.x >= len(x) && end.y >= len(y) { + break + } + + // Otherwise start a new chunk. + chunk = pair{end.x - C, end.y - C} + for _, s := range x[chunk.x:end.x] { + ctext = append(ctext, " "+s) + count.x++ + count.y++ + } + done = end } - defer os.Remove(f2) - cmd := "diff" - if runtime.GOOS == "plan9" { - cmd = "/bin/ape/diff" + return out.Bytes() +} + +// lines returns the lines in the file x, including newlines. +// If the file does not end in a newline, one is supplied +// along with a warning about the missing newline. +func lines(x []byte) []string { + l := strings.SplitAfter(string(x), "\n") + if l[len(l)-1] == "" { + l = l[:len(l)-1] + } else { + // Treat last line as having a message about the missing newline attached, + // using the same text as BSD/GNU diff (including the leading backslash). + l[len(l)-1] += "\n\\ No newline at end of file\n" } + return l +} - data, err := exec.Command(cmd, "-u", f1, f2).CombinedOutput() - if len(data) > 0 { - // diff exits with a non-zero status when the files don't match. - // Ignore that failure as long as we get output. - err = nil +// tgs returns the pairs of indexes of the longest common subsequence +// of unique lines in x and y, where a unique line is one that appears +// once in x and once in y. +// +// The longest common subsequence algorithm is as described in +// Thomas G. Szymanski, “A Special Case of the Maximal Common +// Subsequence Problem,” Princeton TR #170 (January 1975), +// available at https://research.swtch.com/tgs170.pdf. +func tgs(x, y []string) []pair { + // Count the number of times each string appears in a and b. + // We only care about 0, 1, many, counted as 0, -1, -2 + // for the x side and 0, -4, -8 for the y side. + // Using negative numbers now lets us distinguish positive line numbers later. + m := make(map[string]int) + for _, s := range x { + if c := m[s]; c > -2 { + m[s] = c - 1 + } + } + for _, s := range y { + if c := m[s]; c > -8 { + m[s] = c - 4 + } } - // If we are on Windows and the diff is Cygwin diff, - // machines can get into a state where every Cygwin - // command works fine but prints a useless message like: + // Now unique strings can be identified by m[s] = -1+-4. // - // Cygwin WARNING: - // Couldn't compute FAST_CWD pointer. This typically occurs if you're using - // an older Cygwin version on a newer Windows. Please update to the latest - // available Cygwin version from https://cygwin.com/. If the problem persists, - // please see https://cygwin.com/problems.html - // - // Skip over that message and just return the actual diff. - if len(data) > 0 && !bytes.HasPrefix(data, []byte("--- ")) { - i := bytes.Index(data, []byte("\n--- ")) - if i >= 0 && i < 80*10 && bytes.Contains(data[:i], []byte("://cygwin.com/")) { - data = data[i+1:] + // Gather the indexes of those strings in x and y, building: + // xi[i] = increasing indexes of unique strings in x. + // yi[i] = increasing indexes of unique strings in y. + // inv[i] = index j such that x[xi[i]] = y[yi[j]]. + var xi, yi, inv []int + for i, s := range y { + if m[s] == -1+-4 { + m[s] = len(yi) + yi = append(yi, i) + } + } + for i, s := range x { + if j, ok := m[s]; ok && j >= 0 { + xi = append(xi, i) + inv = append(inv, j) } } - return data, err -} - -func writeTempFile(prefix string, data []byte) (string, error) { - file, err := ioutil.TempFile("", prefix) - if err != nil { - return "", err + // Apply Algorithm A from Szymanski's paper. + // In those terms, A = J = inv and B = [0, n). + // We add sentinel pairs {0,0}, and {len(x),len(y)} + // to the returned sequence, to help the processing loop. + J := inv + n := len(xi) + T := make([]int, n) + L := make([]int, n) + for i := range T { + T[i] = n + 1 + } + for i := 0; i < n; i++ { + k := sort.Search(n, func(k int) bool { + return T[k] >= J[i] + }) + T[k] = J[i] + L[i] = k + 1 } - _, err = file.Write(data) - if err1 := file.Close(); err == nil { - err = err1 + k := 0 + for _, v := range L { + if k < v { + k = v + } } - if err != nil { - os.Remove(file.Name()) - return "", err + seq := make([]pair, 2+k) + seq[1+k] = pair{len(x), len(y)} // sentinel at end + lastj := n + for i := n - 1; i >= 0; i-- { + if L[i] == k && J[i] < lastj { + seq[k] = pair{xi[i], yi[J[i]]} + k-- + } } - return file.Name(), nil + seq[0] = pair{0, 0} // sentinel at start + return seq }