From c5c3378d8544789f76de8a338a95307b0854e816 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Mon, 24 May 2021 17:33:36 -0700 Subject: [PATCH] Cleanup edit groups after coalescing (#259) Even with an optimal diffing algoritm, coalescing adjacent edit groups may cause the corresponding pair of strings for an edit group to have leading or trailing spans of equal elements. While this is technically a correct representation of a diff, it is a suboptimal outcome. As such, scan through all unequal groups and move leading/trailing equal spans to the preceding/succeeding equal group. Before this change: strings.Join({ "org-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=aa", - ",#=_value _value=2 ", + " _value=2 ", `11 org-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=bb`, - ",#=_value _value=2 2", + " _value=2 2", `1 org-4747474747474747,bucket-4242424242424242:m,tag1=b,tag2=cc`, - ",#=_value", ` _value=1 21 org-4747474747474747,bucket-4242424242424242:m,tag1`, "=a,tag2", - "=dd,#=_value", + "=dd", ` _value=3 31 org-4747474747474747,bucket-4242424242424242:m,tag1`, - "=c,#=_value", + "=c", ` _value=4 41 `, }, "") After this change: strings.Join({ "org-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=aa", - ",#=_value", ` _value=2 11 org-4747474747474747,bucket-4242424242424242:m,tag1`, "=a,tag2=bb", - ",#=_value", ` _value=2 21 org-4747474747474747,bucket-4242424242424242:m,tag1`, "=b,tag2=cc", - ",#=_value", ` _value=1 21 org-4747474747474747,bucket-4242424242424242:m,tag1`, "=a,tag2=dd", - ",#=_value", ` _value=3 31 org-4747474747474747,bucket-4242424242424242:m,tag1`, "=c", - ",#=_value", ` _value=4 41 `, }, "") --- cmp/compare_test.go | 5 ++ cmp/report_slices.go | 152 ++++++++++++++++++++++++++++++++++++++++--- cmp/testdata/diffs | 19 ++++++ 3 files changed, 166 insertions(+), 10 deletions(-) diff --git a/cmp/compare_test.go b/cmp/compare_test.go index f7b1f13..7eac029 100644 --- a/cmp/compare_test.go +++ b/cmp/compare_test.go @@ -1302,6 +1302,11 @@ using the AllowUnexported option.`, "\n"), x: struct{ X interface{} }{[1]string{"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nam sit amet pretium ligula, at gravida quam. Integer iaculis, velit at sagittis ultricies, lacus metus scelerisque turpis, ornare feugiat nulla nisl ac erat. Maecenas elementum ultricies libero, sed efficitur lacus molestie non. Nulla ac pretium dolor. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Pellentesque mi lorem, consectetur id porttitor id, sollicitudin sit amet enim. Duis eu dolor magna. Nunc ut augue turpis."}}, y: struct{ X interface{} }{[1]string{"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nam sit amet pretium ligula, at gravida quam. Integer iaculis, velit at sagittis ultricies, lacus metus scelerisque turpis, ornare feugiat nulla nisl ac erat. Maecenas elementum ultricies libero, sed efficitur lacus molestie non. Nulla ac pretium dolor. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Pellentesque mi lorem, consectetur id porttitor id, sollicitudin sit amet enim. Duis eu dolor magna. Nunc ut augue turpis,"}}, reason: "printing a large standalone string that is different should print enough context to see the difference", + }, { + label: label + "/SurroundingEqualElements", + x: "org-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=aa,#=_value _value=2 11\torg-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=bb,#=_value _value=2 21\torg-4747474747474747,bucket-4242424242424242:m,tag1=b,tag2=cc,#=_value _value=1 21\torg-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=dd,#=_value _value=3 31\torg-4747474747474747,bucket-4242424242424242:m,tag1=c,#=_value _value=4 41\t", + y: "org-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=aa _value=2 11\torg-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=bb _value=2 21\torg-4747474747474747,bucket-4242424242424242:m,tag1=b,tag2=cc _value=1 21\torg-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=dd _value=3 31\torg-4747474747474747,bucket-4242424242424242:m,tag1=c _value=4 41\t", + reason: "leading/trailing equal spans should not appear in diff lines", }} } diff --git a/cmp/report_slices.go b/cmp/report_slices.go index 168f92f..f985cc9 100644 --- a/cmp/report_slices.go +++ b/cmp/report_slices.go @@ -338,8 +338,11 @@ func (opts formatOptions) formatDiffSlice( vx, vy reflect.Value, chunkSize int, name string, makeRec func(reflect.Value, diffMode) textRecord, ) (list textList) { - es := diff.Difference(vx.Len(), vy.Len(), func(ix int, iy int) diff.Result { - return diff.BoolResult(vx.Index(ix).Interface() == vy.Index(iy).Interface()) + eq := func(ix, iy int) bool { + return vx.Index(ix).Interface() == vy.Index(iy).Interface() + } + es := diff.Difference(vx.Len(), vy.Len(), func(ix, iy int) diff.Result { + return diff.BoolResult(eq(ix, iy)) }) appendChunks := func(v reflect.Value, d diffMode) int { @@ -364,6 +367,7 @@ func (opts formatOptions) formatDiffSlice( groups := coalesceAdjacentEdits(name, es) groups = coalesceInterveningIdentical(groups, chunkSize/4) + groups = cleanupSurroundingIdentical(groups, eq) maxGroup := diffStats{Name: name} for i, ds := range groups { if maxLen >= 0 && numDiffs >= maxLen { @@ -416,25 +420,36 @@ func (opts formatOptions) formatDiffSlice( // coalesceAdjacentEdits coalesces the list of edits into groups of adjacent // equal or unequal counts. +// +// Example: +// +// Input: "..XXY...Y" +// Output: [ +// {NumIdentical: 2}, +// {NumRemoved: 2, NumInserted 1}, +// {NumIdentical: 3}, +// {NumInserted: 1}, +// ] +// func coalesceAdjacentEdits(name string, es diff.EditScript) (groups []diffStats) { - var prevCase int // Arbitrary index into which case last occurred - lastStats := func(i int) *diffStats { - if prevCase != i { + var prevMode byte + lastStats := func(mode byte) *diffStats { + if prevMode != mode { groups = append(groups, diffStats{Name: name}) - prevCase = i + prevMode = mode } return &groups[len(groups)-1] } for _, e := range es { switch e { case diff.Identity: - lastStats(1).NumIdentical++ + lastStats('=').NumIdentical++ case diff.UniqueX: - lastStats(2).NumRemoved++ + lastStats('!').NumRemoved++ case diff.UniqueY: - lastStats(2).NumInserted++ + lastStats('!').NumInserted++ case diff.Modified: - lastStats(2).NumModified++ + lastStats('!').NumModified++ } } return groups @@ -444,6 +459,35 @@ func coalesceAdjacentEdits(name string, es diff.EditScript) (groups []diffStats) // equal groups into adjacent unequal groups that currently result in a // dual inserted/removed printout. This acts as a high-pass filter to smooth // out high-frequency changes within the windowSize. +// +// Example: +// +// WindowSize: 16, +// Input: [ +// {NumIdentical: 61}, // group 0 +// {NumRemoved: 3, NumInserted: 1}, // group 1 +// {NumIdentical: 6}, // ├── coalesce +// {NumInserted: 2}, // ├── coalesce +// {NumIdentical: 1}, // ├── coalesce +// {NumRemoved: 9}, // └── coalesce +// {NumIdentical: 64}, // group 2 +// {NumRemoved: 3, NumInserted: 1}, // group 3 +// {NumIdentical: 6}, // ├── coalesce +// {NumInserted: 2}, // ├── coalesce +// {NumIdentical: 1}, // ├── coalesce +// {NumRemoved: 7}, // ├── coalesce +// {NumIdentical: 1}, // ├── coalesce +// {NumRemoved: 2}, // └── coalesce +// {NumIdentical: 63}, // group 4 +// ] +// Output: [ +// {NumIdentical: 61}, +// {NumIdentical: 7, NumRemoved: 12, NumInserted: 3}, +// {NumIdentical: 64}, +// {NumIdentical: 8, NumRemoved: 12, NumInserted: 3}, +// {NumIdentical: 63}, +// ] +// func coalesceInterveningIdentical(groups []diffStats, windowSize int) []diffStats { groups, groupsOrig := groups[:0], groups for i, ds := range groupsOrig { @@ -463,3 +507,91 @@ func coalesceInterveningIdentical(groups []diffStats, windowSize int) []diffStat } return groups } + +// cleanupSurroundingIdentical scans through all unequal groups, and +// moves any leading sequence of equal elements to the preceding equal group and +// moves and trailing sequence of equal elements to the succeeding equal group. +// +// This is necessary since coalesceInterveningIdentical may coalesce edit groups +// together such that leading/trailing spans of equal elements becomes possible. +// Note that this can occur even with an optimal diffing algorithm. +// +// Example: +// +// Input: [ +// {NumIdentical: 61}, +// {NumIdentical: 1 , NumRemoved: 11, NumInserted: 2}, // assume 3 leading identical elements +// {NumIdentical: 67}, +// {NumIdentical: 7, NumRemoved: 12, NumInserted: 3}, // assume 10 trailing identical elements +// {NumIdentical: 54}, +// ] +// Output: [ +// {NumIdentical: 64}, // incremented by 3 +// {NumRemoved: 9}, +// {NumIdentical: 67}, +// {NumRemoved: 9}, +// {NumIdentical: 64}, // incremented by 10 +// ] +// +func cleanupSurroundingIdentical(groups []diffStats, eq func(i, j int) bool) []diffStats { + var ix, iy int // indexes into sequence x and y + for i, ds := range groups { + // Handle equal group. + if ds.NumDiff() == 0 { + ix += ds.NumIdentical + iy += ds.NumIdentical + continue + } + + // Handle unequal group. + nx := ds.NumIdentical + ds.NumRemoved + ds.NumModified + ny := ds.NumIdentical + ds.NumInserted + ds.NumModified + var numLeadingIdentical, numTrailingIdentical int + for i := 0; i < nx && i < ny && eq(ix+i, iy+i); i++ { + numLeadingIdentical++ + } + for i := 0; i < nx && i < ny && eq(ix+nx-1-i, iy+ny-1-i); i++ { + numTrailingIdentical++ + } + if numIdentical := numLeadingIdentical + numTrailingIdentical; numIdentical > 0 { + if numLeadingIdentical > 0 { + // Remove leading identical span from this group and + // insert it into the preceding group. + if i-1 >= 0 { + groups[i-1].NumIdentical += numLeadingIdentical + } else { + // No preceding group exists, so prepend a new group, + // but do so after we finish iterating over all groups. + defer func() { + groups = append([]diffStats{{Name: groups[0].Name, NumIdentical: numLeadingIdentical}}, groups...) + }() + } + // Increment indexes since the preceding group would have handled this. + ix += numLeadingIdentical + iy += numLeadingIdentical + } + if numTrailingIdentical > 0 { + // Remove trailing identical span from this group and + // insert it into the succeeding group. + if i+1 < len(groups) { + groups[i+1].NumIdentical += numTrailingIdentical + } else { + // No succeeding group exists, so append a new group, + // but do so after we finish iterating over all groups. + defer func() { + groups = append(groups, diffStats{Name: groups[len(groups)-1].Name, NumIdentical: numTrailingIdentical}) + }() + } + // Do not increment indexes since the succeeding group will handle this. + } + + // Update this group since some identical elements were removed. + nx -= numIdentical + ny -= numIdentical + groups[i] = diffStats{Name: ds.Name, NumRemoved: nx, NumInserted: ny} + } + ix += nx + iy += ny + } + return groups +} diff --git a/cmp/testdata/diffs b/cmp/testdata/diffs index 05118be..2d9f9ab 100644 --- a/cmp/testdata/diffs +++ b/cmp/testdata/diffs @@ -1046,6 +1046,25 @@ + }, } >>> TestDiff/Reporter/LargeStandaloneString +<<< TestDiff/Reporter/SurroundingEqualElements + strings.Join({ + "org-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=aa", +- ",#=_value", + ` _value=2 11 org-4747474747474747,bucket-4242424242424242:m,tag1`, + "=a,tag2=bb", +- ",#=_value", + ` _value=2 21 org-4747474747474747,bucket-4242424242424242:m,tag1`, + "=b,tag2=cc", +- ",#=_value", + ` _value=1 21 org-4747474747474747,bucket-4242424242424242:m,tag1`, + "=a,tag2=dd", +- ",#=_value", + ` _value=3 31 org-4747474747474747,bucket-4242424242424242:m,tag1`, + "=c", +- ",#=_value", + ` _value=4 41 `, + }, "") +>>> TestDiff/Reporter/SurroundingEqualElements <<< TestDiff/EmbeddedStruct/ParentStructA/Inequal teststructs.ParentStructA{ privateStruct: teststructs.privateStruct{