From 18c5d483e040107cdcf23e457ab7dfe87ec4257a Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Mon, 23 Nov 2020 13:54:08 -0800 Subject: [PATCH] Fix non-determinism in diffing algorithm A previous attempt to add non-determinism to the diffing algorithm unfortunately broke the algorithm for half the cases. This change modifies the algorithm to truly switch between starting with a forward search versus a reverse search. The main for-loop of Difference would switch repeatedly between performing a forward search, then a reverse search, and vice-versa. Since we can't jump into the middle of a for-loop to start with the reverse search first, we use a series of labels and goto statements to accomplish the same effect. Fixes #238 --- cmp/internal/diff/diff.go | 48 +++++++++++++++++++--------------- cmp/internal/diff/diff_test.go | 27 +++++++++---------- 2 files changed, 40 insertions(+), 35 deletions(-) diff --git a/cmp/internal/diff/diff.go b/cmp/internal/diff/diff.go index 441953e..bc196b1 100644 --- a/cmp/internal/diff/diff.go +++ b/cmp/internal/diff/diff.go @@ -119,7 +119,7 @@ func (r Result) Similar() bool { return r.NumSame+1 >= r.NumDiff } -var randInt = rand.New(rand.NewSource(time.Now().Unix())).Intn(2) +var randBool = rand.New(rand.NewSource(time.Now().Unix())).Intn(2) == 0 // Difference reports whether two lists of lengths nx and ny are equal // given the definition of equality provided as f. @@ -168,17 +168,6 @@ func Difference(nx, ny int, f EqualFunc) (es EditScript) { // A vertical edge is equivalent to inserting a symbol from list Y. // A diagonal edge is equivalent to a matching symbol between both X and Y. - // To ensure flexibility in changing the algorithm in the future, - // introduce some degree of deliberate instability. - // This is achieved by fiddling the zigzag iterator to start searching - // the graph starting from the bottom-right versus than the top-left. - // The result may differ depending on the starting search location, - // but still produces a valid edit script. - zigzagInit := randInt // either 0 or 1 - if flags.Deterministic { - zigzagInit = 0 - } - // Invariants: // • 0 ≤ fwdPath.X ≤ (fwdFrontier.X, revFrontier.X) ≤ revPath.X ≤ nx // • 0 ≤ fwdPath.Y ≤ (fwdFrontier.Y, revFrontier.Y) ≤ revPath.Y ≤ ny @@ -197,6 +186,11 @@ func Difference(nx, ny int, f EqualFunc) (es EditScript) { // approximately the square-root of the search budget. searchBudget := 4 * (nx + ny) // O(n) + // Running the tests with the "cmp_debug" build tag prints a visualization + // of the algorithm running in real-time. This is educational for + // understanding how the algorithm works. See debug_enable.go. + f = debug.Begin(nx, ny, f, &fwdPath.es, &revPath.es) + // The algorithm below is a greedy, meet-in-the-middle algorithm for // computing sub-optimal edit-scripts between two lists. // @@ -214,22 +208,28 @@ func Difference(nx, ny int, f EqualFunc) (es EditScript) { // frontier towards the opposite corner. // • This algorithm terminates when either the X coordinates or the // Y coordinates of the forward and reverse frontier points ever intersect. - // + // This algorithm is correct even if searching only in the forward direction // or in the reverse direction. We do both because it is commonly observed // that two lists commonly differ because elements were added to the front // or end of the other list. // - // Running the tests with the "cmp_debug" build tag prints a visualization - // of the algorithm running in real-time. This is educational for - // understanding how the algorithm works. See debug_enable.go. - f = debug.Begin(nx, ny, f, &fwdPath.es, &revPath.es) - for { + // Non-deterministically start with either the forward or reverse direction + // to introduce some deliberate instability so that we have the flexibility + // to change this algorithm in the future. + if flags.Deterministic || randBool { + goto forwardSearch + } else { + goto reverseSearch + } + +forwardSearch: + { // Forward search from the beginning. if fwdFrontier.X >= revFrontier.X || fwdFrontier.Y >= revFrontier.Y || searchBudget == 0 { - break + goto finishSearch } - for stop1, stop2, i := false, false, zigzagInit; !(stop1 && stop2) && searchBudget > 0; i++ { + for stop1, stop2, i := false, false, 0; !(stop1 && stop2) && searchBudget > 0; i++ { // Search in a diagonal pattern for a match. z := zigzag(i) p := point{fwdFrontier.X + z, fwdFrontier.Y - z} @@ -262,10 +262,14 @@ func Difference(nx, ny int, f EqualFunc) (es EditScript) { } else { fwdFrontier.Y++ } + goto reverseSearch + } +reverseSearch: + { // Reverse search from the end. if fwdFrontier.X >= revFrontier.X || fwdFrontier.Y >= revFrontier.Y || searchBudget == 0 { - break + goto finishSearch } for stop1, stop2, i := false, false, 0; !(stop1 && stop2) && searchBudget > 0; i++ { // Search in a diagonal pattern for a match. @@ -300,8 +304,10 @@ func Difference(nx, ny int, f EqualFunc) (es EditScript) { } else { revFrontier.Y-- } + goto forwardSearch } +finishSearch: // Join the forward and reverse paths and then append the reverse path. fwdPath.connect(revPath.point, f) for i := len(revPath.es) - 1; i >= 0; i-- { diff --git a/cmp/internal/diff/diff_test.go b/cmp/internal/diff/diff_test.go index d97fef8..eacf072 100644 --- a/cmp/internal/diff/diff_test.go +++ b/cmp/internal/diff/diff_test.go @@ -10,21 +10,15 @@ import ( "strings" "testing" "unicode" - - "github.com/google/go-cmp/cmp/internal/flags" ) -func init() { - flags.Deterministic = true -} - func TestDifference(t *testing.T) { tests := []struct { // Before passing x and y to Difference, we strip all spaces so that // they can be used by the test author to indicate a missing symbol // in one of the lists. x, y string - want string + want string // '|' separated list of possible outputs }{{ x: "", y: "", @@ -36,7 +30,7 @@ func TestDifference(t *testing.T) { }, { x: "##", y: "# ", - want: ".X", + want: ".X|X.", }, { x: "a#", y: "A ", @@ -48,7 +42,7 @@ func TestDifference(t *testing.T) { }, { x: "# ", y: "##", - want: ".Y", + want: ".Y|Y.", }, { x: " #", y: "@#", @@ -148,7 +142,7 @@ func TestDifference(t *testing.T) { }, { x: "ABCAB BA ", y: " C BABAC", - want: "XX.X.Y..Y", + want: "XX.X.Y..Y|XX.Y.X..Y", }, { x: "# #### ###", y: "#y####yy###", @@ -164,7 +158,7 @@ func TestDifference(t *testing.T) { }, { x: "0 12z3x 456789 x x 0", y: "0y12Z3 y456789y y y0", - want: ".Y..M.XY......YXYXY.", + want: ".Y..M.XY......YXYXY.|.Y..M.XY......XYXYY.", }, { x: "0 2 4 6 8 ..................abXXcdEXF.ghXi", y: " 1 3 5 7 9..................AB CDE F.GH I", @@ -216,7 +210,7 @@ func TestDifference(t *testing.T) { }, { x: "0123456789 ", y: " 5678901234", - want: "XXXXX.....YYYYY", + want: "XXXXX.....YYYYY|YYYYY.....XXXXX", }, { x: "0123456789 ", y: " 4567890123", @@ -252,9 +246,14 @@ func TestDifference(t *testing.T) { x := strings.Replace(tt.x, " ", "", -1) y := strings.Replace(tt.y, " ", "", -1) es := testStrings(t, x, y) - if got := es.String(); got != tt.want { - t.Errorf("Difference(%s, %s):\ngot %s\nwant %s", x, y, got, tt.want) + var want string + got := es.String() + for _, want = range strings.Split(tt.want, "|") { + if got == want { + return + } } + t.Errorf("Difference(%s, %s):\ngot %s\nwant %s", x, y, got, want) }) } }