From 307d5b2e49ad28a20f5f6ccc444c9c84503bcaba Mon Sep 17 00:00:00 2001 From: Patrick Kohan Date: Tue, 10 Dec 2019 13:22:38 +0100 Subject: [PATCH 1/8] get len of []rune instead of string --- diffmatchpatch/diff.go | 22 +++++++++++----------- diffmatchpatch/diff_test.go | 24 ++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/diffmatchpatch/diff.go b/diffmatchpatch/diff.go index cb25b43..f70d6f3 100644 --- a/diffmatchpatch/diff.go +++ b/diffmatchpatch/diff.go @@ -670,16 +670,16 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff { // An insertion or deletion. if diffs[pointer].Type == DiffInsert { - lengthInsertions2 += len(diffs[pointer].Text) + lengthInsertions2 += len([]rune(diffs[pointer].Text)) } else { - lengthDeletions2 += len(diffs[pointer].Text) + lengthDeletions2 += len([]rune(diffs[pointer].Text)) } // Eliminate an equality that is smaller or equal to the edits on both sides of it. difference1 := int(math.Max(float64(lengthInsertions1), float64(lengthDeletions1))) difference2 := int(math.Max(float64(lengthInsertions2), float64(lengthDeletions2))) - if len(lastequality) > 0 && - (len(lastequality) <= difference1) && - (len(lastequality) <= difference2) { + if len([]rune(lastequality)) > 0 && + (len([]rune(lastequality)) <= difference1) && + (len([]rune(lastequality)) <= difference2) { // Duplicate record. insPoint := equalities[len(equalities)-1] diffs = splice(diffs, insPoint, 0, Diff{DiffDelete, lastequality}) @@ -728,24 +728,24 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff { overlapLength1 := dmp.DiffCommonOverlap(deletion, insertion) overlapLength2 := dmp.DiffCommonOverlap(insertion, deletion) if overlapLength1 >= overlapLength2 { - if float64(overlapLength1) >= float64(len(deletion))/2 || - float64(overlapLength1) >= float64(len(insertion))/2 { + if float64(overlapLength1) >= float64(len([]rune(deletion)))/2 || + float64(overlapLength1) >= float64(len([]rune(insertion)))/2 { // Overlap found. Insert an equality and trim the surrounding edits. diffs = splice(diffs, pointer, 0, Diff{DiffEqual, insertion[:overlapLength1]}) diffs[pointer-1].Text = - deletion[0 : len(deletion)-overlapLength1] + deletion[0 : len([]rune(deletion))-overlapLength1] diffs[pointer+1].Text = insertion[overlapLength1:] pointer++ } } else { - if float64(overlapLength2) >= float64(len(deletion))/2 || - float64(overlapLength2) >= float64(len(insertion))/2 { + if float64(overlapLength2) >= float64(len([]rune(deletion)))/2 || + float64(overlapLength2) >= float64(len([]rune(insertion)))/2 { // Reverse overlap found. Insert an equality and swap and trim the surrounding edits. overlap := Diff{DiffEqual, deletion[:overlapLength2]} diffs = splice(diffs, pointer, 0, overlap) diffs[pointer-1].Type = DiffInsert - diffs[pointer-1].Text = insertion[0 : len(insertion)-overlapLength2] + diffs[pointer-1].Text = insertion[0 : len([]rune(insertion))-overlapLength2] diffs[pointer+1].Type = DiffDelete diffs[pointer+1].Text = deletion[overlapLength2:] pointer++ diff --git a/diffmatchpatch/diff_test.go b/diffmatchpatch/diff_test.go index 5c165b1..6c159cb 100644 --- a/diffmatchpatch/diff_test.go +++ b/diffmatchpatch/diff_test.go @@ -821,6 +821,30 @@ func TestDiffCleanupSemantic(t *testing.T) { {DiffDelete, " deal"}, }, }, + { + "Taken from python / CPP library", + []Diff{ + {DiffInsert, "星球大戰:新的希望 "}, + {DiffEqual, "star wars: "}, + {DiffDelete, "episodio iv - un"}, + {DiffEqual, "a n"}, + {DiffDelete, "u"}, + {DiffEqual, "e"}, + {DiffDelete, "va"}, + {DiffInsert, "w"}, + {DiffEqual, " "}, + {DiffDelete, "es"}, + {DiffInsert, "ho"}, + {DiffEqual, "pe"}, + {DiffDelete, "ranza"}, + }, + []Diff{ + {DiffInsert, "星球大戰:新的希望 "}, + {DiffEqual, "star wars: "}, + {DiffDelete, "episodio iv - una nueva esperanza"}, + {DiffInsert, "a new hope"}, + }, + }, } { actual := dmp.DiffCleanupSemantic(tc.Diffs) assert.Equal(t, tc.Expected, actual, fmt.Sprintf("Test case #%d, %s", i, tc.Name)) From 5b055fab6731b98b47b19da827b2975012958398 Mon Sep 17 00:00:00 2001 From: Patrick Kohan Date: Tue, 10 Dec 2019 15:03:27 +0100 Subject: [PATCH 2/8] fix --- diffmatchpatch/diff.go | 4 ++-- diffmatchpatch/diff_test.go | 12 ++++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/diffmatchpatch/diff.go b/diffmatchpatch/diff.go index f70d6f3..16b03f2 100644 --- a/diffmatchpatch/diff.go +++ b/diffmatchpatch/diff.go @@ -500,8 +500,8 @@ func commonSuffixLength(text1, text2 []rune) int { // DiffCommonOverlap determines if the suffix of one string is the prefix of another. func (dmp *DiffMatchPatch) DiffCommonOverlap(text1 string, text2 string) int { // Cache the text lengths to prevent multiple calls. - text1Length := len(text1) - text2Length := len(text2) + text1Length := len([]rune(text1)) + text2Length := len([]rune(text2)) // Eliminate the null case. if text1Length == 0 || text2Length == 0 { return 0 diff --git a/diffmatchpatch/diff_test.go b/diffmatchpatch/diff_test.go index 6c159cb..e3689ae 100644 --- a/diffmatchpatch/diff_test.go +++ b/diffmatchpatch/diff_test.go @@ -845,6 +845,18 @@ func TestDiffCleanupSemantic(t *testing.T) { {DiffInsert, "a new hope"}, }, }, + { + "panic", + []Diff{ + {DiffInsert, "킬러 인 "}, + {DiffEqual, "리커버리"}, + {DiffDelete, " 보이즈"}, + }, + []Diff{ + {DiffDelete, "리커버리 보이즈"}, + {DiffInsert, "킬러 인 리커버리"}, + }, + }, } { actual := dmp.DiffCleanupSemantic(tc.Diffs) assert.Equal(t, tc.Expected, actual, fmt.Sprintf("Test case #%d, %s", i, tc.Name)) From 6895bfd4fc53d83e49476bb8626c537ba49ac11b Mon Sep 17 00:00:00 2001 From: Patrick Kohan Date: Thu, 12 Dec 2019 11:30:59 +0100 Subject: [PATCH 3/8] - --- diffmatchpatch/diff.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/diffmatchpatch/diff.go b/diffmatchpatch/diff.go index 16b03f2..f70d6f3 100644 --- a/diffmatchpatch/diff.go +++ b/diffmatchpatch/diff.go @@ -500,8 +500,8 @@ func commonSuffixLength(text1, text2 []rune) int { // DiffCommonOverlap determines if the suffix of one string is the prefix of another. func (dmp *DiffMatchPatch) DiffCommonOverlap(text1 string, text2 string) int { // Cache the text lengths to prevent multiple calls. - text1Length := len([]rune(text1)) - text2Length := len([]rune(text2)) + text1Length := len(text1) + text2Length := len(text2) // Eliminate the null case. if text1Length == 0 || text2Length == 0 { return 0 From 72a1ad41d13b6381c18137d8c31f7203071eb9d1 Mon Sep 17 00:00:00 2001 From: Patrick Kohan Date: Thu, 12 Dec 2019 11:38:47 +0100 Subject: [PATCH 4/8] - --- diffmatchpatch/diff.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/diffmatchpatch/diff.go b/diffmatchpatch/diff.go index f70d6f3..01c4b2c 100644 --- a/diffmatchpatch/diff.go +++ b/diffmatchpatch/diff.go @@ -728,24 +728,24 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff { overlapLength1 := dmp.DiffCommonOverlap(deletion, insertion) overlapLength2 := dmp.DiffCommonOverlap(insertion, deletion) if overlapLength1 >= overlapLength2 { - if float64(overlapLength1) >= float64(len([]rune(deletion)))/2 || - float64(overlapLength1) >= float64(len([]rune(insertion)))/2 { + if float64(overlapLength1) >= float64(len(deletion))/2 || + float64(overlapLength1) >= float64(len(insertion))/2 { // Overlap found. Insert an equality and trim the surrounding edits. diffs = splice(diffs, pointer, 0, Diff{DiffEqual, insertion[:overlapLength1]}) diffs[pointer-1].Text = - deletion[0 : len([]rune(deletion))-overlapLength1] + deletion[0 : len(deletion)-overlapLength1] diffs[pointer+1].Text = insertion[overlapLength1:] pointer++ } } else { - if float64(overlapLength2) >= float64(len([]rune(deletion)))/2 || - float64(overlapLength2) >= float64(len([]rune(insertion)))/2 { + if float64(overlapLength2) >= float64(len(deletion))/2 || + float64(overlapLength2) >= float64(len(insertion))/2 { // Reverse overlap found. Insert an equality and swap and trim the surrounding edits. overlap := Diff{DiffEqual, deletion[:overlapLength2]} diffs = splice(diffs, pointer, 0, overlap) diffs[pointer-1].Type = DiffInsert - diffs[pointer-1].Text = insertion[0 : len([]rune(insertion))-overlapLength2] + diffs[pointer-1].Text = insertion[0 : len(insertion)-overlapLength2] diffs[pointer+1].Type = DiffDelete diffs[pointer+1].Text = deletion[overlapLength2:] pointer++ From 066c0e613093ffd86c200718122222bf1d48a311 Mon Sep 17 00:00:00 2001 From: Patrick Kohan Date: Thu, 12 Dec 2019 11:43:21 +0100 Subject: [PATCH 5/8] fix test --- diffmatchpatch/diff_test.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/diffmatchpatch/diff_test.go b/diffmatchpatch/diff_test.go index e3689ae..2bb5c5a 100644 --- a/diffmatchpatch/diff_test.go +++ b/diffmatchpatch/diff_test.go @@ -853,8 +853,9 @@ func TestDiffCleanupSemantic(t *testing.T) { {DiffDelete, " 보이즈"}, }, []Diff{ - {DiffDelete, "리커버리 보이즈"}, - {DiffInsert, "킬러 인 리커버리"}, + {DiffInsert, "킬러 인 "}, + {DiffEqual, "리커버리"}, + {DiffDelete, " 보이즈"}, }, }, } { From e614d5087c0ef24ba4abd3e0693c762be09b5e51 Mon Sep 17 00:00:00 2001 From: Patrick Kohan Date: Thu, 12 Dec 2019 14:25:56 +0100 Subject: [PATCH 6/8] len calculation --- diffmatchpatch/diff.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/diffmatchpatch/diff.go b/diffmatchpatch/diff.go index 01c4b2c..b03f7e3 100644 --- a/diffmatchpatch/diff.go +++ b/diffmatchpatch/diff.go @@ -739,8 +739,8 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff { pointer++ } } else { - if float64(overlapLength2) >= float64(len(deletion))/2 || - float64(overlapLength2) >= float64(len(insertion))/2 { + if float64(overlapLength2) >= float64(len([]rune(deletion)))/2 || + float64(overlapLength2) >= float64(len([]rune(insertion)))/2 { // Reverse overlap found. Insert an equality and swap and trim the surrounding edits. overlap := Diff{DiffEqual, deletion[:overlapLength2]} diffs = splice(diffs, pointer, 0, overlap) From c6eeac79a2c459994fa7a1f5834fad9ebac3c231 Mon Sep 17 00:00:00 2001 From: Patrick Kohan Date: Thu, 12 Dec 2019 14:26:17 +0100 Subject: [PATCH 7/8] len calculation --- diffmatchpatch/diff.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/diffmatchpatch/diff.go b/diffmatchpatch/diff.go index b03f7e3..a7dfa08 100644 --- a/diffmatchpatch/diff.go +++ b/diffmatchpatch/diff.go @@ -728,8 +728,8 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff { overlapLength1 := dmp.DiffCommonOverlap(deletion, insertion) overlapLength2 := dmp.DiffCommonOverlap(insertion, deletion) if overlapLength1 >= overlapLength2 { - if float64(overlapLength1) >= float64(len(deletion))/2 || - float64(overlapLength1) >= float64(len(insertion))/2 { + if float64(overlapLength1) >= float64(len([]rune(deletion)))/2 || + float64(overlapLength1) >= float64(len([]rune(insertion)))/2 { // Overlap found. Insert an equality and trim the surrounding edits. diffs = splice(diffs, pointer, 0, Diff{DiffEqual, insertion[:overlapLength1]}) From e013302309a82837ec242c5b47fe6b28bdd12202 Mon Sep 17 00:00:00 2001 From: Patrick Kohan Date: Tue, 10 Mar 2020 12:14:30 +0100 Subject: [PATCH 8/8] use utf8.RuneCountInString --- diffmatchpatch/diff.go | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/diffmatchpatch/diff.go b/diffmatchpatch/diff.go index a7dfa08..13b38e1 100644 --- a/diffmatchpatch/diff.go +++ b/diffmatchpatch/diff.go @@ -670,16 +670,16 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff { // An insertion or deletion. if diffs[pointer].Type == DiffInsert { - lengthInsertions2 += len([]rune(diffs[pointer].Text)) + lengthInsertions2 += utf8.RuneCountInString(diffs[pointer].Text) } else { - lengthDeletions2 += len([]rune(diffs[pointer].Text)) + lengthDeletions2 += utf8.RuneCountInString(diffs[pointer].Text) } // Eliminate an equality that is smaller or equal to the edits on both sides of it. difference1 := int(math.Max(float64(lengthInsertions1), float64(lengthDeletions1))) difference2 := int(math.Max(float64(lengthInsertions2), float64(lengthDeletions2))) - if len([]rune(lastequality)) > 0 && - (len([]rune(lastequality)) <= difference1) && - (len([]rune(lastequality)) <= difference2) { + if utf8.RuneCountInString(lastequality) > 0 && + (utf8.RuneCountInString(lastequality) <= difference1) && + (utf8.RuneCountInString(lastequality) <= difference2) { // Duplicate record. insPoint := equalities[len(equalities)-1] diffs = splice(diffs, insPoint, 0, Diff{DiffDelete, lastequality}) @@ -728,8 +728,8 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff { overlapLength1 := dmp.DiffCommonOverlap(deletion, insertion) overlapLength2 := dmp.DiffCommonOverlap(insertion, deletion) if overlapLength1 >= overlapLength2 { - if float64(overlapLength1) >= float64(len([]rune(deletion)))/2 || - float64(overlapLength1) >= float64(len([]rune(insertion)))/2 { + if float64(overlapLength1) >= float64(utf8.RuneCountInString(deletion))/2 || + float64(overlapLength1) >= float64(utf8.RuneCountInString(insertion))/2 { // Overlap found. Insert an equality and trim the surrounding edits. diffs = splice(diffs, pointer, 0, Diff{DiffEqual, insertion[:overlapLength1]}) @@ -739,8 +739,8 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff { pointer++ } } else { - if float64(overlapLength2) >= float64(len([]rune(deletion)))/2 || - float64(overlapLength2) >= float64(len([]rune(insertion)))/2 { + if float64(overlapLength2) >= float64(utf8.RuneCountInString(deletion))/2 || + float64(overlapLength2) >= float64(utf8.RuneCountInString(insertion))/2 { // Reverse overlap found. Insert an equality and swap and trim the surrounding edits. overlap := Diff{DiffEqual, deletion[:overlapLength2]} diffs = splice(diffs, pointer, 0, overlap)