Skip to content

Commit

Permalink
Merge pull request #108 from pakohan/master
Browse files Browse the repository at this point in the history
Fix DiffCleanupSemantic: compare text lengths in runes (utf8.RuneCountInString) instead of bytes (len)
  • Loading branch information
sergi authored Oct 20, 2020
2 parents a87b244 + e013302 commit df97e07
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 9 deletions.
18 changes: 9 additions & 9 deletions diffmatchpatch/diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -670,16 +670,16 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff {
// An insertion or deletion.

if diffs[pointer].Type == DiffInsert {
- lengthInsertions2 += len(diffs[pointer].Text)
+ lengthInsertions2 += utf8.RuneCountInString(diffs[pointer].Text)
} else {
- lengthDeletions2 += len(diffs[pointer].Text)
+ lengthDeletions2 += utf8.RuneCountInString(diffs[pointer].Text)
}
// Eliminate an equality that is smaller or equal to the edits on both sides of it.
difference1 := int(math.Max(float64(lengthInsertions1), float64(lengthDeletions1)))
difference2 := int(math.Max(float64(lengthInsertions2), float64(lengthDeletions2)))
- if len(lastequality) > 0 &&
- (len(lastequality) <= difference1) &&
- (len(lastequality) <= difference2) {
+ if utf8.RuneCountInString(lastequality) > 0 &&
+ (utf8.RuneCountInString(lastequality) <= difference1) &&
+ (utf8.RuneCountInString(lastequality) <= difference2) {
// Duplicate record.
insPoint := equalities[len(equalities)-1]
diffs = splice(diffs, insPoint, 0, Diff{DiffDelete, lastequality})
Expand Down Expand Up @@ -728,8 +728,8 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff {
overlapLength1 := dmp.DiffCommonOverlap(deletion, insertion)
overlapLength2 := dmp.DiffCommonOverlap(insertion, deletion)
if overlapLength1 >= overlapLength2 {
- if float64(overlapLength1) >= float64(len(deletion))/2 ||
- float64(overlapLength1) >= float64(len(insertion))/2 {
+ if float64(overlapLength1) >= float64(utf8.RuneCountInString(deletion))/2 ||
+ float64(overlapLength1) >= float64(utf8.RuneCountInString(insertion))/2 {

// Overlap found. Insert an equality and trim the surrounding edits.
diffs = splice(diffs, pointer, 0, Diff{DiffEqual, insertion[:overlapLength1]})
Expand All @@ -739,8 +739,8 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff {
pointer++
}
} else {
- if float64(overlapLength2) >= float64(len(deletion))/2 ||
- float64(overlapLength2) >= float64(len(insertion))/2 {
+ if float64(overlapLength2) >= float64(utf8.RuneCountInString(deletion))/2 ||
+ float64(overlapLength2) >= float64(utf8.RuneCountInString(insertion))/2 {
// Reverse overlap found. Insert an equality and swap and trim the surrounding edits.
overlap := Diff{DiffEqual, deletion[:overlapLength2]}
diffs = splice(diffs, pointer, 0, overlap)
Expand Down
37 changes: 37 additions & 0 deletions diffmatchpatch/diff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -821,6 +821,43 @@ func TestDiffCleanupSemantic(t *testing.T) {
{DiffDelete, " deal"},
},
},
{
"Taken from python / CPP library",
[]Diff{
{DiffInsert, "星球大戰:新的希望 "},
{DiffEqual, "star wars: "},
{DiffDelete, "episodio iv - un"},
{DiffEqual, "a n"},
{DiffDelete, "u"},
{DiffEqual, "e"},
{DiffDelete, "va"},
{DiffInsert, "w"},
{DiffEqual, " "},
{DiffDelete, "es"},
{DiffInsert, "ho"},
{DiffEqual, "pe"},
{DiffDelete, "ranza"},
},
[]Diff{
{DiffInsert, "星球大戰:新的希望 "},
{DiffEqual, "star wars: "},
{DiffDelete, "episodio iv - una nueva esperanza"},
{DiffInsert, "a new hope"},
},
},
{
"panic",
[]Diff{
{DiffInsert, "킬러 인 "},
{DiffEqual, "리커버리"},
{DiffDelete, " 보이즈"},
},
[]Diff{
{DiffInsert, "킬러 인 "},
{DiffEqual, "리커버리"},
{DiffDelete, " 보이즈"},
},
},
} {
actual := dmp.DiffCleanupSemantic(tc.Diffs)
assert.Equal(t, tc.Expected, actual, fmt.Sprintf("Test case #%d, %s", i, tc.Name))
Expand Down

0 comments on commit df97e07

Please sign in to comment.