Skip to content

Commit

Permalink
edit: create package
Browse files Browse the repository at this point in the history
Part of #18.

There are some unfortunate API changes as part of this,
such as the new EditScriptWithContextSize,
since edit.Script is now part of a different package.

Those will get ironed out as the refactoring continues.
  • Loading branch information
josharian committed Dec 30, 2019
1 parent 5319263 commit 7cc3ed0
Show file tree
Hide file tree
Showing 11 changed files with 248 additions and 210 deletions.
4 changes: 2 additions & 2 deletions cmd/pkg-diff-example/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,13 @@ func main() {
defer cancel()
}
e := diff.Myers(ctx, ab)
e = e.WithContextSize(*unified) // limit amount of output context
e = diff.EditScriptWithContextSize(e, *unified) // limit amount of output context
opts := []diff.WriteOpt{
diff.Names(aName, bName),
}
if *color {
opts = append(opts, diff.TerminalColor())
}
_, err = e.WriteUnified(os.Stdout, ab, opts...)
_, err = diff.WriteUnified(e, os.Stdout, ab, opts...)
check(err)
}
68 changes: 36 additions & 32 deletions context.go
Original file line number Diff line number Diff line change
@@ -1,89 +1,93 @@
package diff

import "fmt"
import (
"fmt"

"github.com/pkg/diff/edit"
)

// WithContextSize returns an edit script preserving only n common elements of context for changes.
// The returned edit script may alias the input.
// If n is negative, WithContextSize panics.
// To generate a "unified diff", use WithContextSize and then WriteUnified the resulting edit script.
func (e EditScript) WithContextSize(n int) EditScript {
func EditScriptWithContextSize(e edit.Script, n int) edit.Script {
if n < 0 {
panic(fmt.Sprintf("EditScript.WithContextSize called with negative n: %d", n))
}

// Handle small scripts.
switch len(e.IndexRanges) {
switch len(e.Ranges) {
case 0:
return EditScript{}
return edit.Script{}
case 1:
if e.IndexRanges[0].IsEqual() {
if e.Ranges[0].IsEqual() {
// Entirely identical contents.
// Unclear what to do here. For now, just bail.
// TODO: something else? what does command line diff do?
return EditScript{}
return edit.Script{}
}
return scriptWithIndexRanges(e.IndexRanges[0])
return edit.NewScript(e.Ranges[0])
}

out := make([]IndexRanges, 0, len(e.IndexRanges))
for i, seg := range e.IndexRanges {
out := make([]edit.Range, 0, len(e.Ranges))
for i, seg := range e.Ranges {
if !seg.IsEqual() {
out = append(out, seg)
continue
}
if i == 0 {
// Leading IndexRanges. Keep only the final n entries.
if seg.len() > n {
seg = indexRangesLastN(seg, n)
// Leading Range. Keep only the final n entries.
if seg.Len() > n {
seg = rangeLastN(seg, n)
}
out = append(out, seg)
continue
}
if i == len(e.IndexRanges)-1 {
// Trailing IndexRanges. Keep only the first n entries.
if seg.len() > n {
seg = indexRangesFirstN(seg, n)
if i == len(e.Ranges)-1 {
// Trailing Range. Keep only the first n entries.
if seg.Len() > n {
seg = rangeFirstN(seg, n)
}
out = append(out, seg)
continue
}
if seg.len() <= n*2 {
// Small middle IndexRanges. Keep unchanged.
if seg.Len() <= n*2 {
// Small middle Range. Keep unchanged.
out = append(out, seg)
continue
}
// Large middle IndexRanges. Break into two disjoint parts.
out = append(out, indexRangesFirstN(seg, n), indexRangesLastN(seg, n))
// Large middle Range. Break into two disjoint parts.
out = append(out, rangeFirstN(seg, n), rangeLastN(seg, n))
}

// TODO: Stock macOS diff also trims common blank lines
// from the beginning/end of eq IndexRangess.
// Perhaps we should do that here too.
// Or perhaps that should be a separate, composable EditScript method?
return EditScript{IndexRanges: out}
// Or perhaps that should be a separate, composable function?
return edit.Script{Ranges: out}
}

func indexRangesFirstN(seg IndexRanges, n int) IndexRanges {
func rangeFirstN(seg edit.Range, n int) edit.Range {
if !seg.IsEqual() {
panic("indexRangesFirstN bad op")
panic("rangeFirstN bad op")
}
if seg.len() < n {
panic("indexRangesFirstN bad Len")
if seg.Len() < n {
panic("rangeFirstN bad Len")
}
return IndexRanges{
return edit.Range{
LowA: seg.LowA, HighA: seg.LowA + n,
LowB: seg.LowB, HighB: seg.LowB + n,
}
}

func indexRangesLastN(seg IndexRanges, n int) IndexRanges {
func rangeLastN(seg edit.Range, n int) edit.Range {
if !seg.IsEqual() {
panic("indexRangesLastN bad op")
panic("rangeLastN bad op")
}
if seg.len() < n {
panic("indexRangesLastN bad Len")
if seg.Len() < n {
panic("rangeLastN bad Len")
}
return IndexRanges{
return edit.Range{
LowA: seg.HighA - n, HighA: seg.HighA,
LowB: seg.HighB - n, HighB: seg.HighB,
}
Expand Down
107 changes: 4 additions & 103 deletions diff.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
package diff

import (
"bytes"
"fmt"
"io"

"github.com/pkg/diff/edit"
)

// A Pair is two things that can be diffed using the Myers diff algorithm.
Expand Down Expand Up @@ -48,112 +49,12 @@ type PairWriterTo interface {
// If you have paid the O(n) cost to intern all strings involved in both A and B,
// then string comparisons are reduced to cheap pointer comparisons.

// An op is a edit operation used to transform A into B.
type op int8

//go:generate stringer -type op

const (
del op = -1
eq op = 0
ins op = 1
)

// IndexRanges represents a pair of clopen index ranges.
// They represent elements A[LowA:HighA] and B[LowB:HighB].
type IndexRanges struct {
LowA, HighA int
LowB, HighB int
}

// IsInsert reports whether r represents an insertion in an EditScript.
// If so, the inserted elements are B[LowB:HighB].
func (r *IndexRanges) IsInsert() bool {
return r.LowA == r.HighA
}

// IsDelete reports whether r represents a deletion in an EditScript.
// If so, the deleted elements are A[LowA:HighA].
func (r *IndexRanges) IsDelete() bool {
return r.LowB == r.HighB
}

// IsEqual reports whether r represents a series of equal elements in an EditScript.
// If so, the elements A[LowA:HighA] are equal to the elements B[LowB:HighB].
func (r *IndexRanges) IsEqual() bool {
return r.HighB-r.LowB == r.HighA-r.LowA
}

func (r *IndexRanges) op() op {
if r.IsInsert() {
return ins
}
if r.IsDelete() {
return del
}
if r.IsEqual() {
return eq
}
panic("malformed IndexRanges")
}

func (s IndexRanges) debugString() string {
func rangeString(r edit.Range) string {
// This output is helpful when hacking on a Myers diff.
// In other contexts it is usually more natural to group LowA, HighA and LowB, HighB.
return fmt.Sprintf("(%d, %d) -- %s %d --> (%d, %d)", s.LowA, s.LowB, s.op(), s.len(), s.HighA, s.HighB)
}

func (s IndexRanges) len() int {
if s.LowA == s.HighA {
return s.HighB - s.LowB
}
return s.HighA - s.LowA
}

// An EditScript is an edit script to alter A into B.
type EditScript struct {
IndexRanges []IndexRanges
}

// IsIdentity reports whether e is the identity edit script, that is, whether A and B are identical.
// See the TestHelper example.
func (e EditScript) IsIdentity() bool {
for _, seg := range e.IndexRanges {
if !seg.IsEqual() {
return false
}
}
return true
}

// Stat reports the number of insertions and deletions in e.
func (e EditScript) Stat() (ins, del int) {
for _, r := range e.IndexRanges {
switch {
case r.IsDelete():
del += r.HighA - r.LowA
case r.IsInsert():
ins += r.HighB - r.LowB
}
}
return ins, del
return fmt.Sprintf("(%d, %d) -- %s %d --> (%d, %d)", r.LowA, r.LowB, r.Op(), r.Len(), r.HighA, r.HighB)
}

// TODO: consider adding an "it just works" test helper that accepts two slices (via interface{}),
// diffs them using Strings or Bytes or Slices (using reflect.DeepEqual) as appropriate,
// and calls t.Errorf with a generated diff if they're not equal.

// scriptWithIndexRanges returns an EditScript containing s.
// It is used to reduce line noise.
func scriptWithIndexRanges(s ...IndexRanges) EditScript {
return EditScript{IndexRanges: s}
}

// dump formats s for debugging.
func (e EditScript) dump() string {
buf := new(bytes.Buffer)
for _, seg := range e.IndexRanges {
fmt.Fprintln(buf, seg)
}
return buf.String()
}
118 changes: 118 additions & 0 deletions edit/edit.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
// Package edit provides edit scripts.
// Edit scripts are a core notion for diffs.
// They represent a way to go from A to B by a sequence
// of insertions, deletions, and equal elements.
package edit

import (
"fmt"
"strings"
)

// A Script is an edit script: a list of ranges that together
// describe how to alter A into B.
type Script struct {
	Ranges []Range
}

// NewScript wraps the ranges r in a Script.
// It exists purely as shorthand for the equivalent struct literal.
// The returned Script uses r directly as its Ranges slice; no copy is made.
func NewScript(r ...Range) Script {
	var s Script
	s.Ranges = r
	return s
}

// IsIdentity reports whether s is the identity edit script,
// that is, whether every range in s is a run of equal elements,
// meaning A and B are identical.
// A Script without any ranges is the identity.
func (s *Script) IsIdentity() bool {
	for i := range s.Ranges {
		if !s.Ranges[i].IsEqual() {
			return false
		}
	}
	return true
}

// Stat counts the elements inserted and deleted by s.
// Ranges that are neither deletions nor insertions contribute nothing.
func (s *Script) Stat() (ins, del int) {
	for _, r := range s.Ranges {
		// Deletion is tested before insertion, matching the order callers
		// have always seen; a range that is both is empty and adds zero.
		if r.IsDelete() {
			del += r.HighA - r.LowA
			continue
		}
		if r.IsInsert() {
			ins += r.HighB - r.LowB
		}
	}
	return ins, del
}

// dump renders s for debugging, one range per line.
func (s *Script) dump() string {
	var buf strings.Builder
	for _, r := range s.Ranges {
		fmt.Fprintln(&buf, r)
	}
	return buf.String()
}

// A Range pairs two clopen (closed-open) index ranges.
// It covers the elements A[LowA:HighA] and B[LowB:HighB].
type Range struct {
	LowA, HighA int
	LowB, HighB int
}

// IsInsert reports whether r is an insertion in a Script,
// which is the case when its A side is empty.
// The inserted elements are then B[LowB:HighB].
func (r *Range) IsInsert() bool {
	return r.HighA == r.LowA
}

// IsDelete reports whether r is a deletion in a Script,
// which is the case when its B side is empty.
// The deleted elements are then A[LowA:HighA].
func (r *Range) IsDelete() bool {
	return r.HighB == r.LowB
}

// IsEqual reports whether r is a run of equal elements in a Script,
// which is the case when its A side and B side have the same length.
// The elements A[LowA:HighA] are then equal to the elements B[LowB:HighB].
//
// Note that a range that is empty on both sides satisfies
// IsInsert, IsDelete, and IsEqual simultaneously.
func (r *Range) IsEqual() bool {
	lenA := r.HighA - r.LowA
	lenB := r.HighB - r.LowB
	return lenA == lenB
}

// An Op is an edit operation used to transform A into B.
type Op int8

//go:generate stringer -type Op

const (
	Del Op = -1 // delete
	Eq  Op = 0  // equal
	Ins Op = 1  // insert
)

// Op reports the kind of operation that r represents.
// The same information is available from r.IsInsert,
// r.IsDelete, and r.IsEqual; Op is offered because a single
// value is sometimes handier than three predicates.
//
// Insertion is checked first, then deletion, then equality,
// so a range that is empty on both sides is reported as Ins.
// Op panics if r is none of the three, i.e. both sides are
// non-empty and their lengths differ.
func (r *Range) Op() Op {
	switch {
	case r.IsInsert():
		return Ins
	case r.IsDelete():
		return Del
	case r.IsEqual():
		return Eq
	}
	panic("malformed Range")
}

// Len reports how many elements r spans:
// the number of deleted elements for a deletion,
// the number of inserted elements for an insertion,
// and the number of equal elements for an equal run.
func (r *Range) Len() int {
	// A non-empty A side determines the length; only when A is empty
	// (an insertion) does the B side count.
	if r.LowA != r.HighA {
		return r.HighA - r.LowA
	}
	return r.HighB - r.LowB
}
Loading

0 comments on commit 7cc3ed0

Please sign in to comment.